Dictionaries
Python's most powerful data structure — key-value pair storage
What is a Dictionary?
A dictionary is Python's data structure for storing key-value pairs, similar to:
- R: Named list
- JSON: Object
- Real life: An actual dictionary (word → definition)
Key Characteristics:
- Insertion-ordered: Since Python 3.7, dictionaries preserve the order in which keys were inserted (earlier versions made no ordering guarantee)
- Keys are unique: Each key can only appear once
- Keys are hashable: Keys must be hashable, which in practice means immutable types (strings, numbers, tuples that contain only hashable items)
- Values are arbitrary: Values can be any type
Creating Dictionaries
1. Basic Creation
python
# Empty dictionary
empty_dict = {}
empty_dict2 = dict()
# Basic dictionary
student = {
"name": "Alice",
"age": 25,
"major": "Economics",
"gpa": 3.85
}
# Numeric keys
scores = {
1: 85,
2: 90,
3: 78
}
# Mixed key types (not recommended)
mixed = {
"name": "Alice",
1: "first",
(10, 20): "coordinates"
}
2. Creating from Lists
python
# From key-value pairs list
pairs = [("name", "Alice"), ("age", 25), ("major", "Economics")]
student = dict(pairs)
print(student) # {'name': 'Alice', 'age': 25, 'major': 'Economics'}
# From two lists
keys = ["name", "age", "major"]
values = ["Alice", 25, "Economics"]
student = dict(zip(keys, values))
3. Dictionary Comprehensions
python
# Create dictionary of squares
squares = {x: x**2 for x in range(1, 6)}
print(squares) # {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}
# Create index dictionary from list
students = ["Alice", "Bob", "Carol"]
student_ids = {name: i+1 for i, name in enumerate(students)}
print(student_ids) # {'Alice': 1, 'Bob': 2, 'Carol': 3}
Accessing Dictionaries
1. Basic Access
python
student = {
"name": "Alice",
"age": 25,
"major": "Economics",
"gpa": 3.85
}
# Method 1: Using keys
print(student["name"]) # Alice
print(student["gpa"]) # 3.85
# ❌ Accessing non-existent key raises error
# print(student["phone"]) # KeyError
# Method 2: Using get() (safer)
print(student.get("name")) # Alice
print(student.get("phone")) # None (no error)
print(student.get("phone", "Not provided")) # Not provided (custom default)
2. Checking if Key Exists
python
student = {"name": "Alice", "age": 25}
# Using in
print("name" in student) # True
print("phone" in student) # False
# Safe access
if "phone" in student:
print(student["phone"])
else:
print("No phone information")
✏️ Modifying Dictionaries
1. Adding/Modifying Key-Value Pairs
python
student = {"name": "Alice", "age": 25}
# Add new key
student["major"] = "Economics"
print(student) # {'name': 'Alice', 'age': 25, 'major': 'Economics'}
# Modify existing key
student["age"] = 26
print(student) # {'name': 'Alice', 'age': 26, 'major': 'Economics'}
# update(): Batch add/modify
student.update({"gpa": 3.85, "year": 3})
print(student)
2. Deleting Key-Value Pairs
python
student = {
"name": "Alice",
"age": 25,
"major": "Economics",
"temp": "delete_me"
}
# del: Delete specified key
del student["temp"]
print(student)
# pop(): Delete and return value
major = student.pop("major")
print(major) # Economics
print(student) # major no longer in dictionary
# popitem(): Delete and return the most recently inserted key-value pair (LIFO order is guaranteed since Python 3.7)
last_item = student.popitem()
print(last_item) # ('age', 25)
# clear(): Empty dictionary
student.clear()
print(student) # {}
🔄 Iterating Through Dictionaries
1. Iterate Over Keys
python
student = {"name": "Alice", "age": 25, "gpa": 3.85}
# Method 1: Default iteration over keys
for key in student:
print(key)
# Method 2: Explicitly iterate over keys
for key in student.keys():
print(key)
# Output:
# name
# age
# gpa
2. Iterate Over Values
python
for value in student.values():
print(value)
# Output:
# Alice
# 25
# 3.85
3. Iterate Over Key-Value Pairs
python
for key, value in student.items():
print(f"{key}: {value}")
# Output:
# name: Alice
# age: 25
# gpa: 3.85
🔬 Real-World Cases
Case 1: Survey Data Storage
python
# Single respondent data
respondent = {
"id": 1001,
"name": "Alice",
"age": 30,
"gender": "Female",
"income": 75000,
"education": "Bachelor's",
"marital_status": "Married",
"children": 2,
"satisfaction": 4
}
# Generate report
print("=== Respondent Report ===")
print(f"ID: {respondent['id']}")
print(f"Name: {respondent['name']}")
print(f"Age: {respondent['age']} years")
print(f"Gender: {respondent['gender']}")
print(f"Income: ${respondent['income']:,}")
print(f"Education: {respondent['education']}")
print(f"Marital: {respondent['marital_status']}, {respondent['children']} children")
print(f"Satisfaction: {respondent['satisfaction']}/5")
Case 2: Grouped Statistics
python
# Respondent list
respondents = [
{"name": "Alice", "age": 25, "major": "Economics"},
{"name": "Bob", "age": 30, "major": "Sociology"},
{"name": "Carol", "age": 28, "major": "Economics"},
{"name": "David", "age": 35, "major": "Political Science"},
{"name": "Emma", "age": 26, "major": "Sociology"},
]
# Group by major and count
major_counts = {}
for person in respondents:
major = person["major"]
if major in major_counts:
major_counts[major] += 1
else:
major_counts[major] = 1
print("=== Major Distribution ===")
for major, count in major_counts.items():
print(f"{major}: {count} people")
# More concise (using get)
major_counts = {}
for person in respondents:
major = person["major"]
major_counts[major] = major_counts.get(major, 0) + 1
Case 3: Data Validation
python
# Respondent data
respondent = {
"id": 1001,
"age": 30,
"income": 75000,
"education": "Bachelor's"
}
# Validation rules
validation_rules = {
"age": (18, 100), # Age range
"income": (0, 1000000), # Income range
}
# Execute validation
print("=== Data Validation ===")
is_valid = True
for field, (min_val, max_val) in validation_rules.items():
if field in respondent:
value = respondent[field]
if min_val <= value <= max_val:
print(f"✅ {field}: {value} (normal)")
else:
print(f"❌ {field}: {value} (out of range {min_val}-{max_val})")
is_valid = False
else:
print(f"⚠️ {field}: missing")
if is_valid:
print("\n✅ Data validation passed")
else:
print("\n❌ Data validation failed")
Case 4: Variable Mapping (Encoding)
python
# Education level encoding
education_mapping = {
"High School": 1,
"Associate Degree": 2,
"Bachelor's Degree": 3,
"Master's Degree": 4,
"Doctoral Degree": 5
}
# Raw data
students = [
{"name": "Alice", "education": "Bachelor's Degree"},
{"name": "Bob", "education": "Master's Degree"},
{"name": "Carol", "education": "High School"},
]
# Add encoding
for student in students:
edu = student["education"]
student["education_code"] = education_mapping.get(edu, 0)
# Print results
for student in students:
print(f"{student['name']}: {student['education']} (code: {student['education_code']})")
🚀 Advanced Techniques
1. setdefault(): Safely Set Default Value
python
# Count word occurrences
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
word_counts = {}
for word in words:
word_counts.setdefault(word, 0) # If doesn't exist, set to 0
word_counts[word] += 1
print(word_counts) # {'apple': 3, 'banana': 2, 'cherry': 1}
2. defaultdict: Auto-Initialize
python
from collections import defaultdict
# Auto-initialize to 0
word_counts = defaultdict(int)
words = ["apple", "banana", "apple", "cherry"]
for word in words:
word_counts[word] += 1 # No need to check if key exists
print(dict(word_counts)) # {'apple': 2, 'banana': 1, 'cherry': 1}
# Auto-initialize to list
groups = defaultdict(list)
students = [
{"name": "Alice", "major": "Economics"},
{"name": "Bob", "major": "Sociology"},
{"name": "Carol", "major": "Economics"},
]
for student in students:
groups[student["major"]].append(student["name"])
print(dict(groups))
# {'Economics': ['Alice', 'Carol'], 'Sociology': ['Bob']}
3. Counter: Quick Counting
python
from collections import Counter
# Count major distribution
majors = ["Economics", "Sociology", "Economics", "Political Science", "Sociology", "Economics"]
major_counts = Counter(majors)
print(major_counts)
# Counter({'Economics': 3, 'Sociology': 2, 'Political Science': 1})
# Most common N items
print(major_counts.most_common(2))
# [('Economics', 3), ('Sociology', 2)]
4. Nested Dictionaries
python
# Multi-level data
survey_data = {
"metadata": {
"name": "China Household Income Survey",
"year": 2024,
"sample_size": 1000
},
"respondents": [
{"id": 1, "age": 25, "income": 50000},
{"id": 2, "age": 30, "income": 75000}
],
"statistics": {
"mean_age": 27.5,
"mean_income": 62500
}
}
# Access nested data
print(survey_data["metadata"]["name"])
print(survey_data["respondents"][0]["age"])
print(survey_data["statistics"]["mean_income"])
🔄 Dictionary vs List
| Scenario | Use List | Use Dictionary |
|---|---|---|
| Order matters | ✅ (positional, sortable in place) | ⚠️ Insertion order only (Python 3.7+), no positional index |
| Access by index | ✅ list[0] | ❌ |
| Access by name | ❌ | ✅ dict["name"] |
| Store same types | ✅ | ❌ |
| Store key-value pairs | ❌ | ✅ |
| Lookup speed | Slow (O(n) search by value) | Fast (O(1) on average, by key) |
Example Comparison:
python
# Using list (not good)
student = ["Alice", 25, "Economics", 3.85]
print(student[2]) # Economics (not intuitive, can't remember index)
# Using dictionary (good)
student = {"name": "Alice", "age": 25, "major": "Economics", "gpa": 3.85}
print(student["major"]) # Economics (clear at a glance)
⚠️ Common Errors
Error 1: Using Non-Existent Key
python
student = {"name": "Alice"}
print(student["age"]) # ❌ KeyError
# Correct approach
print(student.get("age", "Unknown")) # ✅ Unknown
Error 2: Using Mutable Type as Key
python
# ❌ Lists cannot be keys
# d = {[1, 2]: "value"} # TypeError
# ✅ Tuples can
d = {(1, 2): "value"}
Error 3: Modifying Dictionary While Iterating
python
# ❌ Dangerous operation
d = {"a": 1, "b": 2, "c": 3}
for key in d:
if d[key] == 2:
del d[key] # RuntimeError
# ✅ Correct approach: iterate over copy
d = {"a": 1, "b": 2, "c": 3}
for key in list(d.keys()):
if d[key] == 2:
del d[key]
💪 Practice Problems
Exercise 1: Grade Management
python
# Create student grade dictionary
# Key: student name, value: list of grades
# Calculate each student's average grade
students_scores = {
"Alice": [85, 90, 92],
"Bob": [78, 82, 88],
"Carol": [95, 92, 89]
}
# Task: Calculate and print each student's average grade
Exercise 2: Survey Data Statistics
python
# Given survey data
responses = [
{"gender": "Male", "age_group": "18-30", "satisfaction": 4},
{"gender": "Female", "age_group": "31-45", "satisfaction": 5},
{"gender": "Male", "age_group": "18-30", "satisfaction": 3},
{"gender": "Female", "age_group": "18-30", "satisfaction": 4},
]
# Tasks:
# 1. Count people by gender
# 2. Count people by age group
# 3. Calculate average satisfaction
Exercise 3: Data Transformation
python
# Given list
data = [
["Alice", 25, 75000],
["Bob", 30, 85000],
["Carol", 28, 70000]
]
# Task: Transform to list of dictionaries
# [
# {"name": "Alice", "age": 25, "income": 75000},
# {"name": "Bob", "age": 30, "income": 85000},
# ...
# ]
📚 Next Steps
In the next section, we'll learn about Sets, which store unique elements and are perfect for deduplication and set operations.
Keep learning!