Skip to content

Dictionaries

Python's most powerful data structure — key-value pair storage


What is a Dictionary?

A dictionary is Python's data structure for storing key-value pairs, similar to:

  • R: Named list
  • JSON: Object
  • Real life: An actual dictionary (word → definition)

Key Characteristics:

  • Insertion-ordered: since Python 3.7, dictionaries preserve the order in which keys were added (earlier versions did not guarantee any order)
  • Keys are unique: Each key can only appear once
  • Keys are hashable: Keys must be hashable, which in practice means immutable types (strings, numbers, tuples whose elements are themselves hashable)
  • Values are arbitrary: Values can be any type

Creating Dictionaries

1. Basic Creation

python
# Empty dictionary: the literal {} and the dict() constructor are equivalent
empty_dict = {}
empty_dict2 = dict()

# Basic dictionary: string keys mapped to values of different types
student = {
    "name": "Alice",
    "age": 25,
    "major": "Economics",
    "gpa": 3.85
}

# Numeric keys
scores = {
    1: 85,
    2: 90,
    3: 78
}

# Mixed key types: legal because str, int, and tuple are all hashable,
# but not recommended since it makes the dictionary harder to reason about
mixed = {
    "name": "Alice",
    1: "first",
    (10, 20): "coordinates"
}

2. Creating from Lists

python
# From a list of (key, value) tuples
pairs = [("name", "Alice"), ("age", 25), ("major", "Economics")]
student = dict(pairs)
print(student)  # {'name': 'Alice', 'age': 25, 'major': 'Economics'}

# From two parallel lists: zip() pairs each key with the value
# at the same position
keys = ["name", "age", "major"]
values = ["Alice", 25, "Economics"]
student = dict(zip(keys, values))  # {'name': 'Alice', 'age': 25, 'major': 'Economics'}

3. Dictionary Comprehensions

python
# Map each integer from 1 to 5 to its square
squares = {n: n * n for n in range(1, 6)}
print(squares)  # {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

# Give every name a 1-based ID taken from its position in the list
students = ["Alice", "Bob", "Carol"]
student_ids = {
    name: position for position, name in enumerate(students, start=1)
}
print(student_ids)  # {'Alice': 1, 'Bob': 2, 'Carol': 3}

Accessing Dictionaries

1. Basic Access

python
student = {
    "name": "Alice",
    "age": 25,
    "major": "Economics",
    "gpa": 3.85
}

# Method 1: Square-bracket lookup (raises KeyError when the key is missing)
print(student["name"])   # Alice
print(student["gpa"])    # 3.85

# ❌ Accessing non-existent key raises error
# print(student["phone"])  # KeyError

# Method 2: Using get() (safer): returns a default instead of raising
print(student.get("name"))    # Alice
print(student.get("phone"))   # None (no error)
print(student.get("phone", "Not provided"))  # Not provided (custom default)

2. Checking if Key Exists

python
student = {"name": "Alice", "age": 25}

# The `in` operator tests whether a key (not a value) is present
print("name" in student)   # True
print("phone" in student)  # False

# Guard the lookup so that a missing key never raises KeyError
if "phone" not in student:
    print("No phone information")
else:
    print(student["phone"])

✏️ Modifying Dictionaries

1. Adding/Modifying Key-Value Pairs

python
student = {"name": "Alice", "age": 25}

# Add new key: assigning to a key that does not exist creates it
student["major"] = "Economics"
print(student)  # {'name': 'Alice', 'age': 25, 'major': 'Economics'}

# Modify existing key: assigning to an existing key overwrites its value
student["age"] = 26
print(student)  # {'name': 'Alice', 'age': 26, 'major': 'Economics'}

# update(): Batch add/modify (new keys are appended, existing keys overwritten)
student.update({"gpa": 3.85, "year": 3})
print(student)  # {'name': 'Alice', 'age': 26, 'major': 'Economics', 'gpa': 3.85, 'year': 3}

2. Deleting Key-Value Pairs

python
student = {
    "name": "Alice",
    "age": 25,
    "major": "Economics",
    "temp": "delete_me"
}

# del: Delete specified key (raises KeyError if the key is missing)
del student["temp"]
print(student)  # {'name': 'Alice', 'age': 25, 'major': 'Economics'}

# pop(): Delete and return value
major = student.pop("major")
print(major)     # Economics
print(student)   # {'name': 'Alice', 'age': 25} (major no longer in dictionary)

# popitem(): Delete and return the last inserted key-value pair (Python 3.7+)
last_item = student.popitem()
print(last_item)  # ('age', 25)

# clear(): Empty dictionary
student.clear()
print(student)  # {}

🔄 Iterating Through Dictionaries

1. Iterate Over Keys

python
student = {"name": "Alice", "age": 25, "gpa": 3.85}

# Method 1: Default iteration over keys (the idiomatic form)
for key in student:
    print(key)

# Method 2: Explicitly iterate over keys (same result as Method 1)
for key in student.keys():
    print(key)

# Output (printed once by each loop):
# name
# age
# gpa

2. Iterate Over Values

python
# Define the data here so the snippet runs on its own
student = {"name": "Alice", "age": 25, "gpa": 3.85}

# values() yields only the values, in insertion order
for value in student.values():
    print(value)

# Output:
# Alice
# 25
# 3.85

3. Iterate Over Key-Value Pairs

python
# Define the data here so the snippet runs on its own
student = {"name": "Alice", "age": 25, "gpa": 3.85}

# items() yields (key, value) tuples, unpacked here into two loop variables
for key, value in student.items():
    print(f"{key}: {value}")

# Output:
# name: Alice
# age: 25
# gpa: 3.85

🔬 Real-World Cases

Case 1: Survey Data Storage

python
# Single respondent data: one dictionary holds every field of one record
respondent = {
    "id": 1001,
    "name": "Alice",
    "age": 30,
    "gender": "Female",
    "income": 75000,
    "education": "Bachelor's",
    "marital_status": "Married",
    "children": 2,
    "satisfaction": 4
}

# Generate report
# Inside an f-string delimited by double quotes, the dictionary keys use
# single quotes so the two kinds of quotes do not clash.
print("=== Respondent Report ===")
print(f"ID: {respondent['id']}")
print(f"Name: {respondent['name']}")
print(f"Age: {respondent['age']} years")
print(f"Gender: {respondent['gender']}")
# The ":," format spec inserts thousands separators: $75,000
print(f"Income: ${respondent['income']:,}")
print(f"Education: {respondent['education']}")
print(f"Marital: {respondent['marital_status']}, {respondent['children']} children")
print(f"Satisfaction: {respondent['satisfaction']}/5")

Case 2: Grouped Statistics

python
# Respondent list: each respondent is a dictionary with the same keys
respondents = [
    {"name": "Alice", "age": 25, "major": "Economics"},
    {"name": "Bob", "age": 30, "major": "Sociology"},
    {"name": "Carol", "age": 28, "major": "Economics"},
    {"name": "David", "age": 35, "major": "Political Science"},
    {"name": "Emma", "age": 26, "major": "Sociology"},
]

# Group by major and count
major_counts = {}
for person in respondents:
    major = person["major"]
    # First occurrence creates the counter; later ones increment it
    if major in major_counts:
        major_counts[major] += 1
    else:
        major_counts[major] = 1

print("=== Major Distribution ===")
for major, count in major_counts.items():
    print(f"{major}: {count} people")

# Output:
# === Major Distribution ===
# Economics: 2 people
# Sociology: 2 people
# Political Science: 1 people

# More concise (using get): get(major, 0) returns 0 for an unseen major,
# so no if/else is needed
major_counts = {}
for person in respondents:
    major = person["major"]
    major_counts[major] = major_counts.get(major, 0) + 1

Case 3: Data Validation

python
# Record to be checked
respondent = {
    "id": 1001,
    "age": 30,
    "income": 75000,
    "education": "Bachelor's"
}

# Each rule maps a field name to its inclusive (minimum, maximum) range
validation_rules = {
    "age": (18, 100),
    "income": (0, 1000000),
}

# Check every rule against the record
print("=== Data Validation ===")
is_valid = True

for field, (min_val, max_val) in validation_rules.items():
    # A missing field is only reported; it does not fail the validation
    if field not in respondent:
        print(f"⚠️  {field}: missing")
        continue

    value = respondent[field]
    if not (min_val <= value <= max_val):
        print(f"❌ {field}: {value} (out of range {min_val}-{max_val})")
        is_valid = False
    else:
        print(f"✅ {field}: {value} (normal)")

# Final verdict
print("\n✅ Data validation passed" if is_valid else "\n❌ Data validation failed")

Case 4: Variable Mapping (Encoding)

python
# Lookup table: education label -> ordinal code
education_mapping = {
    "High School": 1,
    "Associate Degree": 2,
    "Bachelor's Degree": 3,
    "Master's Degree": 4,
    "Doctoral Degree": 5
}

# Raw data
students = [
    {"name": "Alice", "education": "Bachelor's Degree"},
    {"name": "Bob", "education": "Master's Degree"},
    {"name": "Carol", "education": "High School"},
]

# Attach the code to every record; labels missing from the table get 0
for record in students:
    record["education_code"] = education_mapping.get(record["education"], 0)

# Print results
for record in students:
    name = record["name"]
    label = record["education"]
    code = record["education_code"]
    print(f"{name}: {label} (code: {code})")

🚀 Advanced Techniques

1. setdefault(): Safely Set Default Value

python
# Count word occurrences
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]

word_counts = {}
for word in words:
    # setdefault() inserts the key with value 0 only when it is missing;
    # an existing count is left untouched
    word_counts.setdefault(word, 0)
    word_counts[word] += 1

print(word_counts)  # {'apple': 3, 'banana': 2, 'cherry': 1}

2. defaultdict: Auto-Initialize

python
from collections import defaultdict

# Auto-initialize to 0: int() returns 0, so a missing key starts at 0
word_counts = defaultdict(int)
words = ["apple", "banana", "apple", "cherry"]

for word in words:
    word_counts[word] += 1  # No need to check if key exists

# Convert to a plain dict so the output is not wrapped in defaultdict(...)
print(dict(word_counts))  # {'apple': 2, 'banana': 1, 'cherry': 1}

# Auto-initialize to list: a missing key starts as an empty list
groups = defaultdict(list)
students = [
    {"name": "Alice", "major": "Economics"},
    {"name": "Bob", "major": "Sociology"},
    {"name": "Carol", "major": "Economics"},
]

# Group student names under their major
for student in students:
    groups[student["major"]].append(student["name"])

print(dict(groups))
# {'Economics': ['Alice', 'Carol'], 'Sociology': ['Bob']}

3. Counter: Quick Counting

python
from collections import Counter

# Count major distribution
majors = ["Economics", "Sociology", "Economics", "Political Science", "Sociology", "Economics"]

# Counter tallies the items of any iterable in a single call
major_counts = Counter(majors)
print(major_counts)
# Counter({'Economics': 3, 'Sociology': 2, 'Political Science': 1})

# Most common N items, returned as (item, count) tuples, highest count first
print(major_counts.most_common(2))
# [('Economics', 3), ('Sociology', 2)]

4. Nested Dictionaries

python
# Multi-level data: values may themselves be dictionaries or lists
survey_data = {
    "metadata": {
        "name": "China Household Income Survey",
        "year": 2024,
        "sample_size": 1000
    },
    "respondents": [
        {"id": 1, "age": 25, "income": 50000},
        {"id": 2, "age": 30, "income": 75000}
    ],
    "statistics": {
        "mean_age": 27.5,
        "mean_income": 62500
    }
}

# Access nested data: chain one lookup per level (key for a dict, index for a list)
print(survey_data["metadata"]["name"])           # China Household Income Survey
print(survey_data["respondents"][0]["age"])      # 25
print(survey_data["statistics"]["mean_income"])  # 62500

🔄 Dictionary vs List

| Scenario | Use List | Use Dictionary |
|---|---|---|
| Order matters | ✅ | |
| Access by index | ✅ `list[0]` | |
| Access by name | | ✅ `dict["name"]` |
| Store same types | ✅ | |
| Store key-value pairs | | ✅ |
| Lookup speed | Slow (O(n)) | Fast (O(1)) |

Example Comparison:

python
# Using list (not good): the meaning of each position must be memorized
student = ["Alice", 25, "Economics", 3.85]
print(student[2])  # Economics (not intuitive, can't remember index)

# Using dictionary (good): every value is labelled by its key
student = {"name": "Alice", "age": 25, "major": "Economics", "gpa": 3.85}
print(student["major"])  # Economics (clear at a glance)

⚠️ Common Errors

Error 1: Using Non-Existent Key

python
student = {"name": "Alice"}

# ❌ Square-bracket access to a missing key raises KeyError
# (left commented out so the rest of the snippet can run)
# print(student["age"])  # KeyError: 'age'

# Correct approach: get() returns the supplied default instead of raising
print(student.get("age", "Unknown"))  # ✅ Unknown

Error 2: Using Mutable Type as Key

python
# ❌ Lists cannot be keys: they are mutable and therefore unhashable
# d = {[1, 2]: "value"}  # TypeError: unhashable type: 'list'

# ✅ Tuples can (as long as every element is itself hashable)
d = {(1, 2): "value"}

Error 3: Modifying Dictionary While Iterating

python
# ❌ Dangerous operation: changing a dictionary's size while iterating over it
d = {"a": 1, "b": 2, "c": 3}
try:
    for key in d:
        if d[key] == 2:
            del d[key]  # the deletion itself succeeds...
except RuntimeError as err:
    # ...but the loop's next step detects the size change and raises.
    # Catching it here lets the rest of the snippet run.
    print(f"RuntimeError: {err}")  # RuntimeError: dictionary changed size during iteration

# ✅ Correct approach: iterate over a snapshot of the keys.
# list(d) copies the keys up front, so deleting from d is safe.
d = {"a": 1, "b": 2, "c": 3}
for key in list(d):
    if d[key] == 2:
        del d[key]

print(d)  # {'a': 1, 'c': 3}

💪 Practice Problems

Exercise 1: Grade Management

python
# Student grade dictionary
# Key: student name, value: list of that student's grades
# Goal: calculate each student's average grade

students_scores = {
    "Alice": [85, 90, 92],
    "Bob": [78, 82, 88],
    "Carol": [95, 92, 89]
}

# Task: Calculate and print each student's average grade
# Hint: loop over students_scores.items() and use sum(grades) / len(grades)

Exercise 2: Survey Data Statistics

python
# Given survey data: one dictionary per response, all with the same keys
responses = [
    {"gender": "Male", "age_group": "18-30", "satisfaction": 4},
    {"gender": "Female", "age_group": "31-45", "satisfaction": 5},
    {"gender": "Male", "age_group": "18-30", "satisfaction": 3},
    {"gender": "Female", "age_group": "18-30", "satisfaction": 4},
]

# Tasks:
# 1. Count people by gender
# 2. Count people by age group
# 3. Calculate average satisfaction
# Hint: for the counts, use counts.get(key, 0) + 1 or collections.Counter

Exercise 3: Data Transformation

python
# Given list: each inner list holds [name, age, income] for one person
data = [
    ["Alice", 25, 75000],
    ["Bob", 30, 85000],
    ["Carol", 28, 70000]
]

# Task: Transform to list of dictionaries
# [
#     {"name": "Alice", "age": 25, "income": 75000},
#     {"name": "Bob", "age": 30, "income": 85000},
#     ...
# ]
# Hint: dict(zip(["name", "age", "income"], row)) builds one dictionary per row

📚 Next Steps

In the next section, we'll learn about Sets, which store unique elements and are perfect for deduplication and set operations.

Keep learning!

Released under the MIT License. Content © Author.