Skip to content

字典(Dictionaries)

Python 最强大的数据结构 —— 键值对存储


什么是字典?

字典(Dictionary)是 Python 中存储 键值对(key-value pairs) 的数据结构,类似于:

  • R: Named list
  • JSON: Object
  • 现实生活: 真正的字典(单词→释义)

关键特点

  • 有序:Python 3.7+ 保证保持插入顺序(3.6 及更早版本不保证顺序)
  • 键唯一:每个键只能出现一次
  • 键可哈希:键必须是可哈希(hashable)的类型,通常即不可变类型(字符串、数字、只含可哈希元素的元组);列表、字典等可变类型不能作为键
  • 值任意:值可以是任何类型

创建字典

1. 基本创建

python
# 空字典
empty_dict = {}
empty_dict2 = dict()

# 基本字典
student = {
    "name": "Alice",
    "age": 25,
    "major": "Economics",
    "gpa": 3.85
}

# 数字键
scores = {
    1: 85,
    2: 90,
    3: 78
}

# 混合键类型(不推荐)
mixed = {
    "name": "Alice",
    1: "first",
    (10, 20): "coordinates"
}

2. 从列表创建

python
# 从键值对列表
pairs = [("name", "Alice"), ("age", 25), ("major", "Economics")]
student = dict(pairs)
print(student)  # {'name': 'Alice', 'age': 25, 'major': 'Economics'}

# 从两个列表
keys = ["name", "age", "major"]
values = ["Alice", 25, "Economics"]
student = dict(zip(keys, values))

3. 字典推导式

python
# 创建平方数字典
squares = {x: x**2 for x in range(1, 6)}
print(squares)  # {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

# 从列表创建索引字典
students = ["Alice", "Bob", "Carol"]
student_ids = {name: i+1 for i, name in enumerate(students)}
print(student_ids)  # {'Alice': 1, 'Bob': 2, 'Carol': 3}

访问字典

1. 基本访问

python
student = {
    "name": "Alice",
    "age": 25,
    "major": "Economics",
    "gpa": 3.85
}

# 方式 1: 使用键访问
print(student["name"])   # Alice
print(student["gpa"])    # 3.85

#  访问不存在的键会报错
# print(student["phone"])  # KeyError

# 方式 2: 使用 get()(更安全)
print(student.get("name"))    # Alice
print(student.get("phone"))   # None(不报错)
print(student.get("phone", "未提供"))  # 未提供(自定义默认值)

2. 检查键是否存在

python
student = {"name": "Alice", "age": 25}

# 使用 in
print("name" in student)   # True
print("phone" in student)  # False

# 安全访问
if "phone" in student:
    print(student["phone"])
else:
    print("无电话信息")

修改字典

1. 添加/修改键值对

python
student = {"name": "Alice", "age": 25}

# 添加新键
student["major"] = "Economics"
print(student)  # {'name': 'Alice', 'age': 25, 'major': 'Economics'}

# 修改已有键
student["age"] = 26
print(student)  # {'name': 'Alice', 'age': 26, 'major': 'Economics'}

# update(): 批量添加/修改
student.update({"gpa": 3.85, "year": 3})
print(student)

2. 删除键值对

python
student = {
    "name": "Alice",
    "age": 25,
    "major": "Economics",
    "temp": "delete_me"
}

# del: 删除指定键
del student["temp"]
print(student)

# pop(): 删除并返回值
major = student.pop("major")
print(major)     # Economics
print(student)   # 字典中已没有 major

# popitem(): 删除最后一个键值对(Python 3.7+)
last_item = student.popitem()
print(last_item)  # ('age', 25)

# clear(): 清空字典
student.clear()
print(student)  # {}

遍历字典

1. 遍历键

python
student = {"name": "Alice", "age": 25, "gpa": 3.85}

# 方式 1: 默认遍历键
for key in student:
    print(key)

# 方式 2: 明确遍历键
for key in student.keys():
    print(key)

# 输出:
# name
# age
# gpa

2. 遍历值

python
for value in student.values():
    print(value)

# 输出:
# Alice
# 25
# 3.85

3. 遍历键值对

python
for key, value in student.items():
    print(f"{key}: {value}")

# 输出:
# name: Alice
# age: 25
# gpa: 3.85

实战案例

案例 1:问卷数据存储

python
# 单个受访者数据
respondent = {
    "id": 1001,
    "name": "Alice",
    "age": 30,
    "gender": "Female",
    "income": 75000,
    "education": "Bachelor's",
    "marital_status": "Married",
    "children": 2,
    "satisfaction": 4
}

# 生成报告
print("=== 受访者报告 ===")
print(f"ID: {respondent['id']}")
print(f"姓名: {respondent['name']}")
print(f"年龄: {respondent['age']} 岁")
print(f"性别: {respondent['gender']}")
print(f"收入: ${respondent['income']:,}")
print(f"教育: {respondent['education']}")
print(f"婚姻: {respondent['marital_status']}, {respondent['children']} 个孩子")
print(f"满意度: {respondent['satisfaction']}/5")

案例 2:分组统计

python
# 受访者列表
respondents = [
    {"name": "Alice", "age": 25, "major": "Economics"},
    {"name": "Bob", "age": 30, "major": "Sociology"},
    {"name": "Carol", "age": 28, "major": "Economics"},
    {"name": "David", "age": 35, "major": "Political Science"},
    {"name": "Emma", "age": 26, "major": "Sociology"},
]

# 按专业分组计数
major_counts = {}
for person in respondents:
    major = person["major"]
    if major in major_counts:
        major_counts[major] += 1
    else:
        major_counts[major] = 1

print("=== 专业分布 ===")
for major, count in major_counts.items():
    print(f"{major}: {count} 人")

# 更简洁的写法(使用 get)
major_counts = {}
for person in respondents:
    major = person["major"]
    major_counts[major] = major_counts.get(major, 0) + 1

案例 3:数据验证

python
# Respondent record to validate
respondent = {
    "id": 1001,
    "age": 30,
    "income": 75000,
    "education": "Bachelor's"
}

# Validation rules: field name -> (minimum, maximum), both bounds inclusive
validation_rules = {
    "age": (18, 100),      # allowed age range
    "income": (0, 1000000), # allowed income range
}

# Run the checks
print("=== 数据验证 ===")
is_valid = True

for field, (min_val, max_val) in validation_rules.items():
    if field in respondent:
        value = respondent[field]
        if min_val <= value <= max_val:
            print(f" {field}: {value} (正常)")
        else:
            print(f" {field}: {value} (超出范围 {min_val}-{max_val})")
            is_valid = False
    else:
        # A missing field is reported as a warning only; it does not flip
        # is_valid. The warning sign restores a glyph whose base character
        # was lost, leaving only an invisible variation selector.
        print(f"⚠️  {field}: 缺失")

if is_valid:
    print("\n 数据验证通过")
else:
    print("\n 数据验证失败")

案例 4:变量映射(编码)

python
# 教育水平编码
education_mapping = {
    "High School": 1,
    "Associate Degree": 2,
    "Bachelor's Degree": 3,
    "Master's Degree": 4,
    "Doctoral Degree": 5
}

# 原始数据
students = [
    {"name": "Alice", "education": "Bachelor's Degree"},
    {"name": "Bob", "education": "Master's Degree"},
    {"name": "Carol", "education": "High School"},
]

# 添加编码
for student in students:
    edu = student["education"]
    student["education_code"] = education_mapping.get(edu, 0)

# 打印结果
for student in students:
    print(f"{student['name']}: {student['education']} (代码: {student['education_code']})")

高级技巧

1. setdefault(): 安全设置默认值

python
# 统计单词出现次数
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]

word_counts = {}
for word in words:
    word_counts.setdefault(word, 0)  # 如果不存在,设为 0
    word_counts[word] += 1

print(word_counts)  # {'apple': 3, 'banana': 2, 'cherry': 1}

2. defaultdict: 自动初始化

python
from collections import defaultdict

# 自动初始化为 0
word_counts = defaultdict(int)
words = ["apple", "banana", "apple", "cherry"]

for word in words:
    word_counts[word] += 1  # 无需检查键是否存在

print(dict(word_counts))  # {'apple': 2, 'banana': 1, 'cherry': 1}

# 自动初始化为列表
groups = defaultdict(list)
students = [
    {"name": "Alice", "major": "Economics"},
    {"name": "Bob", "major": "Sociology"},
    {"name": "Carol", "major": "Economics"},
]

for student in students:
    groups[student["major"]].append(student["name"])

print(dict(groups))
# {'Economics': ['Alice', 'Carol'], 'Sociology': ['Bob']}

3. Counter: 快速计数

python
from collections import Counter

# 统计专业分布
majors = ["Economics", "Sociology", "Economics", "Political Science", "Sociology", "Economics"]

major_counts = Counter(majors)
print(major_counts)
# Counter({'Economics': 3, 'Sociology': 2, 'Political Science': 1})

# 最常见的 N 个
print(major_counts.most_common(2))
# [('Economics', 3), ('Sociology', 2)]

4. 嵌套字典

python
# 多层级数据
survey_data = {
    "metadata": {
        "name": "中国居民收入调查",
        "year": 2024,
        "sample_size": 1000
    },
    "respondents": [
        {"id": 1, "age": 25, "income": 50000},
        {"id": 2, "age": 30, "income": 75000}
    ],
    "statistics": {
        "mean_age": 27.5,
        "mean_income": 62500
    }
}

# 访问嵌套数据
print(survey_data["metadata"]["name"])
print(survey_data["respondents"][0]["age"])
print(survey_data["statistics"]["mean_income"])

字典 vs 列表

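| 场景 | 使用列表 | 使用字典 |
| --- | --- | --- |
| 顺序重要 | ✓ | ✗ |
| 按索引访问 | `list[0]` | ✗ |
| 按名称访问 | ✗ | `dict["name"]` |
| 存储相同类型 | ✓ | ✗ |
| 存储键值对 | ✗ | ✓ |
| 查找速度 | 慢(O(n)) | 快(O(1)) |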

示例对比

python
# 用列表(不好)
student = ["Alice", 25, "Economics", 3.85]
print(student[2])  # Economics(不直观,记不住索引)

# 用字典(好)
student = {"name": "Alice", "age": 25, "major": "Economics", "gpa": 3.85}
print(student["major"])  # Economics(一目了然)

常见错误

错误 1:使用不存在的键

python
student = {"name": "Alice"}

# Wrong: indexing a missing key raises KeyError. The call is left commented
# out so the snippet runs through to the correct approach below.
# print(student["age"])  # KeyError

# Right: get() returns the supplied default instead of raising.
print(student.get("age", "未知"))  # 未知

错误 2:使用可变类型作为键

python
#  列表不能作为键
# d = {[1, 2]: "value"}  # TypeError

#  元组可以
d = {(1, 2): "value"}

错误 3:在遍历时修改字典

python
# Dangerous: deleting entries while iterating over the dict itself raises
# RuntimeError (the dict changed size during iteration). The error is caught
# and printed so the snippet keeps running and reaches the correct version.
d = {"a": 1, "b": 2, "c": 3}
try:
    for key in d:
        if d[key] == 2:
            del d[key]
except RuntimeError as err:
    print(f"RuntimeError: {err}")

# Correct: iterate over a snapshot of the keys so the dict can be mutated
# safely inside the loop.
d = {"a": 1, "b": 2, "c": 3}
for key in list(d):
    if d[key] == 2:
        del d[key]

练习题

练习 1:成绩管理

python
# 创建学生成绩字典
# 键:学生姓名,值:成绩列表
# 计算每个学生的平均分

students_scores = {
    "Alice": [85, 90, 92],
    "Bob": [78, 82, 88],
    "Carol": [95, 92, 89]
}

# 任务:计算并打印每个学生的平均分

练习 2:问卷数据统计

python
# 给定问卷数据
responses = [
    {"gender": "Male", "age_group": "18-30", "satisfaction": 4},
    {"gender": "Female", "age_group": "31-45", "satisfaction": 5},
    {"gender": "Male", "age_group": "18-30", "satisfaction": 3},
    {"gender": "Female", "age_group": "18-30", "satisfaction": 4},
]

# 任务:
# 1. 统计各性别人数
# 2. 统计各年龄组人数
# 3. 计算平均满意度

练习 3:数据转换

python
# 给定列表
data = [
    ["Alice", 25, 75000],
    ["Bob", 30, 85000],
    ["Carol", 28, 70000]
]

# 任务:转换为字典列表
# [
#     {"name": "Alice", "age": 25, "income": 75000},
#     {"name": "Bob", "age": 30, "income": 85000},
#     ...
# ]

下一步

在下一节中,我们将学习 集合(Sets),它用于存储唯一元素,非常适合去重和集合运算。

继续学习!

基于 MIT 许可证发布。内容版权归作者所有。