字典(Dictionaries)
Python 最强大的数据结构 —— 键值对存储
什么是字典?
字典(Dictionary)是 Python 中存储 键值对(key-value pairs) 的数据结构,类似于:
- R: Named list
- JSON: Object
- 现实生活: 真正的字典(单词→释义)
关键特点:
- 顺序:Python 3.7 起保证保持插入顺序(更早的版本视为无序)
- 键唯一:每个键只能出现一次
- 键可哈希:键必须是可哈希(通常即不可变)的类型,如字符串、数字、只包含可哈希元素的元组
- 值任意:值可以是任何类型
创建字典
1. 基本创建
python
# 空字典
empty_dict = {}
empty_dict2 = dict()
# 基本字典
student = {
"name": "Alice",
"age": 25,
"major": "Economics",
"gpa": 3.85
}
# 数字键
scores = {
1: 85,
2: 90,
3: 78
}
# 混合键类型(不推荐)
mixed = {
"name": "Alice",
1: "first",
(10, 20): "coordinates"
}
2. 从列表创建
python
# 从键值对列表
pairs = [("name", "Alice"), ("age", 25), ("major", "Economics")]
student = dict(pairs)
print(student) # {'name': 'Alice', 'age': 25, 'major': 'Economics'}
# 从两个列表
keys = ["name", "age", "major"]
values = ["Alice", 25, "Economics"]
student = dict(zip(keys, values))
3. 字典推导式
python
# 创建平方数字典
squares = {x: x**2 for x in range(1, 6)}
print(squares) # {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}
# 从列表创建索引字典
students = ["Alice", "Bob", "Carol"]
student_ids = {name: i+1 for i, name in enumerate(students)}
print(student_ids) # {'Alice': 1, 'Bob': 2, 'Carol': 3}
访问字典
1. 基本访问
python
student = {
"name": "Alice",
"age": 25,
"major": "Economics",
"gpa": 3.85
}
# 方式 1: 使用键访问
print(student["name"]) # Alice
print(student["gpa"]) # 3.85
# 访问不存在的键会报错
# print(student["phone"]) # KeyError
# 方式 2: 使用 get()(更安全)
print(student.get("name")) # Alice
print(student.get("phone")) # None(不报错)
print(student.get("phone", "未提供")) # 未提供(自定义默认值)
2. 检查键是否存在
python
student = {"name": "Alice", "age": 25}
# 使用 in
print("name" in student) # True
print("phone" in student) # False
# 安全访问
if "phone" in student:
print(student["phone"])
else:
print("无电话信息")
修改字典
1. 添加/修改键值对
python
student = {"name": "Alice", "age": 25}
# 添加新键
student["major"] = "Economics"
print(student) # {'name': 'Alice', 'age': 25, 'major': 'Economics'}
# 修改已有键
student["age"] = 26
print(student) # {'name': 'Alice', 'age': 26, 'major': 'Economics'}
# update(): 批量添加/修改
student.update({"gpa": 3.85, "year": 3})
print(student)
2. 删除键值对
python
student = {
"name": "Alice",
"age": 25,
"major": "Economics",
"temp": "delete_me"
}
# del: 删除指定键
del student["temp"]
print(student)
# pop(): 删除并返回值
major = student.pop("major")
print(major) # Economics
print(student) # 字典中已没有 major
# popitem(): 删除最后一个键值对(Python 3.7+)
last_item = student.popitem()
print(last_item) # ('age', 25)
# clear(): 清空字典
student.clear()
print(student) # {}
遍历字典
1. 遍历键
python
student = {"name": "Alice", "age": 25, "gpa": 3.85}
# 方式 1: 默认遍历键
for key in student:
print(key)
# 方式 2: 明确遍历键
for key in student.keys():
print(key)
# 输出:
# name
# age
# gpa
2. 遍历值
python
for value in student.values():
print(value)
# 输出:
# Alice
# 25
# 3.85
3. 遍历键值对
python
for key, value in student.items():
print(f"{key}: {value}")
# 输出:
# name: Alice
# age: 25
# gpa: 3.85
实战案例
案例 1:问卷数据存储
python
# 单个受访者数据
respondent = {
"id": 1001,
"name": "Alice",
"age": 30,
"gender": "Female",
"income": 75000,
"education": "Bachelor's",
"marital_status": "Married",
"children": 2,
"satisfaction": 4
}
# 生成报告
print("=== 受访者报告 ===")
print(f"ID: {respondent['id']}")
print(f"姓名: {respondent['name']}")
print(f"年龄: {respondent['age']} 岁")
print(f"性别: {respondent['gender']}")
print(f"收入: ${respondent['income']:,}")
print(f"教育: {respondent['education']}")
print(f"婚姻: {respondent['marital_status']}, {respondent['children']} 个孩子")
print(f"满意度: {respondent['satisfaction']}/5")
案例 2:分组统计
python
# 受访者列表
respondents = [
{"name": "Alice", "age": 25, "major": "Economics"},
{"name": "Bob", "age": 30, "major": "Sociology"},
{"name": "Carol", "age": 28, "major": "Economics"},
{"name": "David", "age": 35, "major": "Political Science"},
{"name": "Emma", "age": 26, "major": "Sociology"},
]
# 按专业分组计数
major_counts = {}
for person in respondents:
major = person["major"]
if major in major_counts:
major_counts[major] += 1
else:
major_counts[major] = 1
print("=== 专业分布 ===")
for major, count in major_counts.items():
print(f"{major}: {count} 人")
# 更简洁的写法(使用 get)
major_counts = {}
for person in respondents:
major = person["major"]
major_counts[major] = major_counts.get(major, 0) + 1
案例 3:数据验证
python
# 受访者数据
respondent = {
"id": 1001,
"age": 30,
"income": 75000,
"education": "Bachelor's"
}
# 验证规则
validation_rules = {
"age": (18, 100), # 年龄范围
"income": (0, 1000000), # 收入范围
}
# 执行验证
print("=== 数据验证 ===")
is_valid = True
for field, (min_val, max_val) in validation_rules.items():
if field in respondent:
value = respondent[field]
if min_val <= value <= max_val:
print(f" {field}: {value} (正常)")
else:
print(f" {field}: {value} (超出范围 {min_val}-{max_val})")
is_valid = False
else:
print(f"️ {field}: 缺失")
if is_valid:
print("\n 数据验证通过")
else:
print("\n 数据验证失败")
案例 4:变量映射(编码)
python
# 教育水平编码
education_mapping = {
"High School": 1,
"Associate Degree": 2,
"Bachelor's Degree": 3,
"Master's Degree": 4,
"Doctoral Degree": 5
}
# 原始数据
students = [
{"name": "Alice", "education": "Bachelor's Degree"},
{"name": "Bob", "education": "Master's Degree"},
{"name": "Carol", "education": "High School"},
]
# 添加编码
for student in students:
edu = student["education"]
student["education_code"] = education_mapping.get(edu, 0)
# 打印结果
for student in students:
print(f"{student['name']}: {student['education']} (代码: {student['education_code']})")
高级技巧
1. setdefault(): 安全设置默认值
python
# 统计单词出现次数
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
word_counts = {}
for word in words:
word_counts.setdefault(word, 0) # 如果不存在,设为 0
word_counts[word] += 1
print(word_counts) # {'apple': 3, 'banana': 2, 'cherry': 1}
2. defaultdict: 自动初始化
python
from collections import defaultdict
# 自动初始化为 0
word_counts = defaultdict(int)
words = ["apple", "banana", "apple", "cherry"]
for word in words:
word_counts[word] += 1 # 无需检查键是否存在
print(dict(word_counts)) # {'apple': 2, 'banana': 1, 'cherry': 1}
# 自动初始化为列表
groups = defaultdict(list)
students = [
{"name": "Alice", "major": "Economics"},
{"name": "Bob", "major": "Sociology"},
{"name": "Carol", "major": "Economics"},
]
for student in students:
groups[student["major"]].append(student["name"])
print(dict(groups))
# {'Economics': ['Alice', 'Carol'], 'Sociology': ['Bob']}
3. Counter: 快速计数
python
from collections import Counter
# 统计专业分布
majors = ["Economics", "Sociology", "Economics", "Political Science", "Sociology", "Economics"]
major_counts = Counter(majors)
print(major_counts)
# Counter({'Economics': 3, 'Sociology': 2, 'Political Science': 1})
# 最常见的 N 个
print(major_counts.most_common(2))
# [('Economics', 3), ('Sociology', 2)]
4. 嵌套字典
python
# 多层级数据
survey_data = {
"metadata": {
"name": "中国居民收入调查",
"year": 2024,
"sample_size": 1000
},
"respondents": [
{"id": 1, "age": 25, "income": 50000},
{"id": 2, "age": 30, "income": 75000}
],
"statistics": {
"mean_age": 27.5,
"mean_income": 62500
}
}
# 访问嵌套数据
print(survey_data["metadata"]["name"])
print(survey_data["respondents"][0]["age"])
print(survey_data["statistics"]["mean_income"])
字典 vs 列表
| 场景 | 使用列表 | 使用字典 |
|---|---|---|
| 顺序重要 | ✓ | ✗ |
| 按索引访问 | ✓ list[0] | ✗ |
| 按名称访问 | ✗ | ✓ dict["name"] |
| 存储相同类型 | ✓ | ✗ |
| 存储键值对 | ✗ | ✓ |
| 查找速度 | 按值查找慢(O(n)) | 按键查找快(平均 O(1)) |
示例对比:
python
# 用列表(不好)
student = ["Alice", 25, "Economics", 3.85]
print(student[2]) # Economics(不直观,记不住索引)
# 用字典(好)
student = {"name": "Alice", "age": 25, "major": "Economics", "gpa": 3.85}
print(student["major"]) # Economics(一目了然)
常见错误
错误 1:使用不存在的键
python
student = {"name": "Alice"}
print(student["age"]) # KeyError
# 正确做法
print(student.get("age", "未知")) # 未知
错误 2:使用可变类型作为键
python
# 列表不能作为键
# d = {[1, 2]: "value"} # TypeError
# 元组可以
d = {(1, 2): "value"}
错误 3:在遍历时修改字典
python
# 危险操作
d = {"a": 1, "b": 2, "c": 3}
for key in d:
if d[key] == 2:
del d[key] # RuntimeError
# 正确做法:遍历副本
d = {"a": 1, "b": 2, "c": 3}
for key in list(d.keys()):
if d[key] == 2:
del d[key]
练习题
练习 1:成绩管理
python
# 创建学生成绩字典
# 键:学生姓名,值:成绩列表
# 计算每个学生的平均分
students_scores = {
"Alice": [85, 90, 92],
"Bob": [78, 82, 88],
"Carol": [95, 92, 89]
}
# 任务:计算并打印每个学生的平均分
练习 2:问卷数据统计
python
# 给定问卷数据
responses = [
{"gender": "Male", "age_group": "18-30", "satisfaction": 4},
{"gender": "Female", "age_group": "31-45", "satisfaction": 5},
{"gender": "Male", "age_group": "18-30", "satisfaction": 3},
{"gender": "Female", "age_group": "18-30", "satisfaction": 4},
]
# 任务:
# 1. 统计各性别人数
# 2. 统计各年龄组人数
# 3. 计算平均满意度
练习 3:数据转换
python
# 给定列表
data = [
["Alice", 25, 75000],
["Bob", 30, 85000],
["Carol", 28, 70000]
]
# 任务:转换为字典列表
# [
# {"name": "Alice", "age": 25, "income": 75000},
# {"name": "Bob", "age": 30, "income": 85000},
# ...
# ]
下一步
在下一节中,我们将学习 集合(Sets),它用于存储唯一元素,非常适合去重和集合运算。
继续学习!