File I/O Basics
Starting with Text Files — The First Step in Data Persistence
Why Do We Need File Operations?
- Save data analysis results
- Read external data
- Generate reports
- Data backup
Basic File Operations
1. Writing to Files
python
# Write to text file
with open('output.txt', 'w', encoding='utf-8') as f:
    f.write("This is the first line\n")
    f.write("This is the second line\n")

# Append content
with open('output.txt', 'a', encoding='utf-8') as f:
    f.write("This is appended content\n")

Mode descriptions:
- 'r': Read-only (default)
- 'w': Write (overwrite)
- 'a': Append
- 'r+': Read and write
2. Reading Files
python
# Method 1: Read entire file at once
with open('output.txt', 'r', encoding='utf-8') as f:
    content = f.read()
    print(content)

# Method 2: Read line by line
with open('output.txt', 'r', encoding='utf-8') as f:
    for line in f:
        print(line.strip())  # strip() removes surrounding whitespace, including the trailing newline

# Method 3: Read all lines into a list
with open('output.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()
    print(lines)  # ['This is the first line\n', 'This is the second line\n', ...]

3. Context Managers (with Statement)
python
# Not recommended (requires manual closing; the file stays open if read() raises)
f = open('file.txt', 'r')
content = f.read()
f.close()

# Recommended (automatically closes)
with open('file.txt', 'r') as f:
    content = f.read()
# File is automatically closed when exiting the with block

Practical Cases
Case 1: Save Survey Results
python
respondents = [
    {'id': 1001, 'age': 25, 'income': 50000},
    {'id': 1002, 'age': 30, 'income': 75000},
    {'id': 1003, 'age': 35, 'income': 85000}
]

# Save to text file
with open('survey_results.txt', 'w', encoding='utf-8') as f:
    f.write("Survey Results\n")
    f.write("=" * 40 + "\n")
    for resp in respondents:
        line = f"ID:{resp['id']}, Age:{resp['age']}, Income:{resp['income']}\n"
        f.write(line)

print("Results saved to survey_results.txt")

Case 2: Read and Process Data
python
# Read data file
ages = []
with open('ages.txt', 'r') as f:
    for line in f:
        age = int(line.strip())
        ages.append(age)

# Statistics
print(f"Average age: {sum(ages) / len(ages):.1f}")
print(f"Maximum age: {max(ages)}")
print(f"Minimum age: {min(ages)}")

Case 3: Logging
python
from datetime import datetime
def log_analysis(message, log_path='analysis.log'):
    """Append a timestamped message to an analysis log file.

    Each call adds one line of the form ``[YYYY-MM-DD HH:MM:SS] message``.

    Args:
        message: Text to record after the timestamp.
        log_path: File to append to. Defaults to 'analysis.log' in the
            current working directory, so existing one-argument calls
            behave exactly as before.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Mode 'a' creates the file if it is missing and never truncates
    # entries written by earlier calls.
    with open(log_path, 'a', encoding='utf-8') as f:
        f.write(f"[{timestamp}] {message}\n")
# Usage
log_analysis("Started data cleaning")
log_analysis("Removed 15 outliers")
log_analysis("Data cleaning completed")

Path Operations
Using pathlib (Recommended)
python
from pathlib import Path

# Create path objects
data_dir = Path('data')
raw_dir = data_dir / 'raw'  # Path concatenation
processed_dir = data_dir / 'processed'

# Create directories
raw_dir.mkdir(parents=True, exist_ok=True)

# Check if file exists
file_path = raw_dir / 'survey.txt'
if file_path.exists():
    print("File exists")

# List files in directory
for file in data_dir.glob('*.txt'):
    print(file.name)

Using os.path (Traditional Method)
python
import os

# Join paths
file_path = os.path.join('data', 'raw', 'survey.txt')

# Check existence
if os.path.exists(file_path):
    print("File exists")

# Create directory
os.makedirs('data/processed', exist_ok=True)

# List files
files = os.listdir('data')

Best Practices
1. Always Specify Encoding
python
# Explicitly specify UTF-8
with open('file.txt', 'r', encoding='utf-8') as f:
    content = f.read()

2. Use with Statement
python
# Automatically closes file
with open('file.txt', 'r') as f:
    content = f.read()

3. Handle File Not Found Cases
python
from pathlib import Path

file_path = Path('data.txt')
if file_path.exists():
    with open(file_path, 'r') as f:
        content = f.read()
else:
    print("File does not exist")

Practice Exercises
python
# Exercise 1: Create student grades file
# Write the following data to scores.txt
scores = [
    ('Alice', 85),
    ('Bob', 92),
    ('Carol', 78)
]
# Format: Alice: 85

# Exercise 2: Analyze text file
# Read file and count:
# - Total number of lines
# - Total number of characters
# - Number of lines containing a specific keyword

Next Steps
In the next section, we'll learn about CSV and Excel file processing.
Keep going!