Module 5 ӌ

�p!W �� 0��

=� �ƹ;�

1. 函数基础

**8Â�**

�p/� (��W �דe�p �ޓ�Ӝ
DRY �Don't Repeat Yourself M��

��ӄ

python

def function_name(parameters):
    """Documentation string (docstring)."""
    # Function body
    # NOTE: `result` is never assigned here; this is a syntax template only.
    return result

**返回值类型**

U*<return x
*<C� return mean, max, min
Wxreturn {'mean': mean, 'max': max}
��<return Noneؤ

2. 函数参数

**�p{�h�**

�p{�	��	y�	:�
Mn�p	`func(a, b)`	�{ z�	`power(2, 3)`
ؤ�p	`func(a, b=10)`	ؤ< �	`greet(name, greeting="Hello")`
s.W�p	`func(a=1, b=2)`	��z�	`func(age=30, name="Alice")`
��Mn�p	`func(*args)`	��p�	`sum(*numbers)`
��s.W�p	`func(**kwargs)`	��s.W�p	`create(**info)`
�Ps.W�p	`func(*, kwonly)`	`*` �{(s.W	`func(income, *, tax_rate=0.25)`

�pz��

python

def function(
    pos1, pos2,              # positional parameters
    default_arg=10,          # parameter with a default value
    *args,                   # variadic positional parameters
    kwonly_arg,              # keyword-only parameter (it follows *args)
    **kwargs                 # variadic keyword parameters
):
    """Illustrate the order in which parameter kinds must be declared."""
    pass

́w1

python

# Wrong: a mutable object used as a default argument
def add_item(item, items=[]):
    # The default list is created once, when the `def` statement runs, so
    # every call that omits `items` appends to the same shared list.
    items.append(item)
    return items

# Correct: default to None and build a fresh list per call
def add_item(item, items=None):
    """Append `item` to `items` and return that list.

    When `items` is omitted (or None) a new empty list is used, so separate
    calls never share state.
    """
    target = [] if items is None else items
    target.append(item)
    return target

3. Lambda 函数

�I

? �p� W
ULh�
(��U��!'�\

**��**

python

lambda 参数: 表达式

Lambda vs n�p

y'	Lambda	n�p
��	`lambda x: x**2`	`def f(x): return x**2`

�	?

�

B�	�ULh�	�L
�c	��	��cW&2
(😮	�U�!'
B�
(

4. !W

**��**

**!WModule *� .py ��
**Package **+ __init__.py ��9
**�Library **��s�*

�e�

python

# 1. Import a whole module
import math
math.sqrt(16)

# 2. Import under an alias
import pandas as pd

# 3. Import specific names
from math import sqrt, pi

# 4. Import everything (not recommended)
from math import *  # Wrong: floods the namespace and can shadow existing names

8(Ɠ

�	(	8(�p
`math`	pfЗ	`sqrt()`, `log()`, `exp()`, `pi`, `e`
`statistics`	ߡ	`mean()`, `median()`, `stdev()`, `variance()`
`random`	�:p	`randint()`, `random()`, `choice()`, `sample()`
`datetime`	��	`datetime.now()`, `timedelta()`, `strftime()`
`json`	JSON pn	`dumps()`, `loads()`, `dump()`, `load()`

<� Python vs Stata vs R

�p�I��

Python:

python

def calculate_mean(data):
    """Return the arithmetic mean of `data`.

    Raises ZeroDivisionError when `data` is empty.
    """
    total = sum(data)
    count = len(data)
    return total / count

Stata:

stata

program define calc_mean
    args varname
    summarize `varname'
    return scalar mean = r(mean)
end

calculate_mean <- function(data) {
  mean(data)
}

��

�\	Python	Stata	R
��	`pip install pandas`	`ssc install outreg2`	`install.packages("dplyr")`
�e	`import pandas as pd`	`which outreg2`	`library(dplyr)`
��	`pip list`	`ado dir`	`installed.packages()`

� 8��

1. ��a:ؤ�p

python

# Wrong
def add_student(name, courses=[]):
    # `courses=[]` is evaluated once at definition time, so every call that
    # omits `courses` shares, and keeps growing, the same list.
    courses.append(name)
    return courses

# Correct
def add_student(name, courses=None):
    """Append `name` to `courses` and return that list.

    A new list is started whenever `courses` is omitted (or None).
    """
    enrolled = courses if courses is not None else []
    enrolled.append(name)
    return enrolled

2. 忘记 return 语句

python

# Wrong
def calculate_tax(income):
    tax = income * 0.25  # forgot `return`: the function implicitly returns None

# Correct
def calculate_tax(income):
    """Return the tax on `income` at a flat 25% rate."""
    tax = income * 0.25
    return tax

3. 参数顺序错误

python

# Wrong
def register(name="Alice", age, major):  # SyntaxError: a parameter without a default follows one with a default
    pass

# Correct
def register(age, major, name="Alice"):
    """Placeholder: required parameters come first, defaulted ones last."""
    return None

=� �s��

1. 函数命名

�

python

# Good names (start with a verb)
def calculate_total(price, quantity)
def validate_email(email)
def is_adult(age)

# Bad names
def f(x, y)
def data()

2. U�L#�

python

# Wrong: one function that does too much
def process_data(data):
    # Several distinct steps bundled into a single body.
    # NOTE(review): the original step list is garbled in the source; restore it.
    pass

# Correct: split the work into several functions
def clean_data(data):
    """Stub for one single-purpose step; not implemented here."""
    pass

def analyze_data(data):
    """Stub for one single-purpose step; not implemented here."""
    pass

3. 文档字符串

python

def calculate_gini(incomes):
    """Calculate the Gini coefficient.

    Args:
        incomes (list): List of incomes.

    Returns:
        float: Gini coefficient (0-1).

    Example:
        >>> calculate_gini([10000, 20000, 30000])
        0.2222
    """
    pass

<� �`

�` 16��h�@

**��**PP **��**15 �

��*/��h

python

def calculate_progressive_tax(income, brackets):
    """
    Calculate progressive (marginal-rate) tax.

    Args:
        income: Annual income.
        brackets: Tax brackets as [(upper_limit, rate), ...].
    """
    pass

# Test
brackets = [(50000, 0.10), (100000, 0.20), (float('inf'), 0.30)]
print(calculate_progressive_tax(75000, brackets))  # expected 10000 (50000*0.10 + 25000*0.20) once implemented

参考答案

python

def calculate_progressive_tax(income, brackets):
    """Calculate tax under a progressive (marginal-rate) schedule.

    Args:
        income: Taxable income. Values at or below zero owe no tax.
        brackets: Iterable of (upper_limit, rate) pairs in ascending order of
            upper_limit. Each rate applies only to the slice of income
            between the previous limit and this one.

    Returns:
        The total tax owed.
    """
    tax = 0
    previous_limit = 0

    for limit, rate in brackets:
        # All income has been taxed already; higher brackets do not apply.
        if income <= previous_limit:
            break

        # Only the part of the income that falls inside this bracket.
        taxable = min(income, limit) - previous_limit
        tax += taxable * rate
        previous_limit = limit

    return tax

# Tests (compared with a tolerance because the results are floats)
brackets = [(50000, 0.10), (100000, 0.20), (float('inf'), 0.30)]
# 40000 * 0.10
assert abs(calculate_progressive_tax(40000, brackets) - 4000) < 1e-9
# 50000 * 0.10 + 25000 * 0.20
assert abs(calculate_progressive_tax(75000, brackets) - 10000) < 1e-9
# 50000 * 0.10 + 50000 * 0.20 + 20000 * 0.30
assert abs(calculate_progressive_tax(120000, brackets) - 21000) < 1e-9
print("所有测试通过！")

�` 2pn[ h�@

**��**PP **��**20 �

python

def filter_respondents(data, **criteria):
    """
    Filter respondents by multiple criteria.

    Supported criteria:
        min_age, max_age, gender, min_income, education, city
    """
    pass

# Test
respondents = [
    {'id': 1, 'age': 25, 'gender': 'F', 'income': 50000},
    {'id': 2, 'age': 35, 'gender': 'M', 'income': 80000},
]

result = filter_respondents(respondents, min_age=30, gender='M')

参考答案

python

def filter_respondents(data, **criteria):
    """Return the records in `data` that satisfy every supplied criterion.

    Range criteria: min_age, max_age (on 'age') and min_income (on 'income').
    Exact-match criteria: gender, education, city. A record missing 'age'
    is treated as 0 for min_age and 999 for max_age; a missing 'income' is
    treated as 0. Any other keyword is ignored.
    """
    exact_fields = ('gender', 'education', 'city')

    def passes(person):
        # Every check is evaluated, in a fixed order, before deciding.
        too_young = 'min_age' in criteria and person.get('age', 0) < criteria['min_age']
        too_old = 'max_age' in criteria and person.get('age', 999) > criteria['max_age']
        too_poor = 'min_income' in criteria and person.get('income', 0) < criteria['min_income']
        mismatches = [
            field in criteria and person.get(field) != criteria[field]
            for field in exact_fields
        ]
        return not (too_young or too_old or too_poor or any(mismatches))

    return [person for person in data if passes(person)]

�` 3�wpn��h-I

**��**PPP **��**30 �

��*��!W

python

def validate_age(age, min_age=18, max_age=100):
    """Validate an age; should return (is_valid, error_message)."""
    pass

def validate_income(income, min_income=0):
    """Validate an income."""
    pass

def validate_email(email):
    """Validate the format of an e-mail address."""
    pass

def validate_response(response, rules):
    """Validate an entire response."""
    pass

�TH8��

python

def validate_age(age, min_age=18, max_age=100):
    """Check that `age` is a number within [min_age, max_age].

    Args:
        age: Value to validate.
        min_age: Smallest accepted age (inclusive).
        max_age: Largest accepted age (inclusive).

    Returns:
        (is_valid, error_message); error_message is "" when the age is valid.
    """
    # The message literals were mojibake in the source (two of them contained
    # a raw line break); they are restored here as readable text.
    if not isinstance(age, (int, float)):
        return False, "年龄必须是数字"
    # NaN compares False against every bound, so it would otherwise pass.
    if age != age:
        return False, "年龄必须是数字"
    if age < min_age:
        return False, f"年龄不能低于 {min_age}"
    if age > max_age:
        return False, f"年龄不能高于 {max_age}"
    return True, ""

def validate_email(email):
    """Perform a basic structural check on an e-mail address.

    The address must be a string with exactly one '@', a non-empty part
    before it, and a domain that contains a '.' somewhere other than at its
    very start or end.

    Returns:
        (is_valid, error_message); error_message is "" when valid.
    """
    # Message literals were mojibake in the source and are restored here.
    if not isinstance(email, str):
        return False, "邮箱必须是字符串"
    local, separator, domain = email.partition('@')
    if not local or not separator or '@' in domain:
        return False, "邮箱格式无效"
    # Strip edge dots so "a@." or "a@.com" does not count as a dotted domain.
    if '.' not in domain.strip('.'):
        return False, "邮箱格式无效"
    return True, ""

def validate_response(response, rules=None):
    """Validate the 'age' and 'email' fields of a response, when present.

    Args:
        response: Mapping of field name to submitted value.
        rules: Optional mapping; rules['age'] is a (min_age, max_age) pair
            passed to validate_age in place of its defaults.

    Returns:
        (is_valid, errors); errors is a list of "<field label>: <message>"
        strings and is empty when the response is valid.
    """
    errors = []

    if 'age' in response:
        if rules and 'age' in rules:
            min_age, max_age = rules['age']
            is_valid, error = validate_age(response['age'], min_age, max_age)
        else:
            is_valid, error = validate_age(response['age'])
        if not is_valid:
            # Field label restored from mojibake in the source.
            errors.append(f"年龄: {error}")

    if 'email' in response:
        is_valid, error = validate_email(response['email'])
        if not is_valid:
            # Field label restored from mojibake in the source.
            errors.append(f"邮箱: {error}")

    return not errors, errors

�` 4pnlbA4�-I

**��**PPP **��**30 �

python

def create_pipeline(*functions):
    """Create a data-processing pipeline."""
    pass

# Example
normalize = lambda x: x / 10000
discount = lambda x: x * 0.8
round_result = lambda x: round(x, 2)

pipeline = create_pipeline(normalize, discount, round_result)
result = pipeline(75000)  # 6.0

参考答案

python

from functools import reduce

def create_pipeline(*functions):
    """Compose `functions` into one callable applied left to right.

    The returned callable feeds its argument to the first function, passes
    each result to the next, and returns the final value. With no functions
    it returns its argument unchanged.
    """
    def pipeline(data):
        for step in functions:
            data = step(data)
        return data
    return pipeline

# Alternative implementation built on reduce
def create_pipeline_v2(*functions):
    """Compose `functions` left to right by folding over them with reduce."""
    def apply_step(value, step):
        return step(value)

    return lambda data: reduce(apply_step, functions, data)

# Tests
normalize = lambda x: x / 10000
discount = lambda x: x * 0.8
round_result = lambda x: round(x, 2)

# Steps run in the order given: normalize, then discount, then round.
pipeline = create_pipeline(normalize, discount, round_result)
print(pipeline(75000))   # 6.0
print(pipeline(120000))  # 9.6

�` 5!Wy�-I

**��**PPP **��**40 �

��*!W��w�y�

survey_project/
�� utils/
   �� __init__.py
   �� validation.py
   �� stats.py
�� analysis/
   �� __init__.py
   �� descriptive.py
�� main.py

=� y�F�

python

# utils/stats.py
def calculate_mean(values):
    """Return the arithmetic mean of `values`, or 0 when `values` is empty."""
    if not values:
        return 0
    return sum(values) / len(values)

def calculate_median(values):
    """Return the median of `values`, or 0 when `values` is empty.

    For an even number of values the median is the mean of the two middle
    values. The input is not modified.
    """
    sorted_values = sorted(values)
    n = len(sorted_values)
    # Without this guard an empty input indexes sorted_values[-1] and raises
    # IndexError; 0 matches what calculate_mean returns for empty input.
    if n == 0:
        return 0
    middle = n // 2
    if n % 2 == 0:
        return (sorted_values[middle - 1] + sorted_values[middle]) / 2
    return sorted_values[middle]

# analysis/descriptive.py
from utils.stats import calculate_mean, calculate_median

def describe_variable(data, variable):
    """Summarise one field across a list of record dicts.

    Records that lack `variable` are skipped. Returns a dict with the keys
    'count', 'mean', 'median', 'min' and 'max'; 'min' and 'max' are 0 when
    no record has the field.
    """
    observed = [row[variable] for row in data if variable in row]
    summary = {'count': len(observed)}
    summary['mean'] = calculate_mean(observed)
    summary['median'] = calculate_median(observed)
    if observed:
        summary['min'] = min(observed)
        summary['max'] = max(observed)
    else:
        summary['min'] = 0
        summary['max'] = 0
    return summary

# main.py
from analysis.descriptive import describe_variable

# Sample input: one dict per record
data = [
    {'age': 25, 'income': 50000},
    {'age': 35, 'income': 80000},
]

# Summarise the 'income' field across all records
stats = describe_variable(data, 'income')
print(stats)

�` 66e

sI��6

**��**PPPP **��**45 �

��<�p��M�

python

def calculate_gini(incomes):
    """Calculate the Gini coefficient."""
    pass

def calculate_quintiles(incomes):
    """Calculate income quintiles."""
    pass

def analyze_inequality(data):
    """Analyze income inequality."""
    pass

�TH�<�p

python

def calculate_gini(incomes):
    """Return the Gini coefficient of `incomes`, rounded to 4 decimals.

    Only strictly positive incomes are used. With fewer than two such
    incomes the result is 0.0.
    """
    ranked = sorted(inc for inc in incomes if inc > 0)
    n = len(ranked)
    if n <= 1:
        return 0.0

    # Formula: G = (2 * sum(i * x_i)) / (n * sum(x_i)) - (n + 1) / n,
    # where i is the 1-based rank of x_i in ascending order.
    weighted_total = sum(rank * income for rank, income in enumerate(ranked, start=1))
    scale = n * sum(ranked)

    coefficient = (2 * weighted_total) / scale - (n + 1) / n
    return round(coefficient, 4)

# Test
incomes = [30000, 50000, 75000, 120000, 200000]
# The label was mojibake in the source; restored as readable text.
print(f"基尼系数: {calculate_gini(incomes)}")  # 0.3453

�` 7R�p�6

**��**PPPP **��**35 �

LW��ӄ

python

organization = {
    'name': 'CEO',
    'salary': 500000,
    'subordinates': [
        {'name': 'VP', 'salary': 300000, 'subordinates': []}
    ]
}

def count_employees(org):
    """Recursively count the total number of people."""
    pass

def calculate_total_salary(org):
    """Recursively calculate the total salary."""
    pass

参考答案

python

def count_employees(org):
    """Recursively count `org` itself plus everyone beneath it.

    A node without a 'subordinates' key counts as one person.
    """
    headcount = 1
    for report in org.get('subordinates', []):
        headcount += count_employees(report)
    return headcount

def calculate_total_salary(org):
    """Recursively add up the 'salary' of `org` and everyone beneath it.

    Raises KeyError if any node lacks a 'salary' key.
    """
    own_salary = org['salary']
    reports_total = 0
    for report in org.get('subordinates', []):
        reports_total += calculate_total_salary(report)
    return own_salary + reports_total

def get_max_depth(org, current_depth=1):
    """Recursively find the depth of the deepest node under `org`.

    `current_depth` is the depth assigned to `org` itself (1 for the root).
    """
    children = org.get('subordinates', [])
    if children:
        return max(get_max_depth(child, current_depth + 1) for child in children)
    return current_depth

# Test
organization = {
    'name': 'CEO',
    'salary': 500000,
    'subordinates': [
        {
            'name': 'VP',
            'salary': 300000,
            'subordinates': [
                {'name': 'Manager', 'salary': 150000, 'subordinates': []}
            ]
        }
    ]
}

# The output labels were mojibake in the source; restored as readable text.
print(f"总人数: {count_employees(organization)}")           # 3
print(f"总薪资: ${calculate_total_salary(organization):,}") # $950,000
print(f"层级深度: {get_max_depth(organization)}")           # 3

=� �e

�,� `�ό�

�p�I�(
��p{�
Lambda �p��p
!W��

m�`� Module 5 <�

( Module 6 - �f`**b�aOOP **

=� iU�

�}�e OOP �L� =�

Module 5 ӌ ​

=� �ƹ;� ​

1. �p�@ ​

2. �p�p ​

3. Lambda �p ​

4. !W ​

<� Python vs Stata vs R ​

�p�I�� ​

��� ​

� 8�� ​

1. ���a:ؤ�p ​

2. ذ return �� ​

3. �pz�� ​

=� �s�� ​

1. �p} ​

2. U�L#� ​

3. �cW&2 ​

<�  �` ​

�` 16��h�@ ​

�` 2pn[ h�@ ​

�` 3�wpn��h-I ​

�` 4pnlbA4�-I ​

�` 5!Wy�-I ​

�` 66e ​

�` 7R�p�6 ​

=� �e ​

=� iU� ​

Module 5 ӌ

=� �ƹ;�

1. �p�@

2. �p�p

3. Lambda �p

4. !W

<� Python vs Stata vs R

�p�I��

��

� 8��

1. ��a:ؤ�p

2. ذ return ��

3. �pz��

=� �s��

1. �p}

2. U�L#�

3. �cW&2

<� �`

�` 16��h�@

�` 2pn[ h�@

�` 3�wpn��h-I

�` 4pnlbA4�-I

�` 5!Wy�-I

�` 66e

�` 7R�p�6

=� �e

=� iU�