
import pandas as pd
# Fetch the survey extract (CSV) directly from the ThinkBayes2 repository.
gss = pd.read_csv(
    'https://raw.githubusercontent.com/AllenDowney/ThinkBayes2/master/data/gss_bayes.csv'
)

# Boolean Series marking respondents whose industry code (indus10) is 6870,
# which these notes label as "banker".
banker = gss['indus10'].eq(6870)

# The sum of a Boolean Series counts its True rows; the mean is their fraction.
print(f'은행원 수: { banker.sum() }')
print(f'은행원 비율: { banker.mean() }')
은행원 수: 728
은행원 비율: 0.014769730168391155
확률
def prob(A):
    """Return the probability of proposition A.

    A is expected to be a Boolean pandas Series with one entry per
    respondent; its mean is the fraction of entries that are True.
    """
    return A.mean()
prob(banker)

# Respondents coded 2 in the 'sex' column are treated as female.
female = gss['sex'].eq(2)
prob(female)

# polviews values of 3 or below are treated as liberal.
liberal = gss['polviews'].le(3)
prob(liberal)

# partyid values of 1 or below are treated as Democrat.
democrat = gss['partyid'].le(1)
prob(democrat)
조건부 확률
# Keep only the Democrat flags of respondents who are liberal, then take the
# fraction of those flags that is True.
selected = democrat.loc[liberal]
prob(selected)
진보(liberal) 성향 중 민주당(democrat)의 비율
def conditional(proposition, given):
    """Return the probability of `proposition` conditioned on `given`.

    Both arguments are expected to be Boolean pandas Series over the same
    respondents. The rows where `given` is True are selected from
    `proposition`, and the fraction of True values among them is returned.
    """
    return prob(proposition[given])
# P(liberal | female) obtained by filtering on the condition ...
conditional(liberal, given=female)
# ... and the same quantity written as P(liberal and female) / P(female).
prob(liberal & female) / prob(female)
\(P(A|B) = \frac{P(A~\mathrm{and}~B)}{P(B)}\)
# Both lines compute P(female and liberal); the first builds it as
# P(female) * P(liberal | female). The printed values may differ in the last
# digit because of floating-point rounding.
print(prob(female) * conditional(liberal, given=female))
print(prob(female & liberal))
0.14834652059241224
0.14834652059241227
\(P(A~\mathrm{and}~B) = P(B)P(A|B)\)
# P(female | liberal): proposition and condition are swapped relative to
# conditional(liberal, given=female).
conditional(female, given=liberal)
조건부 확률은 교환 불가
# P(female | liberal) computed directly by filtering ...
print(conditional(female, given=liberal))
# ... and rebuilt as P(female) * P(liberal | female) / P(liberal).
print(prob(female) * conditional(liberal, given=female) / prob(liberal))
0.5419106203216483
0.5419106203216482
\(P(A|B) = \frac{P(A)P(B|A)}{P(B)}\)
# Respondents coded 1 in the 'sex' column are treated as male.
male = gss['sex'].eq(1)

# Total probability of being a banker: add P(group) * P(banker | group) over
# the two groups, male (code 1) first and then female (code 2).
sum(prob(group) * conditional(banker, given=group) for group in (male, female))
\(P(A) = P(B_1)P(A|B_1) + P(B_2)P(A|B_2)\)
조건과 논리곱
# P(female | liberal and democrat): `&` combines the two Boolean Series
# element-wise, so only respondents satisfying both conditions are kept.
conditional(female, given=liberal & democrat)
연습문제
1-1
# Exercise 1-1: fraction of respondents who are female, liberal, and Democrat,
# in that order.
prob(female), prob(liberal), prob(democrat)
(0.5378575776019476, 0.27374721038750255, 0.3662609048488537)
1-2
# Exercise 1-2: P(liberal | democrat) followed by P(democrat | liberal).
conditional(liberal, given=democrat), conditional(democrat, given=liberal)
(0.3891320002215698, 0.5206403320240125)
1-3
# Age and political-view groups used in exercise 1-3.
young = gss['age'].lt(30)
old = gss['age'].ge(65)
conservative = gss['polviews'].ge(5)

(
    prob(young & liberal),                  # P(young and liberal)
    conditional(liberal, given=young),      # P(liberal | young)
    prob(old & conservative),               # P(old and conservative)
    conditional(old, given=conservative),   # P(old | conservative)
)
(0.06579427875836884,
0.338517745302714,
0.06701156421180766,
0.19597721609113564)
맨 위로