import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Use a font with Hangul glyphs so the Korean legend label renders.
plt.rcParams['font.family'] = 'Noto Sans KR'

# Load the raw records; column 2 is parsed as a date (exposed below as 'DATE').
df = pd.read_csv('https://github.com/AllenDowney/ThinkBayes2/raw/master/data/2239075.csv', parse_dates=[2])
df['YEAR'] = df['DATE'].dt.year

# Total snowfall per calendar year.
snow = df.groupby('YEAR')['SNOW'].sum()
# Drop the first and last years
# (presumably because they are only partially recorded -- TODO confirm).
snow = snow.iloc[1:-1]

# Scatter plot (markers only, no connecting line) of the yearly totals.
snow.plot(ls='', marker='o', label='강설량')
plt.legend()
회귀
확률 통계
더 많은 눈이 내렸을까?
from empiricaldist import Pmf
import statsmodels.formula.api as smf
# Turn the yearly Series into a DataFrame with 'YEAR' and 'SNOW' columns.
data = snow.reset_index()

# Centre the years on their (rounded) mean so that x == 0 falls in the middle
# of the observed range.
offset = data['YEAR'].mean().round()
data['x'] = data['YEAR'] - offset
data['y'] = data['SNOW']

# Ordinary least squares fit of y on x using the statsmodels formula API.
formula = 'y ~ x'
results = smf.ols(formula, data=data).fit()
# Bare expression: displays the fitted parameters when run in a notebook cell.
results.params
Intercept 64.446325
x 0.511880
dtype: float64
사전분포
# Each prior is built with Pmf.from_seq from an evenly spaced grid of values.

# Slope: 51 grid points on [-0.5, 1.5].
qs = np.linspace(-0.5, 1.5, 51)
prior_slope = Pmf.from_seq(qs)

# Intercept: 41 grid points on [54, 75].
qs = np.linspace(54, 75, 41)
prior_inter = Pmf.from_seq(qs)

# Sigma: 31 grid points on [20, 35].
qs = np.linspace(20, 35, 31)
prior_sigma = Pmf.from_seq(qs)
def make_joint(pmf1, pmf2):
    """Return the outer product of two distributions as a DataFrame.

    Both arguments must be array-like with an ``index`` attribute (for
    example a ``pandas.Series`` or a subclass of it).

    The result has one column per entry of ``pmf1`` and one row per entry
    of ``pmf2``; the cell at (row ``r``, column ``c``) holds
    ``pmf1[c] * pmf2[r]``.
    """
    # meshgrid broadcasts pmf1 along the columns and pmf2 along the rows,
    # so the element-wise product is the outer product.
    X, Y = np.meshgrid(pmf1, pmf2)
    return pd.DataFrame(X * Y, columns=pmf1.index, index=pmf2.index)
def make_joint3(pmf1, pmf2, pmf3):
    """Return the joint distribution of three distributions as a Pmf.

    The result is a ``Pmf`` built from a stacked Series whose three index
    levels follow the argument order (``pmf1``, ``pmf2``, ``pmf3``) and
    whose values are the products of the corresponding entries.
    """
    # make_joint puts its first argument on the columns, and stack() appends
    # the column labels as the innermost index level -- hence the swapped
    # argument order, which yields a (pmf1, pmf2) index.
    joint2 = make_joint(pmf2, pmf1).stack()
    # Same trick again: pmf3 becomes the innermost (third) level.
    joint3 = make_joint(pmf3, joint2).stack()
    return Pmf(joint3)
# Joint prior over (slope, intercept, sigma), built from the three marginals.
prior = make_joint3(prior_slope, prior_inter, prior_sigma)
# Bare expression: displays the joint prior when run in a notebook cell.
prior
| | | | probs |
|---|---|---|---|
| -0.5 | 54.0 | 20.0 | 0.000015 |
| | | 20.5 | 0.000015 |
| | | 21.0 | 0.000015 |
| | | 21.5 | 0.000015 |
| | | 22.0 | 0.000015 |
| ... | ... | ... | ... |
| 1.5 | 75.0 | 33.0 | 0.000015 |
| | | 33.5 | 0.000015 |
| | | 34.0 | 0.000015 |
| | | 34.5 | 0.000015 |
| | | 35.0 | 0.000015 |
64821 rows × 1 columns