import numpy as np
import matplotlib.pyplot as plt
# Synthetic data: y = 6 + 4x + Gaussian noise
X = 2 * np.random.rand(100, 1)
y = 6 + 4 * X + np.random.randn(100, 1)
plt.scatter(X, y)
Machine Learning: Regression

Gradient Descent
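For reference, the cost below is the mean squared error, and the updates returned by get_weight_updates are its gradients scaled by the learning rate \(\eta\) (standard gradient descent, stated here for clarity):

\(RSS(w_0, w_1) = \frac{1}{N}\sum_{i=1}^{N}\left(y_i - (w_1 x_i + w_0)\right)^2\)

\(\frac{\partial RSS}{\partial w_1} = -\frac{2}{N}\sum_{i=1}^{N} x_i\,(y_i - \hat{y}_i)\), \(\quad \frac{\partial RSS}{\partial w_0} = -\frac{2}{N}\sum_{i=1}^{N}(y_i - \hat{y}_i)\)

Each iteration then applies \(w \leftarrow w - \eta\,\frac{\partial RSS}{\partial w}\).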
def get_cost(y, y_pred):
    # Mean squared error between actual and predicted values
    N = len(y)
    cost = np.sum(np.square(y - y_pred)) / N
    return cost
def get_weight_updates(w1, w0, X, y, learning_rate=0.01):
    N = len(y)
    w1_update = np.zeros_like(w1)
    w0_update = np.zeros_like(w0)

    # Prediction and residuals for the current weights
    y_pred = np.dot(X, w1.T) + w0
    diff = y - y_pred

    # Gradients of the MSE cost, scaled by the learning rate
    w1_update = -(2 / N) * learning_rate * np.dot(X.T, diff)
    w0_update = -(2 / N) * learning_rate * np.sum(diff)

    return w1_update, w0_update
def gradient_descent_steps(X, y, iters=10000):
    w0 = np.zeros((1, 1))
    w1 = np.zeros((1, 1))

    # Repeatedly step both weights in the negative gradient direction
    for _ in range(iters):
        w1_update, w0_update = get_weight_updates(w1, w0, X, y, learning_rate=0.01)
        w1 = w1 - w1_update
        w0 = w0 - w0_update

    return w1, w0
w1, w0 = gradient_descent_steps(X, y, iters=1000)
y_pred = w1[0, 0] * X + w0
print(f'w0: {w0[0, 0]:.3f} w1: {w1[0, 0]:.3f}, total cost: {get_cost(y, y_pred):.3f}')
plt.scatter(X, y)
plt.plot(X, y_pred)
w0: 5.955 w1: 3.940, total cost: 1.018
- Plain (full-batch) gradient descent is slow on large datasets, so it is rarely used in practice.

Mini-Batch Stochastic Gradient Descent
def stochastic_gradient_descent_steps(X, y, batch_size=10, iters=1000):
    w0 = np.zeros((1, 1))
    w1 = np.zeros((1, 1))

    for _ in range(iters):
        # Draw a random mini-batch each iteration
        stochastic_random_index = np.random.permutation(X.shape[0])
        sample_X = X[stochastic_random_index[0:batch_size]]
        sample_y = y[stochastic_random_index[0:batch_size]]

        # Gradient step computed on the mini-batch only
        w1_update, w0_update = get_weight_updates(w1, w0, sample_X, sample_y, learning_rate=0.01)
        w1 = w1 - w1_update
        w0 = w0 - w0_update

    return w1, w0
w1, w0 = stochastic_gradient_descent_steps(X, y, iters=1000)
y_pred = w1[0, 0] * X + w0
print(f'w0: {w0[0, 0]:.3f} w1: {w1[0, 0]:.3f}, total cost: {get_cost(y, y_pred):.3f}')
plt.scatter(X, y)
plt.plot(X, y_pred)
w0: 5.931 w1: 3.978, total cost: 1.021
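To put a rough number on the speed note above, a quick timing sketch (illustrative only; absolute times vary by machine, and on this tiny 100-point dataset the gap is small since this simple SGD still permutes the full index each step):

import time

start = time.perf_counter()
gradient_descent_steps(X, y, iters=1000)
print(f'full-batch GD : {time.perf_counter() - start:.3f}s')

start = time.perf_counter()
stochastic_gradient_descent_steps(X, y, iters=1000)
print(f'mini-batch SGD: {time.perf_counter() - start:.3f}s')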
Linear Regression
import pandas as pd
import seaborn as sns
from scipy import stats
from sklearn.datasets import load_boston
import warnings

warnings.filterwarnings('ignore')

boston = load_boston()

df = pd.DataFrame(boston.data, columns=boston.feature_names)
df['price'] = boston.target
df.head()
# Plot each feature against price to eyeball linear relationships
lm_features = ['RM', 'ZN', 'INDUS', 'NOX', 'AGE', 'PTRATIO', 'LSTAT', 'RAD']

fig, axs = plt.subplots(figsize=(16, 8), ncols=len(lm_features) // 2, nrows=2)

for i, feature in enumerate(lm_features):
    row = i // 4
    col = i % 4
    sns.regplot(x=feature, y='price', data=df, ax=axs[row][col])
The Boston dataset is reportedly no longer usable due to ethical concerns (it was deprecated in scikit-learn 1.0 and removed in 1.2).
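As a sketch of a drop-in substitute (assuming a recent scikit-learn where load_boston is gone), fetch_california_housing provides a comparable regression dataset, and the rest of this section works once the frame is rebuilt from it:

from sklearn.datasets import fetch_california_housing

# Substitute dataset: California housing (target is the median house value)
housing = fetch_california_housing()
df = pd.DataFrame(housing.data, columns=housing.feature_names)
df['price'] = housing.target
df.head()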
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

y_target = df['price']
X_data = df.drop(['price'], axis=1, inplace=False)
lr = LinearRegression()

neg_mse_scores = cross_val_score(lr, X_data, y_target, scoring="neg_mean_squared_error", cv=5)
rmse_scores = np.sqrt(-1 * neg_mse_scores)
avg_rmse = np.mean(rmse_scores)
cross_val_score treats larger scores as better, so MSE has to be passed in its negated form ("neg_mean_squared_error") and flipped back (multiplied by -1) before taking the square root.
Polynomial Regression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

def polynomial_func(X):
    y = 1 + 2 * X[:, 0] + 3 * X[:, 0]**2 + 4 * X[:, 1]**3
    return y

# Degree-3 polynomial feature expansion followed by plain linear regression
model = Pipeline([('poly', PolynomialFeatures(degree=3)),
                  ('linear', LinearRegression())])
X = np.arange(4).reshape(2, 2)
y = polynomial_func(X)

model = model.fit(X, y)
np.round(model.named_steps['linear'].coef_, 2)
array([0. , 0.18, 0.18, 0.36, 0.54, 0.72, 0.72, 1.08, 1.62, 2.34])
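The 10 coefficients line up with the 10 degree-3 terms that PolynomialFeatures generates from two input features; the expansion can be inspected directly (get_feature_names_out assumes scikit-learn 1.0+):

poly = PolynomialFeatures(degree=3)
poly.fit(np.arange(4).reshape(2, 2))
print(poly.get_feature_names_out())
# -> ['1' 'x0' 'x1' 'x0^2' 'x0 x1' 'x1^2' 'x0^3' 'x0^2 x1' 'x0 x1^2' 'x1^3']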
Regularization

L2 regularization (Ridge): \(\min(RSS(W) + \lambda \|W\|_2^2)\)

L1 regularization (Lasso): \(\min(RSS(W) + \lambda \|W\|_1)\)

The larger \(\lambda\) is, the smaller the regression coefficients become; when \(\lambda = 0\), this reduces to ordinary linear regression.

L1 regularization drives the coefficients of low-impact features to exactly zero, giving a feature-selection effect; L2 shrinks coefficients but does not zero them out.
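A small sketch of that difference, reusing X_data and y_target from above (alpha=1 is an arbitrary choice here; the zero counts depend on the data):

from sklearn.linear_model import Ridge, Lasso

ridge = Ridge(alpha=1).fit(X_data, y_target)
lasso = Lasso(alpha=1).fit(X_data, y_target)

# Lasso sets some coefficients exactly to zero; Ridge only shrinks them toward zero
print('Ridge coefficients at zero:', np.sum(ridge.coef_ == 0))
print('Lasso coefficients at zero:', np.sum(lasso.coef_ == 0))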
Ridge
from sklearn.linear_model import Ridge

ridge = Ridge(alpha=10)
neg_mse_scores = cross_val_score(ridge, X_data, y_target, scoring="neg_mean_squared_error", cv=5)
rmse_scores = np.sqrt(-1 * neg_mse_scores)
avg_rmse = np.mean(rmse_scores)
Lasso and ElasticNet
import pandas as pd
from sklearn.linear_model import Ridge, Lasso, ElasticNet

def get_linear_reg_eval(model_name, params=None, X_data_n=None, y_target_n=None):
    coeff_df = pd.DataFrame()

    for param in params:
        if model_name == 'Ridge':
            model = Ridge(alpha=param)
        elif model_name == 'Lasso':
            model = Lasso(alpha=param)
        elif model_name == 'ElasticNet':
            model = ElasticNet(alpha=param, l1_ratio=0.7)

        # Cross-validated RMSE for this alpha
        neg_mse_scores = cross_val_score(model, X_data_n, y_target_n, scoring="neg_mean_squared_error", cv=5)
        rmse_scores = np.sqrt(-1 * neg_mse_scores)
        avg_rmse = np.mean(rmse_scores)
        print(f'{param}: {avg_rmse:.3f}')

        # Fit on the full data to record the coefficients per alpha
        model.fit(X_data_n, y_target_n)
        coeff = pd.Series(data=model.coef_, index=X_data_n.columns)
        colname = 'alpha:' + str(param)
        coeff_df[colname] = coeff

    return coeff_df
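A usage sketch (the alpha grid here is illustrative, not from the source):

lasso_alphas = [0.07, 0.1, 0.5, 1, 3]  # hypothetical grid of alpha values
coeff_lasso_df = get_linear_reg_eval('Lasso', params=lasso_alphas,
                                     X_data_n=X_data, y_target_n=y_target)
coeff_lasso_df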
Data Transformations for Linear Regression Models

- Log transform: to guard against underflow, use log1p (i.e. log(1 + x)) rather than a plain log.
np.log1p(data)
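A minimal round-trip sketch, assuming a skewed target like y_target above: fit on the log-transformed target, then invert the predictions with expm1, the exact inverse of log1p.

y_target_log = np.log1p(y_target)        # log(1 + y): accurate near zero
lr_log = LinearRegression().fit(X_data, y_target_log)

pred = np.expm1(lr_log.predict(X_data))  # expm1 undoes log1p exactly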