Multiple Linear Regression
machine learning

preprocessing

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

dataset = pd.read_csv('_data/03.csv')
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
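Before encoding, it can help to eyeball the raw frame; the optional sketch below uses only standard pandas calls and assumes nothing about the CSV's contents.

# optional sanity check on the loaded data
print(dataset.shape)    # rows x columns
print(dataset.head())   # confirm which column holds the categorical feature (index 3 below)
print(dataset.dtypes)   # the object-dtype column is the one to one-hot encode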
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# the linear model handles the dummy variable trap automatically, so no dummy column has to be dropped by hand (an explicit alternative is sketched after this cell)
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [3])], remainder='passthrough')
x = np.array(ct.fit_transform(x))
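If you would rather sidestep the dummy variable trap explicitly instead of relying on the regressor, OneHotEncoder accepts drop='first'. This is an optional alternative sketch, not what the notebook does; the column index [3] is simply carried over from the cell above, and the encoder is applied to the raw features because x has already been transformed.

# alternative sketch: drop one dummy level per category explicitly
ct_drop = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(drop='first'), [3])],
    remainder='passthrough')
x_alt = np.array(ct_drop.fit_transform(dataset.iloc[:, :-1].values))  # one fewer dummy column than x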
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, train_size=0.2)
# in multiple linear regression we don't need to apply feature scaling: each coefficient simply adapts to the scale of its feature
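To see why scaling is unnecessary here, one can compare the plain regressor with a scaled pipeline: the fitted coefficients differ, but the predictions usually agree up to floating-point noise. A minimal sketch, assuming scikit-learn's StandardScaler and make_pipeline; it is not part of the original notebook.

# sketch: scaling reparameterizes the coefficients but not the fitted predictions
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

plain = LinearRegression().fit(X_train, y_train)
scaled = make_pipeline(StandardScaler(), LinearRegression()).fit(X_train, y_train)
print(np.allclose(plain.predict(X_test), scaled.predict(X_test)))  # usually True: same predictions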
modeling
from sklearn.linear_model import LinearRegression
# the model chooses the best coefficients automatically (no need to apply backward elimination by hand; a sketch of that workflow follows below)
regressor = LinearRegression()
regressor.fit(X_train, y_train)
LinearRegression()
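For reference, the backward elimination that the comment above says we can skip would look roughly like this with statsmodels: keep refitting and drop the predictor with the highest p-value until everything left clears a chosen threshold. The 0.05 cutoff, the statsmodels dependency, and dropping the first dummy column (to avoid collinearity with the added constant; ColumnTransformer places the encoded columns first) are all assumptions of this sketch, not part of the notebook.

# illustrative sketch: manual backward elimination with statsmodels
import statsmodels.api as sm

X_be = sm.add_constant(X_train[:, 1:].astype(float))   # intercept column + features, one dummy dropped
cols = list(range(X_be.shape[1]))
while True:
    ols = sm.OLS(y_train.astype(float), X_be[:, cols]).fit()
    if len(cols) == 1:
        break                                           # only the intercept remains
    worst = 1 + int(np.argmax(ols.pvalues[1:]))         # least significant predictor (intercept kept)
    if ols.pvalues[worst] <= 0.05:
        break                                           # every remaining predictor is significant
    del cols[worst]                                     # eliminate it and refit
print(ols.summary())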
predict
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
# left column: predictions, right column: actual test values
print(np.concatenate((y_pred.reshape(len(y_pred), 1),
                      y_test.reshape(len(y_test), 1)), 1))
[[ 93771.16 101004.64]
[ 74466.6 78239.91]
[ 50652.19 69758.98]
[105956.49 99937.59]
[145441.49 129917.04]
[ 37211.29 64926.08]
[ 71764.09 71498.49]
[111249.19 108552.04]
[ 59299.22 65200.33]
[120840.79 118474.03]
[162627.18 149759.96]
[120230.67 126992.93]
[166842.52 156991.12]
[119593.28 108733.99]
[123155.58 110352.25]
[170183.11 155752.6 ]
[198552.13 191050.39]
[157465.39 132602.65]
[ 98810.02 97427.84]
[195490.18 192261.83]
[142926.02 144259.4 ]
[106210.25 96778.92]
[ 89245.08 97483.56]
[ 45068.42 14681.4 ]
[ 41277.39 42559.73]
[115735.66 105733.54]
[176218.16 182901.99]
[137986.12 141585.52]
[ 88264.34 96479.51]
[ 96578.58 89949.14]
[ 82567.67 77798.83]
[196724.5 191792.06]
[111704. 111313.02]
[102560.86 103282.38]
[139310.55 124266.9 ]
[ 87261.82 81005.76]
[ 86587.2 96712.8 ]
[107978.65 122776.86]
[130333.68 134307.35]
[188346.5 166187.94]]
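The fitted model is just a linear equation, so its parameters can be inspected directly, and any already-encoded row can be scored on its own. A small follow-up sketch using only attributes LinearRegression exposes:

# the model is y = b0 + b1*x1 + ... + bn*xn
print(regressor.intercept_)           # b0
print(regressor.coef_)                # one coefficient per encoded/passthrough column
print(regressor.predict(X_test[:1]))  # scoring a single already-encoded row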
evaluate
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)
0.913139010635797
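R² never decreases when more features are added, so the adjusted R² is a useful companion number: it discounts the score by the number of predictors. A quick sketch computed from the quantities already in scope:

# adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - k - 1)
r2 = r2_score(y_test, y_pred)
n, k = X_test.shape          # number of test samples and of features
print(1 - (1 - r2) * (n - 1) / (n - k - 1))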