Multiple Linear Regression
machine learning
preprocessing
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
dataset = pd.read_csv('_data/03.csv')
x = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
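Before encoding, a quick peek at the frame (purely illustrative; it relies only on the loading code above) shows how the feature matrix and the target vector were split:
# every column except the last becomes a feature; the last column is the target
print(dataset.head())
print(x.shape, y.shape)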
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# the library takes care of the dummy variable trap, so we don't need to drop a dummy column manually
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [3])], remainder='passthrough')
x = np.array(ct.fit_transform(x))
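As a sanity check (a sketch assuming scikit-learn 1.0 or newer, where ColumnTransformer.get_feature_names_out is available), the transformed matrix can be inspected to confirm that the dummy columns are placed in front of the passthrough columns:
# dummy columns produced by the encoder come first, the untouched numeric columns follow
print(ct.get_feature_names_out())
print(x[0])  # first row after encoding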
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, train_size=0.2)
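Note that train_size=0.2 keeps only 20% of the rows for training; the remaining 80% form the test set printed further down. A quick shape check makes the split explicit:
# 20% of the rows train the model, 80% are held out for testing
print(X_train.shape, X_test.shape)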
# in multiple linear regression, we don't need to apply feature scaling:
# the regression coefficients absorb each feature's scale
modeling
from sklearn.linear_model import LinearRegression
# the model is fit on all features at once; no need to apply backward elimination by hand
regressor = LinearRegression()
regressor.fit(X_train, y_train)
LinearRegression()
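To back up the note above that feature scaling is unnecessary here, a minimal check (a sketch assuming StandardScaler and make_pipeline from scikit-learn) fits a scaled copy of the model and compares its predictions with the unscaled regressor; assuming the training matrix has full column rank, they should agree up to floating-point noise:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# same linear model on standardized inputs: the coefficients change scale,
# the predictions should not (up to numerical tolerance)
scaled_regressor = make_pipeline(StandardScaler(), LinearRegression())
scaled_regressor.fit(X_train, y_train)
print(np.allclose(scaled_regressor.predict(X_test), regressor.predict(X_test)))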
predict
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
# predicted values (left column) next to the actual test values (right column)
print(np.concatenate((y_pred.reshape(len(y_pred), 1),
                      y_test.reshape(len(y_test), 1)), 1))
[[ 93771.16 101004.64]
[ 74466.6 78239.91]
[ 50652.19 69758.98]
[105956.49 99937.59]
[145441.49 129917.04]
[ 37211.29 64926.08]
[ 71764.09 71498.49]
[111249.19 108552.04]
[ 59299.22 65200.33]
[120840.79 118474.03]
[162627.18 149759.96]
[120230.67 126992.93]
[166842.52 156991.12]
[119593.28 108733.99]
[123155.58 110352.25]
[170183.11 155752.6 ]
[198552.13 191050.39]
[157465.39 132602.65]
[ 98810.02 97427.84]
[195490.18 192261.83]
[142926.02 144259.4 ]
[106210.25 96778.92]
[ 89245.08 97483.56]
[ 45068.42 14681.4 ]
[ 41277.39 42559.73]
[115735.66 105733.54]
[176218.16 182901.99]
[137986.12 141585.52]
[ 88264.34 96479.51]
[ 96578.58 89949.14]
[ 82567.67 77798.83]
[196724.5 191792.06]
[111704. 111313.02]
[102560.86 103282.38]
[139310.55 124266.9 ]
[ 87261.82 81005.76]
[ 86587.2 96712.8 ]
[107978.65 122776.86]
[130333.68 134307.35]
[188346.5 166187.94]]
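The same predicted-versus-actual comparison is easier to read as a labelled table (optional; it reuses the pandas import from the top):
comparison = pd.DataFrame({'predicted': y_pred, 'actual': y_test})
comparison['error'] = comparison['predicted'] - comparison['actual']
print(comparison.head())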
evaluate
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)
0.913139010635797
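For reference, the R² score can be reproduced by hand as one minus the ratio of the residual sum of squares to the total sum of squares around the mean of y_test:
ss_res = np.sum((y_test - y_pred) ** 2)            # residual sum of squares
ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)   # total sum of squares
print(1 - ss_res / ss_tot)                         # same value as r2_score(y_test, y_pred)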