import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the CSV; every column except the last is a feature, the last is the target.
dataset = pd.read_csv('_data/02-data.csv')
x = dataset.iloc[:, :-1].values  # 2-D feature array (all columns but the last)
y = dataset.iloc[:, -1].values   # 1-D target array (last column)
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing. No random_state is passed, so the
# split differs between runs.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
# Simple Linear Regression
# machine learning
# preprocessing
# train
from sklearn.linear_model import LinearRegression
# Create the linear regression estimator and fit it on the training split.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
# Output: LinearRegression()
# In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
# On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
# LinearRegression()
# predict
# Predict targets for the held-out test features.
y_pred = regressor.predict(X_test)
# visualize
# Training set: observed points in red, fitted regression line in blue.
plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()
# Test set: held-out points in red. The blue line is still drawn from the
# training features, i.e. it is the same fitted line as in the training plot.
plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (test set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()
# evaluate
from sklearn.metrics import r2_score
# Coefficient of determination of the test predictions. Printed explicitly
# because a bare expression's value is discarded outside a notebook cell.
print(r2_score(y_test, y_pred))
# Output: 0.9261621443754907