数据描述:“美国波士顿地区房价预测”
性能测评
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
平均绝对误差(MAE)
回归值与真实值之间误差绝对值的平均值。
平均绝对误差(MAE)= mean_absolute_error(y_test, lr_y_predict)
均方误差(MSE)
回归值与真实值之间误差平方值的平均值。
均方误差(MSE)= mean_squared_error(y_test, lr_y_predict)
R-squared(R_2):既考量了回归值与真实值的差异,同时也兼顾了问题本身真实值的变动。
1减去回归值与真实值之间的误差平方和与测试数据真实值相对其均值的离差平方和(即方差乘以样本数)的比值,即 $R^2 = 1 - \frac{\sum_i (y_i - \hat{y}_i)^2}{\sum_i (y_i - \bar{y})^2}$。
R_2 = lr.score(X_test, y_test)
R_2 = r2_score(y_test, lr_y_predict)
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
# Boston house-price regression demo: load the data, hold out a test split,
# standardize features and target, fit LinearRegression and SGDRegressor, and
# report R-squared, MSE and MAE for each model.

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this script only runs on older releases unless the loader is swapped.
boston = load_boston()
print(boston.DESCR)
X = boston.data
y = boston.target

# Hold out 25% of the samples for testing; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=33)

# Inspect the spread of the regression target.
print("The max target value is ", np.max(boston.target))
print("The min target value is ", np.min(boston.target))
print("The average value is ", np.mean(boston.target))

# Separate scalers for the features and for the target.
ss_X = StandardScaler()
ss_y = StandardScaler()

# Standardize. The scalers are fitted on the training split only and then
# applied to the test split.
X_train = ss_X.fit_transform(X_train)
X_test = ss_X.transform(X_test)
# StandardScaler works on 2-D input, so the targets become (n, 1) columns.
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)
y_train = ss_y.fit_transform(y_train)
y_test = ss_y.transform(y_test)

# Predict Boston house prices with LinearRegression and SGDRegressor.
lr = LinearRegression()
lr.fit(X_train, y_train)
lr_y_predict = lr.predict(X_test)

sgdr = SGDRegressor()
# SGDRegressor expects a 1-D target; handing it the (n, 1) column makes
# scikit-learn emit a DataConversionWarning, so flatten it explicitly.
sgdr.fit(X_train, y_train.ravel())
sgdr_y_predict = sgdr.predict(X_test)

# Map targets and predictions back to the original price scale once, so MSE and
# MAE are reported in the target's own units. SGDRegressor returns 1-D
# predictions, while the scaler was fitted on a column and newer scikit-learn
# releases reject 1-D input, hence the reshape to (n, 1).
y_test_orig = ss_y.inverse_transform(y_test)
lr_predict_orig = ss_y.inverse_transform(lr_y_predict.reshape(-1, 1))
sgdr_predict_orig = ss_y.inverse_transform(sgdr_y_predict.reshape(-1, 1))

# Evaluation: R-squared (on the standardized scale), mean squared error (MSE)
# and mean absolute error (MAE) (on the original scale).
print('The value of default measurement of LinearRegression is ', lr.score(X_test, y_test))
print('The value of R-squared of LinearRegression is ', r2_score(y_test, lr_y_predict))
print('The mean squared error of LinearRegression is ', mean_squared_error(y_test_orig, lr_predict_orig))
print('The mean absoluate error of LinearRegression is', mean_absolute_error(y_test_orig, lr_predict_orig))
print('The value of default measurement of SGDRegressor is ', sgdr.score(X_test, y_test))
print('The value of R-squared of SGDRegressor is ', r2_score(y_test, sgdr_y_predict))
print('The mean squared error of SGDRegressor is ', mean_squared_error(y_test_orig, sgdr_predict_orig))
print('The mean absoluate error of SGDRegressor is', mean_absolute_error(y_test_orig, sgdr_predict_orig))