sklearn 线性回归实践

import numpy as np
import pandas as pd 
from sklearn import linear_model,datasets,metrics
import matplotlib.pyplot as plt
# Load the dataset from the CSV file in the working directory.
# (Author's note: the full table is 9568 rows x 5 columns.)
data = pd.read_csv('Folds5x2_pp.csv')

# Feature matrix: the four input columns (author's note: 9568 x 4).
X = data.loc[:, ['AT', 'V', 'AP', 'RH']]

# Target: selected with a list so it stays a single-column DataFrame
# rather than a Series (author's note: 9568 x 1).
y = data.loc[:, ['PE']]


from sklearn.model_selection import train_test_split  
# Randomly split the data, holding out 25% as the test set. The random seed
# is fixed so the same rows are selected on every run (reproducible results).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
)
# Resulting shapes noted by the author:
#   X_train 7176 x 4, X_test 2392 x 4
#   y_train 7176 x 1, y_test 2392 x 1   (y has a single column, 'PE')


from sklearn.linear_model import  LinearRegression
# Create the linear regression model and fit it on the training split
# (fit() returns the estimator itself, so the calls can be chained).
LR = LinearRegression().fit(X_train, y_train)

# Print the learned parameters: the intercept (intercept_) and the
# per-feature weights (coef_).
print('LR.intercept:\n', LR.intercept_)
print('LR.coef:\n', LR.coef_)

# Coefficient of determination R^2 on the test split: the larger the value,
# the better the fit (1.0 is a perfect fit).
print('R^2:\n', LR.score(X_test, y_test))

# Predictions for the test features.
y_predict = LR.predict(X_test)

# MSE (mean squared error): mean of the squared differences between the test
# targets and the predictions. RMSE (root mean squared error) is its square root.
test_mse = metrics.mean_squared_error(y_test, y_predict)
print("MSE:", test_mse)
print('RMSE:', np.sqrt(test_mse))
LR.intercept:
 [ 447.06297099]
LR.coef:
 [[-1.97376045 -0.23229086  0.0693515  -0.15806957]]
R^2:
 0.931716257578
MSE: 20.0804012021
RMSE: 4.48111606657
# Show the first five predictions next to the first five true test values.
print('y_predict:\n', y_predict[:5])
print('y_test:\n', y_test[:5])

# Scatter plot of true test values (x axis) against predictions (y axis).
plt.scatter(y_test, y_predict, c='b', alpha=0.5, marker='*')
plt.xlabel('y_test')
plt.ylabel('y_predict')

# Dashed reference line y = x, spanning the min and max of the test targets;
# points lying on it are predicted exactly.
diagonal_ends = [y_test.min(), y_test.max()]
plt.plot(diagonal_ends, diagonal_ends, 'k--', lw=4)
plt.show()
y_predict:
 [[ 459.32136845]
 [ 433.9320719 ]
 [ 474.84501331]
 [ 434.21338967]
 [ 452.56159683]]
y_test:
           PE
5014  458.92
6947  430.55
9230  473.85
4290  435.02
6477  456.44

from sklearn.model_selection import cross_val_predict
from sklearn import metrics
# Cross-validation: with cv=5 the training set is divided into 5 parts; each
# part in turn serves as the validation set while the remaining 4 are used
# for training, giving 5 train/validation combinations in total.
cross_predict = cross_val_predict(LR, X_train, y_train, cv=5)
print(cross_predict)
# (Author's note: cross_predict has shape 7176 x 1.)

# Error of the cross-validated predictions against the training targets.
cv_mse = metrics.mean_squared_error(y_train, cross_predict)
print("MSE:", cv_mse)
print('RMSE:', np.sqrt(cv_mse))

# Scatter plot of training targets (x axis) against cross-validated
# predictions (y axis), with a dashed y = x reference line.
plt.scatter(y_train, cross_predict, c='b', alpha=0.5, marker='*')
plt.xlabel('y_train')
plt.ylabel('cross_predict')
diagonal_ends = [y_train.min(), y_train.max()]
plt.plot(diagonal_ends, diagonal_ends, 'k--', lw=4)
plt.show()
MSE: 21.0332779559
RMSE: 4.5862051803
 
  

猜你喜欢

转载自blog.csdn.net/qq_21840201/article/details/81050994