数据划分:
from sklearn.model_selection import train_test_split  # dataset splitting
# Work on the 16-dimensional PCA-reduced training data; missing values become 0.
new_train_pca_16=new_train_pca_16.fillna(0)
# Feature matrix: every column EXCEPT the label. Selecting all of
# new_train_pca_16.columns would put 'target' among the features (label
# leakage) and make every validation score below meaningless.
train=new_train_pca_16.drop(columns=['target'])
target=new_train_pca_16['target']
# Split the data: 80% training set, 20% validation set (seeded for reproducibility).
train_data,test_data,train_target,test_target=train_test_split(
    train,target,test_size=0.2,random_state=0)
from sklearn.metrics import mean_squared_error  # evaluation metric (MSE)
线性回归模型:
# Ordinary least-squares linear model from scikit-learn.
from sklearn.linear_model import LinearRegression
clf=LinearRegression()
clf.fit(train_data,train_target)
# Predict once and reuse the result for scoring rather than calling predict() twice.
test_pred=clf.predict(test_data)
score=mean_squared_error(test_target,test_pred)
print("LinearRegression: ",score)
k近邻回归模型:
from sklearn.neighbors import KNeighborsRegressor
# k-nearest-neighbours regressor using the 3 closest training samples.
clf=KNeighborsRegressor(n_neighbors=3)
clf.fit(train_data,train_target)
# Prediction is the expensive step for k-NN (neighbour search per sample),
# so compute it once and reuse it for the metric.
test_pred=clf.predict(test_data)
score=mean_squared_error(test_target,test_pred)
print("KNeighborsRegressor: ",score)
决策树回归模型:
from sklearn.tree import DecisionTreeRegressor
# Fix the seed: the tree builder permutes features at each split, so without
# random_state the fitted tree (and the printed score) can differ between runs.
clf=DecisionTreeRegressor(random_state=0)
clf.fit(train_data,train_target)
# Predict once and reuse the result for scoring rather than calling predict() twice.
test_pred=clf.predict(test_data)
score=mean_squared_error(test_target,test_pred)
print("DecisionTreeRegressor: ",score)
随机森林回归模型:
from sklearn.ensemble import RandomForestRegressor
# Forest of 200 trees. Seeded so bootstrap sampling and feature selection are
# reproducible and the score is comparable across runs.
clf=RandomForestRegressor(n_estimators=200,random_state=0)
clf.fit(train_data,train_target)
# Predict once (200 trees are evaluated per call) and reuse the result for scoring.
test_pred=clf.predict(test_data)
score=mean_squared_error(test_target,test_pred)
print("RandomForestRegressor: ",score)
LightGBM回归模型:
from lightgbm import LGBMRegressor
# Gradient-boosted trees: 5000 boosting rounds at learning rate 0.01.
# max_depth=-1 leaves the tree depth unlimited.
clf=LGBMRegressor(
    learning_rate=0.01,
    max_depth=-1,
    n_estimators=5000,
    boosting_type='gbdt',
    random_state=2019,
    objective='regression',
)
clf.fit(train_data,train_target)
# Predict once (5000 trees are evaluated per call) and reuse the result for scoring.
test_pred=clf.predict(test_data)
score=mean_squared_error(test_target,test_pred)
print("LGBMRegressor: ",score)