决策树学习笔记2(Python)

一、导入数据

1、库

%matplotlib inline

import matplotlib.pyplot as plt

import pandas as pd

2、从sklearn导入数据

# Load the California housing dataset through scikit-learn and print its
# bundled description.
# NOTE: import from the public `sklearn.datasets` package. The private module
# path `sklearn.datasets.california_housing` was deprecated in scikit-learn
# 0.22 and removed in 0.24, so the old import fails on current versions.
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
print(housing.DESCR)

3、建树

from sklearn import tree

# Depth-limited (max_depth=2) regression tree, trained on feature columns
# 6 and 7 only so that the resulting tree stays small enough to draw.
dtr = tree.DecisionTreeRegressor(max_depth=2)
dtr.fit(X=housing.data[:, [6, 7]], y=housing.target)

4、可视化

# Render the fitted tree as a PNG inside the notebook.
# Prerequisites: install Graphviz first, then `pip install pydotplus`.
import pydotplus
from IPython.display import Image

# Export the tree as DOT source; with out_file=None the source is returned
# as a string instead of being written to a file.
dot_data = tree.export_graphviz(
    dtr,  # the fitted estimator to draw
    out_file=None,
    feature_names=housing.feature_names[6:8],  # names of the two columns used for fitting
    filled=True,
    impurity=False,
    rounded=True,
)

graph = pydotplus.graph_from_dot_data(dot_data)
graph.get_nodes()[7].set_fillcolor("#FFF2DD")  # recolor the node at index 7
Image(graph.create_png())

                                     

5、参数选择:

                                  

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV, train_test_split

# Hold out 10% of the samples for testing; random_state seeds the shuffle so
# the split is the same on every run.
data_train, data_test, target_train, target_test = train_test_split(
    housing.data, housing.target, test_size=0.1, random_state=42
)

# Baseline 1: a single decision tree on all features.
dtr = tree.DecisionTreeRegressor(random_state=42)
dtr.fit(data_train, target_train)
# Print the score explicitly: a bare expression is only displayed when it is
# the last line of a notebook cell, and is silently discarded otherwise.
print(dtr.score(data_test, target_test))

# Baseline 2: a random forest with default hyper-parameters.
rfr = RandomForestRegressor(random_state=42)
rfr.fit(data_train, target_train)
print(rfr.score(data_test, target_test))

# Exhaustive search over the parameter grid, keeping the best combination.
tree_param_grid = {
    "min_samples_split": [3, 6, 9],
    "n_estimators": [10, 50, 100],
}
grid = GridSearchCV(
    # Seed the forest as well, so the search is reproducible and consistent
    # with the seeded baselines above.
    RandomForestRegressor(random_state=42),
    param_grid=tree_param_grid,
    cv=5,  # 5-fold cross-validation
    return_train_score=True,
)
grid.fit(data_train, target_train)
grid.cv_results_, grid.best_params_, grid.best_score_

猜你喜欢

转载自blog.csdn.net/m0_37712157/article/details/81126448
今日推荐