✌ 실전 사례: 여러 모델의 학습 곡선
1. ✌ 관련 라이브러리 가져오기
from sklearn.ensemble import RandomForestClassifier # 随机森林模型
from sklearn.tree import DecisionTreeClassifier # 决策树
from sklearn.linear_model import LogisticRegression # 逻辑回归
from sklearn.svm import SVC # 支持向量机
from sklearn.naive_bayes import GaussianNB # 朴素贝叶斯
import lightgbm as lgb # lightgbm模型
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve # 用于画学习曲线
from sklearn.model_selection import ShuffleSplit # 分割数据集
from time import time # 导入时间模块
import datetime
from sklearn.datasets import load_digits # 手写数字数据集
2. ✌ 그리기 함수 정의
def plot_learning_curve(estimator, title, x, y, ax, ylim=None, cv=None, n_jobs=None):
    """Plot the learning curve of ``estimator`` onto the subplot ``ax``.

    The scores come from ``sklearn.model_selection.learning_curve``; for each
    training-set size the train and test scores are averaged over the
    cross-validation folds and drawn as two lines.

    Args:
        estimator: Model passed through to ``learning_curve``.
        title: Title shown above the subplot.
        x: Feature matrix passed through to ``learning_curve``.
        y: Labels passed through to ``learning_curve``.
        ax: Axes object to draw on.
        ylim: Optional ``(low, high)`` pair for the y-axis range; the axis
            range is left untouched when ``None``.
        cv: Cross-validation strategy forwarded to ``learning_curve``.
        n_jobs: Parallelism setting forwarded to ``learning_curve``.

    Returns:
        The same ``ax`` that was passed in, after drawing.
    """
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, x, y, cv=cv, n_jobs=n_jobs
    )

    ax.set_title(title)
    if ylim is not None:
        ax.set_ylim(*ylim)
    ax.set_xlabel("training examples")
    ax.set_ylabel("score")
    ax.grid()

    # x-axis: number of training samples; y-axis: score averaged over folds.
    # Both curves must share ``train_sizes`` as their x values.
    ax.plot(train_sizes, np.mean(train_scores, axis=1), 'o-', color='r', label='train score')
    ax.plot(train_sizes, np.mean(test_scores, axis=1), 'o-', color='g', label='test score')
    ax.legend(loc='best')
    return ax
3. ✌ 데이터 준비
# Load the handwritten-digits dataset and split it into the feature matrix
# and the label vector.
data = load_digits()
x, y = data.data, data.target

# Each subplot title is declared next to the estimator it describes, so the
# two lists derived below always stay aligned.
_named_models = [
    ('Naive Bayes', GaussianNB()),
    ('DecisionTree', DecisionTreeClassifier()),
    ('SVM', SVC(gamma=0.001)),
    ('RandomForest', RandomForestClassifier(n_estimators=50)),
    ('Logistic', LogisticRegression(C=0.1, solver='lbfgs')),
    ('lgb', lgb.LGBMClassifier()),
]
# Subplot titles, in plotting order.
title = [name for name, _ in _named_models]
# Estimators, in the same order as ``title``.
model = [est for _, est in _named_models]

# Cross-validation splitter: 50 shuffled splits, 20% of the data held out as
# the test set in each split, with a fixed seed.
cv = ShuffleSplit(n_splits=50, test_size=0.2, random_state=0)
4. ✌ 반복문으로 그리기 함수 호출
# Create the figure with a 2-row by 3-column grid of subplots.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
# Flatten the 2-D array of axes so each subplot is addressed by one index.
axes = axes.ravel()

for ind, (title_, estimator) in enumerate(zip(title, model)):
    start_time = time()  # wall-clock start for this model

    plot_learning_curve(estimator, title_, x, y, ax=axes[ind], ylim=[0.7, 1.05], n_jobs=4, cv=cv)

    # Report the elapsed time as MM:SS:ffffff. The duration is split with
    # plain arithmetic rather than datetime.fromtimestamp(): that call
    # interprets the number as a local-time epoch timestamp, so the minutes
    # field is wrong in time zones whose UTC offset is not a whole hour and
    # wraps around after 60 minutes.
    elapsed_us = max(round((time() - start_time) * 10 ** 6), 0)
    minutes, rem_us = divmod(elapsed_us, 60 * 10 ** 6)
    seconds, micros = divmod(rem_us, 10 ** 6)
    print("{:15s}{:02d}:{:02d}:{:06d}".format(title_, minutes, seconds, micros))

plt.show()