sklearn으로 여러 머신 러닝 모델의 학습 곡선 그리기: 적합 특성과 실행 속도 비교


✌ 실전 사례: 여러 모델의 학습 곡선

1. ✌ 관련 라이브러리 가져오기

from sklearn.ensemble import RandomForestClassifier # 随机森林模型
from sklearn.tree import DecisionTreeClassifier # 决策树
from sklearn.linear_model import LogisticRegression # 逻辑回归
from sklearn.svm import SVC # 支持向量机
from sklearn.naive_bayes import GaussianNB # 朴素贝叶斯
import lightgbm as lgb # lightgbm模型

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import learning_curve # 用于画学习曲线
from sklearn.model_selection import ShuffleSplit # 分割数据集

from time import time # 导入时间模块
import datetime

from sklearn.datasets import load_digits # 手写数字数据集

2. ✌ 그리기 함수 정의

def plot_learning_curve(estimator,title,x,y,ax,ylim=None,cv=None,n_jobs=None):
    """Draw the learning curve of ``estimator`` on the given axes.

    Runs ``learning_curve`` on ``(x, y)`` and plots the mean train score and
    mean test score (``np.mean`` over axis 1 of the returned score arrays)
    against the training-set sizes.

    Args:
        estimator: Model forwarded to ``learning_curve``.
        title: Title shown above the subplot.
        x: Feature data forwarded to ``learning_curve``.
        y: Target data forwarded to ``learning_curve``.
        ax: Axes object to draw on.
        ylim: Optional ``(low, high)`` pair unpacked into ``ax.set_ylim``;
            when ``None`` the y-axis range is left untouched.
        cv: Cross-validation setting forwarded to ``learning_curve``.
        n_jobs: Parallelism setting forwarded to ``learning_curve``.

    Returns:
        The same ``ax`` that was passed in, after drawing.
    """
    train_sizes, train_scores, test_scores = learning_curve(
        estimator, x, y, cv=cv, n_jobs=n_jobs
    )
    ax.set_title(title)  # title of this subplot
    if ylim is not None:
        ax.set_ylim(*ylim)  # restrict the y-axis range
    ax.set_xlabel("training examples")
    ax.set_ylabel("score")
    ax.grid()  # draw grid lines
    # x-axis: number of training samples; y-axis: score averaged over axis 1.
    # The original referenced an undefined name here (``train_sizesLin``),
    # which raised NameError on the first call.
    ax.plot(train_sizes, np.mean(train_scores, axis=1), 'o-', color='r', label='train score')
    ax.plot(train_sizes, np.mean(test_scores, axis=1), 'o-', color='g', label='test score')
    ax.legend(loc='best')  # let matplotlib choose the legend position
    return ax

3. ✌ 데이터 준비

# Load the handwritten-digits dataset and pull out features and labels.
data = load_digits()
x, y = data.data, data.target  # feature matrix, labels

# Subplot titles, listed in the same order as the entries of ``model``.
title = [
    'Naive Bayes',
    'DecisionTree',
    'SVM',
    'RandomForest',
    'Logistic',
    'lgb',
]

# One estimator instance per subplot.
model = [
    GaussianNB(),
    DecisionTreeClassifier(),
    SVC(gamma=0.001),
    RandomForestClassifier(n_estimators=50),
    LogisticRegression(C=0.1, solver='lbfgs'),
    lgb.LGBMClassifier(),
]

# Splitter used for cross-validation: 50 shuffled splits, 20% held out each,
# with a fixed random_state.
cv = ShuffleSplit(n_splits=50, test_size=0.2, random_state=0)

4. ✌ 반복문으로 그리기 함수 호출

# Create the canvas: a 2x3 grid of subplots, one per model.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
# Flatten the 2-D array of axes so each subplot can be addressed by one index.
axes = axes.ravel()
for ind, (title_, estimator) in enumerate(zip(title, model)):
    times = time()  # wall-clock start for this model
    plot_learning_curve(estimator, title_, x, y, ax=axes[ind], ylim=[0.7, 1.05], n_jobs=4, cv=cv)
    elapsed = time() - times
    # Print the model name and its run time as minutes:seconds:microseconds.
    # The elapsed seconds are converted in UTC on purpose: converting in the
    # local timezone adds the zone's UTC offset, which corrupts the minutes
    # field in zones whose offset is not a whole number of hours.
    elapsed_text = datetime.datetime.fromtimestamp(
        elapsed, tz=datetime.timezone.utc
    ).strftime("%M:%S:%f")
    print("{:15s}{}".format(title_, elapsed_text))
plt.show()

여기에 사진 설명 삽입
여기에 사진 설명 삽입

추천

출처: blog.csdn.net/m0_47256162/article/details/113763241