Computer vision - drawing multiple ROC curves in one figure with Python

When evaluating the quality of a classification algorithm, the ROC curve and the area under it (AUC) are often used. When running comparative experiments on different classification models, it is necessary to draw the ROC curves of the different models in a single graph.

1. Comparison of small classification models, which can be called directly

Using the same data set as an example, simply get the prediction results of each classification model directly.

from sklearn.datasets import load_breast_cancer
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import train_test_split
import pylab as plt
import warnings; warnings.filterwarnings('ignore')

# Load the breast-cancer dataset and hold out 20% of it as the test set.
dataset = load_breast_cancer()
data = dataset.data
target = dataset.target
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)

# Model 1: sklearn's RandomForestClassifier.
rf1 = RandomForestClassifier(n_estimators=5)
rf1.fit(X_train, y_train)
# Keep only column 1 of the probability matrix as the score for the ROC curve.
pred1 = rf1.predict_proba(X_test)[:, 1]

# Model 2: sklearn's ExtraTreesClassifier.
rf2 = ExtraTreesClassifier(n_estimators=5)
rf2.fit(X_train, y_train)
pred2 = rf2.predict_proba(X_test)[:, 1]

# Plotting: compute the ROC curve and the AUC of each model.
fpr1, tpr1, threshold1 = metrics.roc_curve(y_test, pred1)
roc_auc1 = metrics.auc(fpr1, tpr1)

fpr2, tpr2, threshold2 = metrics.roc_curve(y_test, pred2)
roc_auc2 = metrics.auc(fpr2, tpr2)

plt.figure(figsize=(6, 6))
plt.title('Validation ROC')
# Each model gets its own colour; with the same colour for both, the two
# curves cannot be told apart in the figure or matched to the legend.
plt.plot(fpr1, tpr1, 'b', label='RandomForestClassifier AUC = %0.3f' % roc_auc1)
plt.plot(fpr2, tpr2, 'g', label='ExtraTreesClassifier AUC = %0.3f' % roc_auc2)
plt.legend(loc='lower right')
# Dashed diagonal: the reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
# Save before show(): the figure is written to disk first, then displayed.
plt.savefig("filename.png")
plt.show()
plt.close()

insert image description here

2. Large-scale CNN models, whose prediction results cannot all be obtained directly in one script.

2.1 Run each classification model separately first, and save the predicted results in a csv file.

from sklearn.datasets import load_breast_cancer
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import train_test_split
import pylab as plt
import warnings; warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression   #线性回归
import csv

# Load the dataset and make a single train/test split shared by both models,
# so that the two CSV files describe the same test samples.
dataset = load_breast_cancer()
data, target = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.2)

# Model 1: random forest. Store the true labels next to the column-1
# probabilities and write them to test1.csv.
rf1 = RandomForestClassifier(n_estimators=5).fit(X_train, y_train)
pred1 = rf1.predict_proba(X_test)[:, 1]
dataframe = pd.DataFrame(dict(label=y_test, pred=pred1))
dataframe.to_csv("test1.csv", sep=',', index=False)

# Model 2: extra trees. Same layout, written to test2.csv.
rf2 = ExtraTreesClassifier(n_estimators=5).fit(X_train, y_train)
pred2 = rf2.predict_proba(X_test)[:, 1]
dataframe = pd.DataFrame(dict(label=y_test, pred=pred2))
dataframe.to_csv("test2.csv", sep=',', index=False)


2.2 Read the prediction results of each model from the csv files and draw the ROC curves (with their AUC values) in one figure

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

def Draw_ROC(file1, file2, encoding=None):
    """Plot the ROC curves of two models, read from CSV files, in one figure.

    Each CSV file must contain a ``label`` column and a ``pred`` column;
    both are passed to ``roc_curve`` as the true labels and the scores.

    Args:
        file1: Path of the first CSV file; its curve is labelled "CSNN".
        file2: Path of the second CSV file; its curve is labelled "NN".
        encoding: Text encoding used to read both files. When ``None`` the
            platform's preferred encoding is used.

    Note on encodings:
        ``encoding='ANSI'`` only works on Windows; on macOS or Linux it
        raises ``LookupError: unknown encoding: ansi``. If the CSV contains
        Chinese text, either pass an explicit encoding such as ``'gbk'``,
        or rename the CSV columns to English so that the encoding no
        longer matters.
    """
    import locale

    if encoding is None:
        # Portable stand-in for the Windows-only 'ANSI' alias: this is the
        # ANSI code page on Windows and typically UTF-8 elsewhere.
        encoding = locale.getpreferredencoding(False)

    data1 = pd.read_csv(file1, encoding=encoding)
    data2 = pd.read_csv(file2, encoding=encoding)

    labels1, scores1 = list(data1['label']), list(data1['pred'])
    labels2, scores2 = list(data2['label']), list(data2['pred'])
    print(labels1, scores1)
    print(labels2, scores2)

    # ROC curve and AUC of the first file (CSNN); thresholds are not needed.
    fpr_CSNN, tpr_CSNN, _ = roc_curve(labels1, scores1)
    roc_auc_CSNN = auc(fpr_CSNN, tpr_CSNN)

    # ROC curve and AUC of the second file (NN).
    fpr_NN, tpr_NN, _ = roc_curve(labels2, scores2)
    roc_auc_NN = auc(fpr_NN, tpr_NN)

    # Many machines default to the 'DejaVu Sans' font, but papers usually
    # require 'Times New Roman'; for installing the font see
    # https://blog.csdn.net/weixin_43543177/article/details/109723328
    sns.set(font_scale=1.2)
    plt.rc('font', family='Times New Roman')

    plt.plot(fpr_NN, tpr_NN, 'purple', label='NN_AUC = %0.2f' % roc_auc_NN)
    plt.plot(fpr_CSNN, tpr_CSNN, 'blue', label='CSNN_AUC = %0.2f' % roc_auc_CSNN)
    plt.legend(loc='lower right', fontsize=12)
    # Dashed diagonal: the reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], 'r--')
    plt.ylabel('True Positive Rate', fontsize=14)
    plt.xlabel('False Positive Rate', fontsize=14)
    plt.show()

if __name__ == "__main__":
    # Compare the two prediction files named test1.csv and test2.csv.
    Draw_ROC('./test1.csv', './test2.csv')

insert image description here

Guess you like

Origin blog.csdn.net/everyxing1007/article/details/127879275