对分类器而言，我们希望它做对两件事：一是用类标签标记真正属于该类的对象，二是不用这个标签去标记不属于此类的对象。这两件事做对的次数分别称为真阳（true positive）和真阴（true negative）；相应地，把不属于该类的对象错误地标记为该类的次数称为假阳（false positive），把真正属于该类的对象漏标的次数称为假阴（false negative）。真阳、假阳、真阴、假阴这 4 个数合并成一个 2 × 2 矩阵，称为误差矩阵或混淆矩阵（confusion matrix）。
import pandas as pd
import numpy as np
# Ground-truth labels and classifier predictions for ten samples
# (1 = positive class, 0 = negative class).
y_true = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1])
y_pred = np.array([0, 0, 1, 1, 1, 1, 1, 0, 0, 0])

# Correct outcomes: the prediction agrees with the truth, either on the
# positive label (true positives) or on the negative label (true negatives).
true_positives = np.sum((y_pred == 1) & (y_true == 1))
true_negatives = np.sum((y_pred == 0) & (y_true == 0))

# Wrong outcomes: the prediction disagrees with the truth. A false positive
# is a predicted 1 whose truth is not 1; a false negative is a predicted 0
# whose truth is not 0.
false_positives = np.sum((y_pred == 1) & (y_true != 1))
false_negatives = np.sum((y_pred == 0) & (y_true != 0))

# Emit the four counts one per line: TP, TN, FP, FN.
print(true_positives, true_negatives, false_positives, false_negatives, sep="\n")

# Error matrix / confusion matrix: rows are the predicted label, columns are
# the true label, both ordered as [1, 0].
confusion = [
    [true_positives, false_positives],
    [false_negatives, true_negatives],
]
print(confusion)

# Same matrix as a labelled DataFrame; the index name records the
# row/column meaning.
confusion = pd.DataFrame(
    confusion,
    index=pd.Index([1, 0], name=r'pred \ truth'),
    columns=[1, 0],
)
print(confusion)

# Precision: the fraction of samples predicted positive that are truly
# positive.
predicted_positive_total = true_positives + false_positives
precision = true_positives / predicted_positive_total
print(precision)

# Recall (also called sensitivity or true-positive rate): the fraction of
# truly positive samples that were predicted positive.
actual_positive_total = true_positives + false_negatives
recall = true_positives / actual_positive_total
print(recall)