最大似然估计+最小错误率贝叶斯决策实现二分类

最大似然估计+最小错误率贝叶斯决策实现二分类,以区分数字5,8为例,进行了模型的训练和测试。

代码如下:

import cv2
import numpy as np

def grid_feature(im, grid=(9, 16)):
    """Extract a normalized grid (zoning) feature vector from a grayscale image.

    The image is binarized at threshold 128, partitioned into a
    grid[0] x grid[1] lattice of cells (each cell extends one extra row and
    column, giving a slight overlap), and the per-cell pixel mass is
    normalized so the feature vector sums to 1.

    Args:
        im: 2-D grayscale image array with values in 0-255.
        grid: (rows, cols) of the feature grid.

    Returns:
        1-D numpy array of length grid[0] * grid[1]. Sums to 1, or is all
        zeros when the binarized image contains no foreground pixels
        (avoids the original 0/0 NaN).
    """
    gh, gw = grid
    h, w = im.shape
    # Binarize: pixels > 128 become 255, the rest 0. This is exactly what
    # cv2.threshold(im, 128, 255, cv2.THRESH_BINARY) did, but in pure numpy
    # so the feature extractor carries no OpenCV dependency.
    im = np.where(im > 128, 255, 0)
    sh = h // gh
    # BUG FIX: cell width must come from the image width. The original used
    # h // gw, which mis-sized every cell on non-square images.
    sw = w // gw
    gridFeat = np.zeros(gh * gw)
    for i in range(gh):
        for j in range(gw):
            cell = im[sh * i: sh * (i + 1) + 1, sw * j: sw * (j + 1) + 1]
            gridFeat[i * gw + j] = np.sum(cell)
    total = np.sum(gridFeat)
    # Guard against an all-background image (division by zero -> NaN).
    if total > 0:
        gridFeat = gridFeat / total
    return gridFeat

# Experiment configuration: 3x3 zoning features, 40 training images and
# 10 test images per class (+1 = digit "5", -1 = digit "8").
nGrids = (3, 3)
nDim = nGrids[0] * nGrids[1]
n_train = 40
n_test = 10

# Pre-allocated feature matrices, one row per image.
X_pos = np.zeros((n_train, nDim))
X_neg = np.zeros((n_train, nDim))
X_test = np.zeros((2 * n_test, nDim))

# Labels: first half positive class, second half negative class.
y_train = np.repeat([1.0, -1.0], n_train)
y_test = np.repeat([1.0, -1.0], n_test)
y_predict = np.zeros(2 * n_test)
# 组织训练集 — build the training set.
# Reads images 1..n_train for each class: digit "5" is the positive class,
# digit "8" the negative class. cv2.imread silently returns None on a missing
# or unreadable file, which previously surfaced as a confusing AttributeError
# inside grid_feature; fail fast with a clear message instead.
for i in range(1, n_train + 1):
    for digit, X in ((5, X_pos), (8, X_neg)):
        path = "E:/data/{}/{}.jpg".format(digit, i)
        im = cv2.imread(path, 0)  # 0 = load as grayscale
        if im is None:
            raise FileNotFoundError("cannot read image: " + path)
        X[i - 1] = grid_feature(im, nGrids)

# 组织测试集 — build the test set from images n_train+1 .. n_train+n_test.
# Row layout of X_test: first n_test rows are class "5", next n_test are "8".
for cat, digit in enumerate((5, 8)):
    for i in range(1, n_test + 1):
        path = "E:/data/{}/{}.jpg".format(digit, i + n_train)
        im = cv2.imread(path, 0)
        if im is None:
            raise FileNotFoundError("cannot read image: " + path)
        X_test[cat * n_test + i - 1] = grid_feature(im, nGrids)

# MLE, predicted labels should be put into y_predict for evaluation
# Class priors estimated as the relative label frequencies in the training
# set (both are 0.5 here by construction). A mean over the boolean mask is
# the same fraction as sum(mask) / len(labels).
prior_pos = np.mean(y_train == 1)
prior_neg = np.mean(y_train == -1)
def multivariate_gaussian(x, mean, cov):
    """Evaluate the multivariate normal density N(mean, cov) at point x.

    Computed in the log domain with slogdet + solve instead of det + inv:
    for high-dimensional, small-variance covariances (such as the grid
    features in this script, whose components sum to 1) det(cov) underflows
    toward 0 and inv(cov) is numerically inaccurate, while the log-domain
    form stays stable.

    Args:
        x: 1-D sample vector of length d.
        mean: 1-D mean vector of length d.
        cov: (d, d) covariance matrix; must be non-singular
            (np.linalg.solve raises LinAlgError otherwise).

    Returns:
        The probability density value (scalar).
    """
    d = len(x)
    diff = x - mean
    # log|cov| without forming the (possibly underflowing) determinant.
    _sign, logdet = np.linalg.slogdet(cov)
    # Mahalanobis term via a linear solve — more stable than inv(cov) @ diff.
    maha = diff @ np.linalg.solve(cov, diff)
    log_density = -0.5 * (d * np.log(2 * np.pi) + logdet + maha)
    return np.exp(log_density)


# 最大似然估计部分 — maximum-likelihood estimates of each class-conditional
# Gaussian: the sample mean and sample covariance of the training features.
mean_pos, mean_neg = X_pos.mean(axis=0), X_neg.mean(axis=0)
# np.cov expects variables along rows by default, hence the transpose
# (equivalent to rowvar=False on the untransposed matrix).
cov_pos = np.cov(X_pos.T)
cov_neg = np.cov(X_neg.T)

# 最小错误率贝叶斯决策部分 — minimum-error-rate Bayes decision: choose the
# class whose (likelihood x prior) is larger. Ties go to the negative class,
# matching the strict ">" comparison.
for idx, sample in enumerate(X_test):
    post_pos = prior_pos * multivariate_gaussian(sample, mean_pos, cov_pos)
    post_neg = prior_neg * multivariate_gaussian(sample, mean_neg, cov_neg)
    y_predict[idx] = 1 if post_pos > post_neg else -1


# |y_test - y_predict| is 2 for every misclassified sample and 0 otherwise,
# so the summed absolute difference divided by 2 * (2 * n_test samples)
# is the error rate; accuracy is its complement.
acc = 1 - np.sum(np.abs(y_test - y_predict)) / (2 * 2 * n_test)
print("正确率", acc)