Binary classification with maximum likelihood estimation (MLE) plus minimum-error-rate Bayes decision: taking the digits 5 and 8 as an example, the model is trained on one set of images and then evaluated on a held-out test set.
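For reference, the two steps can be stated compactly (a standard textbook formulation added here for clarity; the notation below is mine, not taken from the code). With $N_i$ training samples $x_k^{(i)}$ from class $\omega_i$, $i \in \{5, 8\}$, the maximum likelihood estimates of a Gaussian class-conditional density are

$$\hat{\mu}_i=\frac{1}{N_i}\sum_{k=1}^{N_i}x_k^{(i)},\qquad \hat{\Sigma}_i=\frac{1}{N_i}\sum_{k=1}^{N_i}\bigl(x_k^{(i)}-\hat{\mu}_i\bigr)\bigl(x_k^{(i)}-\hat{\mu}_i\bigr)^{\mathsf T},$$

and the minimum-error-rate Bayes decision assigns a test sample $x$ to the class with the larger product of likelihood and prior:

$$\text{decide }\omega_5\ \text{if}\ \mathcal{N}\bigl(x;\hat{\mu}_5,\hat{\Sigma}_5\bigr)P(\omega_5)>\mathcal{N}\bigl(x;\hat{\mu}_8,\hat{\Sigma}_8\bigr)P(\omega_8),\ \text{otherwise decide }\omega_8.$$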
The code is as follows:
import cv2
import numpy as np


def grid_feature(im, grid=(9, 16)):
    """Binarize the image, divide it into grid[0] x grid[1] cells, and return the
    per-cell pixel sums, normalized to sum to 1, as the feature vector."""
    gh, gw = grid
    h, w = im.shape
    _, im = cv2.threshold(im, 128, 255, cv2.THRESH_BINARY)
    sh = h // gh
    sw = w // gw  # bug fix: the cell width comes from the image width w, not h
    gridFeat = np.zeros(gh * gw)
    for i in range(gh):
        for j in range(gw):
            tmp = im[sh * i:sh * (i + 1) + 1, sw * j:sw * (j + 1) + 1]
            gridFeat[i * gw + j] = np.sum(tmp)
    gridFeat = gridFeat / np.sum(gridFeat)  # normalize so the features sum to 1
    return gridFeat
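
# Illustrative check, not part of the original script: grid_feature applied to a
# synthetic 30x30 grayscale array (values made up) returns a 9-dim vector summing to 1.
demo_im = (np.arange(900) % 256).astype(np.uint8).reshape(30, 30)
demo_feat = grid_feature(demo_im, (3, 3))
print(demo_feat.shape, demo_feat.sum())  # (9,) 1.0 (up to floating-point rounding)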
nGrids = (3, 3)
nDim = nGrids[0] * nGrids[1]
n_train = 40
n_test = 10
X_pos = np.zeros((n_train, nDim))
X_neg = np.zeros((n_train, nDim))
X_test = np.zeros((n_test * 2, nDim))
y_train = np.append(np.ones(n_train), -np.ones(n_train))
y_test = np.append(np.ones(n_test), -np.ones(n_test))
y_predict = np.zeros(2 * n_test)

# Build the training set: digit 5 is the positive class (+1), digit 8 the negative class (-1)
for i in range(1, n_train + 1):
    im = cv2.imread("E:/data/5/{}.jpg".format(i), 0)
    aFeat = grid_feature(im, nGrids)
    X_pos[i - 1] = aFeat
for i in range(1, n_train + 1):
    im = cv2.imread("E:/data/8/{}.jpg".format(i), 0)
    aFeat = grid_feature(im, nGrids)
    X_neg[i - 1] = aFeat

# Build the test set: images n_train+1 .. n_train+n_test of each digit
for cat in range(2):
    for i in range(1, n_test + 1):
        im = cv2.imread("E:/data/{}/{}.jpg".format(5 if cat == 0 else 8, i + n_train), 0)
        aFeat = grid_feature(im, nGrids)
        X_test[cat * n_test + i - 1] = aFeat

# MLE + minimum-error-rate Bayes decision; predicted labels go into y_predict for evaluation
# Class priors estimated from the training labels (both 0.5 here)
prior_pos = np.sum(y_train == 1) / len(y_train)
prior_neg = np.sum(y_train == -1) / len(y_train)


def multivariate_gaussian(x, mean, cov):
    """Density of a multivariate Gaussian evaluated at x."""
    d = len(x)
    det_cov = np.linalg.det(cov)
    inv_cov = np.linalg.inv(cov)
    diff = x - mean
    exponent = -0.5 * np.dot(diff, np.dot(inv_cov, diff))
    norm_const = 1 / ((2 * np.pi) ** (d / 2) * np.sqrt(det_cov))
    return norm_const * np.exp(exponent)


# Maximum likelihood estimation: per-class sample mean and covariance
# (np.cov uses the unbiased N-1 denominator; the strict MLE divides by N,
# which differs only by a constant factor)
mean_pos = np.mean(X_pos, axis=0)
mean_neg = np.mean(X_neg, axis=0)
cov_pos = np.cov(X_pos, rowvar=False)
cov_neg = np.cov(X_neg, rowvar=False)
# Because each feature vector sums to 1, the covariance matrices are (near-)singular;
# a small ridge keeps det/inv inside multivariate_gaussian numerically stable
eps = 1e-8
cov_pos += eps * np.eye(nDim)
cov_neg += eps * np.eye(nDim)

# Minimum-error-rate Bayes decision: predict the class whose likelihood x prior is larger
for i in range(2 * n_test):
    x = X_test[i]
    p_pos = multivariate_gaussian(x, mean_pos, cov_pos) * prior_pos
    p_neg = multivariate_gaussian(x, mean_neg, cov_neg) * prior_neg
    if p_pos > p_neg:
        y_predict[i] = 1
    else:
        y_predict[i] = -1

# |y_test - y_predict| equals 2 for each misclassified sample and 0 otherwise
acc = 1 - np.sum(np.abs(y_test - y_predict)) / (2 * 2 * n_test)
print("Accuracy:", acc)