Introduction to Softmax Regression
Softmax regression is a multi-class classification model, also known as multinomial logistic regression; in NLP it is equivalent to the maximum entropy model. It is a widely used classification algorithm and often serves as the final layer of a deep learning classifier, where it produces the class prediction.
Model Hypothesis
Probability model
Given a sample with feature vector x, the probability of predicting class j is given by the softmax function: the exponential of the score for class j divided by the sum of the exponentials of all class scores.
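Written out, with $K$ classes and one parameter vector $\theta_j$ per class:

$$P(y = j \mid x; \theta) = \frac{e^{\theta_j^T x}}{\sum_{k=1}^{K} e^{\theta_k^T x}}$$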
Log-likelihood function
Plainly put, this is the sum over all samples of the log of the probability assigned to the correct class under the current $\theta$. (While coding I printed the $h$ matrix: even when every sample's probability of the correct class was close to 1, the log-likelihood kept growing, only at a slower and slower rate.)
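For $n$ training samples $(x_i, y_i)$ this is

$$\ell(\theta) = \sum_{i=1}^{n} \log P(y_i \mid x_i; \theta),$$

which is exactly what calLikelihood computes from the $h$ matrix in the code below.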
Gradient ascent optimization
Stochastic gradient ascent optimization
Derivation of the log-likelihood gradient
After simplification we obtain:
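The simplified result (the standard softmax-regression gradient, computed one class at a time by calError in the code below) is

$$\frac{\partial \ell(\theta)}{\partial \theta_j} = \sum_{i=1}^{n} \big( 1\{y_i = j\} - P(y = j \mid x_i; \theta) \big)\, x_i,$$

so gradient ascent updates $\theta_j \leftarrow \theta_j + \alpha \, \partial \ell(\theta) / \partial \theta_j$ with learning rate $\alpha$. Stochastic gradient ascent applies the same update using a single randomly chosen sample per step instead of the full sum.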
Visualization: from the theta matrix, the boundary between any two classes is where their scores $\theta^T x$ are equal (i.e., the two class probabilities are equal). A simple visualization is added on top of the original code.
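Concretely, with the bias folded in as $x = (1, x_1, x_2)$, the boundary between classes $j$ and $k$ is the line $(\theta_j - \theta_k)^T x = 0$, i.e.

$$x_2 = \frac{-(\theta_{0,j} - \theta_{0,k}) - (\theta_{1,j} - \theta_{1,k})\, x_1}{\theta_{2,j} - \theta_{2,k}},$$

which is the line drawn by plotBoundary below.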
The Python implementation (with the simple visualization added) is as follows:
from numpy import *
import matplotlib.pyplot as plt
import random
def normalization(x):
    # z-score normalization: zero mean and unit variance per feature
    mu = mean(x, axis=0)
    sigma = std(x, axis=0)
    return (x - mu) / sigma
def softmax(mat):
    # row-wise softmax; subtracting the row max first avoids exp overflow
    # and does not change the result
    mat = mat - mat.max(axis=1)
    expsum = sum(exp(mat), axis=1)
    return exp(mat) / expsum
def calError(h, label, type):
    # per-sample gradient factor for class `type`:
    # indicator(y_i == type) - P(y = type | x_i)
    ans = zeros((shape(h)[0], 1))
    for i in range(shape(h)[0]):
        if label[i, 0] == type:
            ans[i, 0] = 1 - h[i, type]
        else:
            ans[i, 0] = -h[i, type]
    return ans
def loadTrainSet():
    # two features per line
    f = open('Iris/train/x.txt')
    data = []
    for line in f:
        lineArr = line.strip().split()
        data.append([float(lineArr[0]), float(lineArr[1])])
    f.close()
    # one integer class label (0, 1 or 2) per line
    f = open('Iris/train/y.txt')
    label = []
    for line in f:
        lineArr = line.strip().split()
        label.append(int(float(lineArr[0])))
    f.close()
    data = normalization(data)
    # prepend the constant bias term 1 to every sample
    data1 = []
    for i in data:
        data1.append([1, i[0], i[1]])
    return data1, label
def loadTestSet():
    f = open('Iris/test/x.txt')
    data = []
    for line in f:
        lineArr = line.strip().split()
        data.append([float(lineArr[0]), float(lineArr[1])])
    f.close()
    f = open('Iris/test/y.txt')
    label = []
    for line in f:
        lineArr = line.strip().split()
        label.append(int(float(lineArr[0])))
    f.close()
    # note: the test set is normalized with its own mean/std here;
    # strictly, the training-set statistics should be reused
    data = normalization(data)
    data1 = []
    for i in data:
        data1.append([1, i[0], i[1]])
    return data1, label
def getTestPredict(theta, testdata):
    # predict the class with the highest score theta^T x for each sample
    testdataMat = mat(testdata)
    htest = testdataMat * theta
    label = argmax(htest, axis=1)
    return label
def getCorrectRate(predictlabel, testlabel):
    # fraction of samples whose predicted label matches the true label
    correctnum = 0
    for i in range(shape(testlabel)[0]):
        if predictlabel[i, 0] == testlabel[i, 0]:
            correctnum = correctnum + 1
    return correctnum / shape(testlabel)[0]
def calLikelihood(h, labelMat):
    # log-likelihood: sum of the log-probability of each sample's true class
    ans = 0
    for i in range(shape(h)[0]):
        ans = ans + log(h[i, labelMat[i, 0]])
    return ans
def softmaxRegression(data, label):
    # batch gradient ascent on the log-likelihood
    dataMat = mat(data)
    labelMat = mat(label).transpose()
    n, m = shape(dataMat)   # n samples, m features
    theta = zeros((m, 4))   # m features; 3 types + 1 spare column
    alpha = 0.001           # learning rate
    maxCycle = 10000        # iteration cap
    episilon = 0.0005       # convergence threshold on the log-likelihood
    preLikelihood = 0.0
    for k in range(maxCycle):
        h = softmax(dataMat * theta)
        likelihood = calLikelihood(h, labelMat)
        # stop once the log-likelihood barely changes
        if abs(likelihood - preLikelihood) < episilon:
            break
        preLikelihood = likelihood
        # full-batch update of each class column
        for i in range(shape(h)[1]):
            delta = alpha * dataMat.transpose() * calError(h, labelMat, i)
            theta[:, i] = theta[:, i] + delta.transpose()
    print(k)  # number of iterations actually run
    return theta
def stocSoftmaxRegression(data, label):
    # stochastic gradient ascent: each step updates on one random sample
    dataMat = mat(data)
    labelMat = mat(label).transpose()
    n, m = shape(dataMat)   # n samples, m features
    theta = zeros((m, 4))   # m features; 3 types + 1 spare column
    alpha = 0.001
    maxCycle = 50000
    episilon = 1e-7
    preLikelihood = 0.0
    for k in range(maxCycle):
        h = softmax(dataMat * theta)
        likelihood = calLikelihood(h, labelMat)
        if abs(likelihood - preLikelihood) < episilon:
            break
        preLikelihood = likelihood
        # choose one sample only
        rand = random.randint(0, n - 1)
        for i in range(shape(h)[1]):
            if labelMat[rand, 0] == i:
                delta = alpha * (1 - h[rand, i]) * dataMat[rand]
            else:
                delta = alpha * (-h[rand, i]) * dataMat[rand]
            theta[:, i] = theta[:, i] + delta
    print(k)  # number of iterations actually run
    return theta
def plotBestFit(fig, data, label, theta, name, subplot):
    # scatter the three classes and draw the pairwise decision boundaries
    dataMat = mat(data)
    xcord0 = []; ycord0 = []
    xcord1 = []; ycord1 = []
    xcord2 = []; ycord2 = []
    for i in range(shape(data)[0]):
        if label[i] == 0:
            xcord0.append(dataMat[i, 1])
            ycord0.append(dataMat[i, 2])
        elif label[i] == 1:
            xcord1.append(dataMat[i, 1])
            ycord1.append(dataMat[i, 2])
        elif label[i] == 2:
            xcord2.append(dataMat[i, 1])
            ycord2.append(dataMat[i, 2])
    ax = fig.add_subplot(subplot)
    ax.set_title(name, fontsize=8)
    ax.scatter(xcord0, ycord0, s=30, c='red')
    ax.scatter(xcord1, ycord1, s=30, c='green')
    ax.scatter(xcord2, ycord2, s=30, c='blue')
    # boundary between classes j and k: (theta_j - theta_k)^T x = 0
    plotBoundary(theta[0, 0] - theta[0, 1], theta[1, 0] - theta[1, 1], theta[2, 0] - theta[2, 1], "red-green")
    plotBoundary(theta[0, 0] - theta[0, 2], theta[1, 0] - theta[1, 2], theta[2, 0] - theta[2, 2], "red-blue")
    plotBoundary(theta[0, 1] - theta[0, 2], theta[1, 1] - theta[1, 2], theta[2, 1] - theta[2, 2], "green-blue")
def plotBoundary(para0, para1, para2, name):
    # draw the line para0 + para1 * x + para2 * y = 0
    x = arange(-3, 3, 0.1)
    y = (-para1 * x - para0) / para2
    plt.plot(x, y, label=name)
    plt.legend()
def main():
    fig = plt.figure()
    data1, label = loadTrainSet()
    testdata, testlabel = loadTestSet()
    print("SoftmaxRegression:")
    print("theta:")
    theta1 = softmaxRegression(data1, label)
    print(theta1)
    print("to TestDataSet:")
    predictlabel1 = getTestPredict(theta1, testdata)
    print("accuracy:")
    print(getCorrectRate(predictlabel1, mat(testlabel).transpose()))
    plotBestFit(fig, testdata, testlabel, theta1, "Softmax, ToTestDataSet", 221)
    print("to TrainDataSet:")
    predictlabel2 = getTestPredict(theta1, data1)
    print("accuracy:")
    print(getCorrectRate(predictlabel2, mat(label).transpose()))
    plotBestFit(fig, data1, label, theta1, "Softmax, ToTrainDataSet", 222)
    print("stocSoftmaxRegression:")
    print("theta:")
    theta2 = stocSoftmaxRegression(data1, label)
    print(theta2)
    print("to TestDataSet:")
    predictlabel1 = getTestPredict(theta2, testdata)
    print("accuracy:")
    print(getCorrectRate(predictlabel1, mat(testlabel).transpose()))
    plotBestFit(fig, testdata, testlabel, theta2, "stocSoftmax, ToTestDataSet", 223)
    print("to TrainDataSet:")
    predictlabel2 = getTestPredict(theta2, data1)
    print("accuracy:")
    print(getCorrectRate(predictlabel2, mat(label).transpose()))
    plotBestFit(fig, data1, label, theta2, "stocSoftmax, ToTrainDataSet", 224)
    plt.show()

if __name__ == '__main__':
    main()
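As a side note, the per-class update loop in softmaxRegression can be collapsed into a single matrix operation. Below is a minimal sketch of that idea using plain numpy arrays; fit_softmax and softmax_rows are hypothetical helpers (not part of the code above), and the sketch assumes the labels y are integers 0..K-1:

import numpy as np

def softmax_rows(z):
    # row-wise softmax; subtracting the row max avoids exp overflow
    z = z - z.max(axis=1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=1, keepdims=True)

def fit_softmax(X, y, n_classes, alpha=0.001, max_iter=10000, eps=0.0005):
    # hypothetical vectorized variant of softmaxRegression above
    n, m = X.shape
    theta = np.zeros((m, n_classes))
    onehot = np.eye(n_classes)[y]              # (n, K) indicator matrix
    prev = 0.0
    for _ in range(max_iter):
        h = softmax_rows(X @ theta)            # (n, K) class probabilities
        ll = np.log(h[np.arange(n), y]).sum()  # log-likelihood
        if abs(ll - prev) < eps:
            break
        prev = ll
        # one matrix product updates every class column at once
        theta += alpha * (X.T @ (onehot - h))
    return theta

Called as fit_softmax(array(data1), array(label), 4), it performs the same batch updates as the loop above, just without the per-class Python loop.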
Results
The results show that stochastic gradient ascent needs many more iterations to reach the prediction accuracy of batch gradient ascent, although each stochastic update only touches a single sample and is therefore much cheaper.
Simple visualization output:
It basically matches expectations.
PS: the training and test set files can be found in the comments.