Exercise 6.1
Problem: Verify that the logistic distribution belongs to the exponential family of distributions.
Exercise 6.2
Problem: Write out the gradient descent algorithm for learning the logistic regression model.
Exercise 6.3
Problem: Write out the DFP algorithm for learning the maximum entropy model. (For the general DFP algorithm, see Appendix B.)
Solution: The DFP algorithm is given in Appendix B of the book; a minimal numerical sketch of the general DFP iteration follows below.
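As a rough illustration of what that general DFP iteration looks like in code, here is a minimal Python sketch run on a toy quadratic objective. The objective function, the backtracking step-size rule and the helper names (dfp, f, grad) are illustrative assumptions, not the maximum entropy objective asked for in the exercise.
"""
Minimal sketch of the general DFP quasi-Newton iteration (in the spirit of Appendix B),
applied to a toy quadratic objective; the objective, the backtracking step-size rule
and the function names are illustrative assumptions.
"""
import numpy as np

def dfp(f, grad, x0, eps=1e-6, max_iter=100):
    x = np.asarray(x0, dtype=float)
    G = np.eye(len(x))                      # G_0: initial inverse-Hessian approximation
    g = grad(x)
    for _ in range(max_iter):
        if np.linalg.norm(g) < eps:         # stop when the gradient is small enough
            break
        p = -G @ g                          # search direction p_k = -G_k g_k
        lam = 1.0                           # crude backtracking line search for the step size
        while f(x + lam * p) > f(x) and lam > 1e-10:
            lam *= 0.5
        x_new = x + lam * p
        g_new = grad(x_new)
        delta = x_new - x                   # delta_k = x_{k+1} - x_k
        yk = g_new - g                      # y_k = g_{k+1} - g_k
        # DFP update: G_{k+1} = G_k + delta delta^T/(delta^T y_k) - G_k y_k y_k^T G_k/(y_k^T G_k y_k)
        G = G + np.outer(delta, delta) / (delta @ yk) - (G @ np.outer(yk, yk) @ G) / (yk @ G @ yk)
        x, g = x_new, g_new
    return x

if __name__ == "__main__":
    # toy objective f(x) = x1^2 + 2*x2^2 - 2*x1*x2 - 4*x1, whose minimum is at (4, 2)
    f = lambda x: x[0] ** 2 + 2 * x[1] ** 2 - 2 * x[0] * x[1] - 4 * x[0]
    grad = lambda x: np.array([2 * x[0] - 2 * x[1] - 4, 4 * x[1] - 2 * x[0]])
    print(dfp(f, grad, np.array([0.0, 0.0])))   # converges to approximately [4. 2.]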
For Exercise 6.2, the logistic regression algorithm is implemented from scratch in Python below (the learning method is gradient descent).
Gradient descent:
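For reference, the fit method below maximizes the log-likelihood of the logistic regression model by gradient ascent (the input vector is extended with a constant 1 so that the bias is absorbed into w):

L(w) = \sum_{i=1}^{N} \left[ y_i (w \cdot x_i) - \log\bigl(1 + \exp(w \cdot x_i)\bigr) \right]

\nabla L(w) = \sum_{i=1}^{N} x_i \bigl( y_i - \sigma(w \cdot x_i) \bigr), \qquad w \leftarrow w + \eta \, \nabla L(w)

where \sigma(z) = 1/(1+e^{-z}) and \eta is the learning rate; this gradient is exactly the quantity computed as grad = x*(y - self.sigmoid(z)) in the code.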
"""
python自编程实现逻辑斯谛回归
学习方法:梯度下降法
任务:根据正负实例数据集,分类正负实例点
"""
import numpy as np
import time
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签
plt.rcParams['axes.unicode_minus']=False #用来正常显示负号
class LogisticRegression:
    def __init__(self, lr=0.1, max_iter=10000, tol=1e-2):
        self.lr = lr                # learning rate
        self.max_iter = max_iter    # maximum number of iterations
        self.tol = tol              # stopping threshold on the gradient
        self.w = None               # weight vector

    def preprocessing(self, x):
        """
        The formulas extend the weight vector with b and the input vector with a 1.
        Only x needs the extra column of ones here (w is created later with the matching size).
        """
        row = x.shape[0]
        add = np.ones(row).reshape(row, 1)
        x_last = np.hstack((x, add))
        return x_last

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def fit(self, x_train, y_train):
        x = self.preprocessing(x_train)
        y = y_train.T
        # initialize the weights
        self.w = np.zeros((1, x.shape[1]))
        # iteration counter
        k = 0
        for loop in range(self.max_iter):
            z = np.dot(x, self.w.T)
            grad = x * (y - self.sigmoid(z))
            grad = grad.sum(axis=0)
            # use the absolute value of the gradient as the stopping criterion
            if (np.abs(grad) <= self.tol).all():
                break
            else:
                # the log-likelihood is maximized, so the gradient is added (gradient ascent)
                self.w += self.lr * grad
                k += 1
        print("Number of iterations: {}".format(k))
        print("Final gradient: {}".format(grad))
        print("Final weights: {}".format(self.w))

    def predict(self, x):
        p = self.sigmoid(np.dot(self.preprocessing(x), self.w.T))
        p[np.where(p >= 0.5)] = 1
        p[np.where(p < 0.5)] = 0
        return p
    def draw(self, X, y):
        # separate the positive and negative instance points
        y = y[0]
        X_po = X[np.where(y == 1)]
        X_ne = X[np.where(y == 0)]
        # scatter plot of the data set (columns of X hold the coordinates, not rows)
        ax = plt.axes(projection='3d')
        x_1 = X_po[:, 0]
        y_1 = X_po[:, 1]
        z_1 = X_po[:, 2]
        x_2 = X_ne[:, 0]
        y_2 = X_ne[:, 1]
        z_2 = X_ne[:, 2]
        ax.scatter(x_1, y_1, z_1, c="r", label="positive instances")
        ax.scatter(x_2, y_2, z_2, c="b", label="negative instances")
        ax.legend(loc='best')
        # plot the separating plane where p = 0.5
        x = np.linspace(-3, 3, 3)
        y = np.linspace(-3, 3, 3)
        x_3, y_3 = np.meshgrid(x, y)
        a, b, c, d = self.w[0]
        z_3 = -(a * x_3 + b * y_3 + d) / c
        ax.plot_surface(x_3, y_3, z_3, alpha=0.5)  # alpha controls the transparency
        plt.show()
def main():
    start = time.time()
    # 3 positive instance points and 3 negative instance points
    x_train = np.array([[3, 3, 3], [4, 3, 2], [2, 1, 2], [1, 1, 1], [-1, 0, 1], [2, -2, 1]])
    y_train = np.array([[1, 1, 1, 0, 0, 0]])
    # training
    clf = LogisticRegression()
    clf.fit(x_train, y_train)
    # prediction
    x_test = np.array([[1, 2, -2]])
    y_predict = clf.predict(x_test)
    print("{} is classified as: {}".format(x_test[0], y_predict[0]))
    clf.draw(x_train, y_train)
    end = time.time()
    print("Elapsed time: {:.3f}s".format(end - start))

if __name__ == '__main__':
    main()
Stochastic gradient descent:
In (batch) gradient descent, every update requires a pass over the entire training set to compute the gradient, which is slow when the training set is large. Here stochastic gradient descent is used with a batch size of one sample; the per-sample update is given below.
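With a batch size of one, each iteration uses a single sample (x_i, y_i) and the gradient-ascent update becomes

w \leftarrow w + \eta \, x_i \bigl( y_i - \sigma(w \cdot x_i) \bigr)

cycling through the samples in order, which is what the index i does in the training loop below.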
"""
python自编程实现逻辑斯谛回归
学习方法:随机梯度下降法(批量大小为一个样本)
任务:根据正负实例数据集,分类正负实例点
"""
import numpy as np
import time
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签
plt.rcParams['axes.unicode_minus']=False #用来正常显示负号
class LogisticRegression:
    def __init__(self, lr=0.1, max_iter=10000, tol=1e-3):
        self.lr = lr                # learning rate
        self.max_iter = max_iter    # maximum number of iterations
        self.tol = tol              # stopping threshold on the gradient
        self.w = None               # weight vector

    def preprocessing(self, X):
        """Append a column of ones to X (absorbs the bias term into the weights)."""
        row = X.shape[0]
        y = np.ones(row).reshape(row, 1)
        X_prepro = np.hstack((X, y))
        return X_prepro

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def fit(self, X_train, y_train):
        X = self.preprocessing(X_train)
        y = y_train.T
        # initialize the weights w
        self.w = np.zeros((1, X.shape[1]))
        i = 0
        k = 0
        for loop in range(self.max_iter):
            # compute the gradient using a single sample point
            z = np.dot(X[i], self.w.T)
            grad = X[i] * (y[i] - self.sigmoid(z))
            # use the absolute value of the gradient as the stopping criterion
            if (np.abs(grad) <= self.tol).all():
                break
            else:
                # update the weights w by gradient ascent (maximizing the log-likelihood)
                self.w += self.lr * grad
                k += 1
                i = (i + 1) % X.shape[0]
        print("Number of iterations: {}".format(k))
        print("Final gradient: {}".format(grad))
        print("Final weights: {}".format(self.w[0]))

    def predict(self, x):
        p = self.sigmoid(np.dot(self.preprocessing(x), self.w.T))
        p[np.where(p >= 0.5)] = 1
        p[np.where(p < 0.5)] = 0
        return p
    def draw(self, X, y):
        # separate the positive and negative instance points
        y = y[0]
        X_po = X[np.where(y == 1)]
        X_ne = X[np.where(y == 0)]
        # scatter plot of the data set (columns of X hold the coordinates, not rows)
        ax = plt.axes(projection='3d')
        x_1 = X_po[:, 0]
        y_1 = X_po[:, 1]
        z_1 = X_po[:, 2]
        x_2 = X_ne[:, 0]
        y_2 = X_ne[:, 1]
        z_2 = X_ne[:, 2]
        ax.scatter(x_1, y_1, z_1, c="r", label="positive instances")
        ax.scatter(x_2, y_2, z_2, c="b", label="negative instances")
        ax.legend(loc='best')
        # plot the separating plane where p = 0.5
        x = np.linspace(-3, 3, 3)
        y = np.linspace(-3, 3, 3)
        x_3, y_3 = np.meshgrid(x, y)
        a, b, c, d = self.w[0]
        z_3 = -(a * x_3 + b * y_3 + d) / c
        ax.plot_surface(x_3, y_3, z_3, alpha=0.5)  # alpha controls the transparency
        plt.show()
def main():
    start = time.time()
    # training data set
    X_train = np.array([[3, 3, 3], [4, 3, 2], [2, 1, 2], [1, 1, 1], [-1, 0, 1], [2, -2, 1]])
    y_train = np.array([[1, 1, 1, 0, 0, 0]])
    # build an instance and train it
    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    # predict new data
    X_new = np.array([[1, 2, -2]])
    y_predict = clf.predict(X_new)
    print("{} is classified as: {}".format(X_new[0], y_predict[0]))
    clf.draw(X_train, y_train)
    end = time.time()
    print("Elapsed time: {:.3f}s".format(end - start))

if __name__ == "__main__":
    main()
Implementation with the sklearn module (Newton's method, quasi-Newton methods and gradient-based solvers); switching the learning algorithm only requires changing the solver parameter: newton-cg is a Newton method, lbfgs is a quasi-Newton method, sag and saga are stochastic-gradient variants, and liblinear uses the LIBLINEAR library's solver.
"""
逻辑斯蒂回归算法实现(牛顿法,拟牛顿法,梯度下降法)-调用sklearn模块
"""
from sklearn.linear_model import LogisticRegression
import numpy as np
def main():
# 训练数据集
X_train=np.array([[3,3,3],[4,3,2],[2,1,2],[1,1,1],[-1,0,1],[2,-2,1]])
y_train=np.array([1,1,1,0,0,0])
# 选择不同solver,构建实例,进行训练、测试
methodes=["liblinear","newton-cg","lbfgs","sag","saga"]
res=[]
X_new = np.array([[1, 2, -2]])
for method in methodes:
clf=LogisticRegression(solver=method,intercept_scaling=2,max_iter=1000)
clf.fit(X_train,y_train)
# 预测新数据
y_predict=clf.predict(X_new)
#利用已有数据对训练模型进行评价
X_test=X_train
y_test=y_train
correct_rate=clf.score(X_test,y_test)
res.append((y_predict,correct_rate))
# 格式化输出
methodes=["liblinear","newton-cg","lbfgs ","sag ","saga "]
print("solver选择: {}".format(" ".join(method for method in methodes)))
print("{}被分类为: {}".format(X_new[0]," ".join(str(re[0]) for re in res)))
print("测试{}组数据,正确率: {}".format(X_train.shape[0]," ".join(str(round(re[1],1)) for re in res)))
if __name__=="__main__":
main()
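As a small follow-up (not part of the original solution), the fitted parameters of any of the sklearn models above can be inspected through the standard coef_ and intercept_ attributes; a minimal self-contained sketch, assuming the same toy data set:
from sklearn.linear_model import LogisticRegression
import numpy as np

X_train = np.array([[3, 3, 3], [4, 3, 2], [2, 1, 2], [1, 1, 1], [-1, 0, 1], [2, -2, 1]])
y_train = np.array([1, 1, 1, 0, 0, 0])

clf = LogisticRegression(solver="lbfgs", max_iter=1000)  # lbfgs: quasi-Newton solver
clf.fit(X_train, y_train)
print("weight vector w:", clf.coef_[0])   # learned weights
print("bias b:", clf.intercept_[0])       # learned intercept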
For implementing logistic regression and the maximum entropy model to classify the MNIST dataset,
see the following article:
李航《统计学习方法》第2版 第6章 Python编程逻辑斯谛回归与最大熵模型 实现mnist数据集分类