The dataset is shown in the figure below:
In x.txt, line i holds the two feature values of the i-th sample.
In y.txt, line i holds the class label of the i-th sample: 0 or 1.
The task is to use logistic regression to draw a straight decision line that separates the two classes. The expected result is shown below:
Method 1: Gradient ascent
Cost function
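With the hypothesis $h_\theta(x) = \sigma(\theta^{T}x) = 1/(1+e^{-\theta^{T}x})$, the cost over the $n$ training samples is the negative log-likelihood, which is exactly what the code below computes:

$$J(\theta) = -\sum_{i=1}^{n}\Big[\,y^{(i)}\log h_\theta(x^{(i)}) + \big(1-y^{(i)}\big)\log\big(1-h_\theta(x^{(i)})\big)\Big]$$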
Gradient ascent update
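Each iteration moves $\theta$ a step $\alpha$ along the full-batch gradient (in the negated, cost-minimizing form used by the code):

$$\theta := \theta + \alpha\sum_{i=1}^{n}\big(y^{(i)} - h_\theta(x^{(i)})\big)\,x^{(i)}$$

with learning rate $\alpha = 0.001$ in the code.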
Method 2: Stochastic gradient ascent
The cost function is the same as in gradient ascent, but each update computes the "error × feature" factor from a single training sample only, i.e.:
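$$\theta := \theta + \alpha\,\big(y^{(i)} - h_\theta(x^{(i)})\big)\,x^{(i)}$$

where $i$ is a single randomly chosen sample index; one pass over all $n$ samples in random order makes up one outer iteration in the code.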
Method 3: Newton's method
By repeatedly taking tangents as in the figure above, Newton's method homes in on the value of theta at which the derivative of the cost function is zero (i.e., at which the cost function attains its extremum).
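In one dimension this tangent construction is the familiar iteration $x_{k+1} = x_k - f(x_k)/f'(x_k)$ applied to $f = J'$, i.e. $\theta_{k+1} = \theta_k - J'(\theta_k)/J''(\theta_k)$; in several dimensions the second derivative becomes the Hessian matrix $H$ used below.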
Cost function
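It is the same negative log-likelihood $J(\theta)$ as in method 1; the code additionally scales the gradient and the Hessian by $1/n$, which cancels in the Newton step.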
Iteration/update formula
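$$\theta := \theta - H^{-1}\,\nabla_\theta J(\theta)$$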
where:
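$$\nabla_\theta J(\theta) = \frac{1}{n}X^{T}\big(h_\theta(X) - y\big), \qquad H = \frac{1}{n}X^{T}\,\mathrm{diag}\Big(h_\theta(x^{(i)})\big(1-h_\theta(x^{(i)})\big)\Big)\,X$$

These correspond to deltaJTheta and H / n in the code.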
Note: methods 1 and 2 both negate the cost function (i.e., minimize the negative log-likelihood) so that they correspond directly to Newton's method.
Comparing the three methods
Gradient ascent is like a blind man climbing a hill: at each step he picks the direction in which to move theta using the whole picture (all samples).
Stochastic gradient ascent is like a monkey climbing a hill: the monkey picks the direction for theta using only what is right in front of it (a single random sample).
Newton's method is like a climber with a telescope: he can see the terrain ahead and picks the direction for theta using the whole picture. The second derivative (the Hessian matrix) plays the role of the telescope; in the graphical view, the tangent construction reaches the target point (where the derivative of the cost is zero) much faster. The sketch below contrasts one update step of each method.
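To make the contrast concrete, here is a minimal numpy sketch of one update step per method. It assumes a design matrix X of shape (n, 3) whose first column is the intercept 1 and a label column vector y of shape (n, 1), matching what loadDataSet below constructs; the helper names are illustrative and not part of the scripts that follow.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def batch_step(theta, X, y, alpha):
    # gradient ascent/descent: one step using all n samples
    h = sigmoid(X @ theta)
    return theta + alpha * X.T @ (y - h)

def stochastic_step(theta, X, y, alpha, i):
    # stochastic variant: one step using only sample i
    xi = X[i:i + 1]                      # shape (1, 3)
    hi = sigmoid(xi @ theta)[0, 0]
    return theta + alpha * (y[i, 0] - hi) * xi.T

def newton_step(theta, X, y):
    # Newton's method: second-order step, no learning rate needed
    n = X.shape[0]
    h = sigmoid(X @ theta)
    grad = X.T @ (h - y) / n
    H = X.T @ np.diag((h * (1.0 - h)).ravel()) @ X / n
    return theta - np.linalg.inv(H) @ grad

For example, theta = newton_step(theta, X, y) typically converges in a handful of iterations, while the two first-order steps need many passes with a small alpha.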
Code
The Python implementations follow.
1 Gradient descent
from numpy import *
import matplotlib.pyplot as plt
import random
from matplotlib.pyplot import MultipleLocator
import warnings
def normalization(x):
mu = mean(x, axis=0)
sigma = std(x, axis=0)
return (x - mu) / sigma
def loadDataSet():
f = open('Exam/train/x.txt')
data = []
for line in f:
lineArr = line.strip().split()
data.append([float(lineArr[0]), float(lineArr[1])])
f.close()
f = open('Exam/train/y.txt')
label = []
for line in f:
lineArr = line.strip().split()
label.append(int(float(lineArr[0])))
f.close()
    # normalize the data and prepend the intercept term 1
data = normalization(data)
data1 = []
for i in data:
data1.append([1, i[0], i[1]])
return data1, label
def sigmoid(x):
return 1.0 / (1 + exp(-x))
def gradDescent(data, label):
dataMat = mat(data)
labelMat = mat(label).transpose()
n, m = shape(dataMat) # n samples, m features
theta = mat([[1], [-1], [1]])
alpha = 0.001
maxCycle = 10000
    epsilon = 0.01
h = sigmoid(dataMat * theta)
error = h - labelMat
precost = (-1) * (labelMat.transpose() * log(h) + (ones(
(n, 1)) - labelMat).transpose() * log(ones((n, 1)) - h))
plt.ion()
xs = [0, 0]
ys = [0, precost[0, 0]]
for k in range(maxCycle):
        # full-batch gradient step: theta += alpha * X^T (y - h)
        theta = theta + alpha * (dataMat.transpose() * (-error))
h = sigmoid(dataMat * theta)
error = h - labelMat
cost = (-1) * (labelMat.transpose() * log(h) + (ones(
(n, 1)) - labelMat).transpose() * log(ones((n, 1)) - h))
xs[0] = xs[1]
ys[0] = ys[1]
xs[1] = k + 1
ys[1] = cost[0, 0]
fig = plt.figure(1)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
ax = plt.subplot(121)
ax.set_title("cost", fontsize = 8)
ax.plot(xs, ys)
plotResult(data, label, theta, fig)
plt.pause(0.01)
        if abs(precost - cost) < epsilon:
break
precost = cost
print(k)
return theta
def plotResult(data, label, theta, fig):
dataMat = mat(data)
labelMat = mat(label).transpose()
xcord1 = []
ycord1 = []
xcord0 = []
ycord0 = []
for i in range(shape(data)[0]):
if (label[i]) == 0:
xcord0.append(dataMat[i, 1])
ycord0.append(dataMat[i, 2])
else:
xcord1.append(dataMat[i, 1])
ycord1.append(dataMat[i, 2])
with warnings.catch_warnings():
warnings.simplefilter("ignore")
ax = plt.subplot(122)
plt.cla()
ax.scatter(xcord0, ycord0, s=30, c='red', marker='s')
ax.scatter(xcord1, ycord1, s=30, c='green')
x_major_locator = MultipleLocator(1)
y_major_locator = MultipleLocator(2)
ax.xaxis.set_major_locator(x_major_locator)
ax.yaxis.set_major_locator(y_major_locator)
plt.xlim(-3, 3)
plt.ylim(-3, 3)
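        # decision boundary theta^T x = 0, i.e. x2 = -(theta1 * x1 + theta0) / theta2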
x = arange(-3, 3, 0.1)
y = (-theta[1, 0] * x - theta[0, 0]) / theta[2, 0]
ax.plot(x, y)
def main():
data1, label = loadDataSet()
theta = gradDescent(data1, label)
print(theta)
plt.ioff()
plt.show()
if __name__ == '__main__':
main()
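When run from a directory containing Exam/train/x.txt and Exam/train/y.txt, the script animates the cost curve in the left panel and the current decision line in the right panel, stopping once two successive costs differ by less than epsilon or after maxCycle iterations.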
2 Stochastic gradient descent
from numpy import *
import matplotlib.pyplot as plt
import random
from matplotlib.pyplot import MultipleLocator
import warnings
def normalization(x):
mu = mean(x, axis=0)
sigma = std(x, axis=0)
return (x - mu) / sigma
def loadDataSet():
f = open('Exam/train/x.txt')
data = []
for line in f:
lineArr = line.strip().split()
data.append([float(lineArr[0]), float(lineArr[1])])
f.close()
f = open('Exam/train/y.txt')
label = []
for line in f:
lineArr = line.strip().split()
label.append(int(float(lineArr[0])))
f.close()
    # normalize the data and prepend the intercept term 1
data = normalization(data)
data1 = []
for i in data:
data1.append([1, i[0], i[1]])
return data1, label
def sigmoid(x):
return 1.0 / (1 + exp(-x))
def stocGradDescent(data, label):
dataMat = mat(data)
labelMat = mat(label).transpose()
n, m = shape(dataMat) # n samples, m features
theta = mat([[1], [-1], [1]])
alpha = 0.001
maxCycle = 500
    epsilon = 0.1
h = sigmoid(dataMat * theta)
error = h - labelMat
precost = (-1) * (labelMat.transpose() * log(h) + (ones(
(n, 1)) - labelMat).transpose() * log(ones((n, 1)) - h))
plt.ion()
xs = [0, 0]
ys = [0, precost[0, 0]]
for k in range(maxCycle):
arr = list(range(n))
while len(arr):
            rand = random.randint(0, len(arr) - 1)
            idx = arr.pop(rand)  # draw one sample index without replacement
            # choose only one sample: theta += alpha * (y_i - h_i) * x_i
            theta = theta + alpha * ((-error[idx][0, 0]) * dataMat[idx].transpose())
h = sigmoid(dataMat * theta)
error = h - labelMat
cost = (-1) * (labelMat.transpose() * log(h) + (ones(
(n, 1)) - labelMat).transpose() * log(ones((n, 1)) - h))
xs[0] = xs[1]
ys[0] = ys[1]
xs[1] = k + 1
ys[1] = cost[0, 0]
fig = plt.figure(1)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
ax = plt.subplot(121)
ax.set_title("cost", fontsize = 8)
ax.plot(xs, ys)
plotResult(data, label, theta, fig)
plt.pause(0.001)
        if abs(precost - cost) < epsilon:
            break
        precost = cost
print(k)
return theta
def plotResult(data, label, theta, fig):
dataMat = mat(data)
labelMat = mat(label).transpose()
xcord1 = []
ycord1 = []
xcord0 = []
ycord0 = []
for i in range(shape(data)[0]):
if (label[i]) == 0:
xcord0.append(dataMat[i, 1])
ycord0.append(dataMat[i, 2])
else:
xcord1.append(dataMat[i, 1])
ycord1.append(dataMat[i, 2])
with warnings.catch_warnings():
warnings.simplefilter("ignore")
ax = plt.subplot(122)
plt.cla()
ax.scatter(xcord0, ycord0, s=30, c='red', marker='s')
ax.scatter(xcord1, ycord1, s=30, c='green')
x_major_locator = MultipleLocator(1)
y_major_locator = MultipleLocator(2)
ax.xaxis.set_major_locator(x_major_locator)
ax.yaxis.set_major_locator(y_major_locator)
plt.xlim(-3, 3)
plt.ylim(-3, 3)
x = arange(-3, 3, 0.1)
y = (-theta[1, 0] * x - theta[0, 0]) / theta[2, 0]
ax.plot(x, y)
def main():
data1, label = loadDataSet()
theta = stocGradDescent(data1, label)
print(theta)
plt.ioff()
plt.show()
if __name__ == '__main__':
main()
3 Newton's method
from numpy import *
import matplotlib.pyplot as plt
import random
from matplotlib.pyplot import MultipleLocator
import warnings
def normalization(x):
mu = mean(x, axis=0)
sigma = std(x, axis=0)
return (x - mu) / sigma
def loadDataSet():
f = open('Exam/train/x.txt')
data = []
for line in f:
lineArr = line.strip().split()
data.append([float(lineArr[0]), float(lineArr[1])])
f.close()
f = open('Exam/train/y.txt')
label = []
for line in f:
lineArr = line.strip().split()
label.append(int(float(lineArr[0])))
f.close()
    # normalize the data and prepend the intercept term 1
data = normalization(data)
data1 = []
for i in data:
data1.append([1, i[0], i[1]])
return data1, label
def sigmoid(x):
return 1.0 / (1 + exp(-x))
def mat2list(x):
xlist = []
for i in range(shape(x)[0]):
xlist.append(x[i, 0])
return xlist
def newtonMethod(data, label):
dataMat = mat(data)
labelMat = mat(label).transpose()
n, m = shape(dataMat) # n samples, m features
theta = mat([[1], [-1], [1]])
alpha = 0.001
maxCycle = 10000
    epsilon = 0.01
h = sigmoid(dataMat * theta)
error = h - labelMat
precost = (-labelMat.transpose() * log(h) - (ones(
(n, 1)) - labelMat).transpose() * log(ones((n, 1)) - h))
deltaJTheta = (dataMat.transpose() * error) / n
para = multiply(h, ones((n, 1)) - h)
paralist = mat2list(para)
H = dataMat.transpose() * mat(diag(paralist)) * dataMat
plt.ion()
xs = [0, 0]
ys = [0, precost[0, 0]]
for k in range(maxCycle):
        # Newton step: theta -= H^{-1} * gradient (the 1/n factors cancel)
        theta = theta - linalg.inv(H / n) * deltaJTheta
h = sigmoid(dataMat * theta)
cost = (-labelMat.transpose() * log(h) - (ones(
(n, 1)) - labelMat).transpose() * log(ones((n, 1)) - h))
error = h - labelMat
deltaJTheta = (dataMat.transpose() * error) / n
para = multiply(h, ones((n, 1)) - h)
paralist = mat2list(para)
H = dataMat.transpose() * mat(diag(paralist)) * dataMat
xs[0] = xs[1]
ys[0] = ys[1]
xs[1] = k + 1
ys[1] = cost[0, 0]
fig = plt.figure(1)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
ax = plt.subplot(121)
ax.set_title("cost", fontsize = 8)
ax.plot(xs, ys)
plotResult(data, label, theta, fig)
plt.pause(0.2)
        if abs(precost - cost) < epsilon:
break
precost = cost
print(k)
return theta
def plotResult(data, label, theta, fig):
dataMat = mat(data)
labelMat = mat(label).transpose()
xcord1 = []
ycord1 = []
xcord0 = []
ycord0 = []
for i in range(shape(data)[0]):
if (label[i]) == 0:
xcord0.append(dataMat[i, 1])
ycord0.append(dataMat[i, 2])
else:
xcord1.append(dataMat[i, 1])
ycord1.append(dataMat[i, 2])
with warnings.catch_warnings():
warnings.simplefilter("ignore")
ax = plt.subplot(122)
plt.cla()
ax.scatter(xcord0, ycord0, s=30, c='red', marker='s')
ax.scatter(xcord1, ycord1, s=30, c='green')
x_major_locator = MultipleLocator(1)
y_major_locator = MultipleLocator(2)
ax.xaxis.set_major_locator(x_major_locator)
ax.yaxis.set_major_locator(y_major_locator)
plt.xlim(-3, 3)
plt.ylim(-3, 3)
x = arange(-3, 3, 0.1)
y = (-theta[1, 0] * x - theta[0, 0]) / theta[2, 0]
ax.plot(x, y)
def main():
data1, label = loadDataSet()
theta = newtonMethod(data1, label)
print(theta)
plt.ioff()
plt.show()
if __name__ == '__main__':
main()