今天参考 GitHub 上的项目实现了分类,使用的是课程课后练习的数据。
使用的是随机梯度下降,学习率(代码中的 LR)
的选择和遍历次数(代码中的 EPOCH)的设置,还是复杂一点。原文章用的数据分类清晰,效果比较好;课后习题的数据分类比较模糊,如果输出的不是“0”、“1”,而是 sigmoid 的输出值 $h_\theta(x) = g(\theta^T x)$
。要达到较好的效果(收敛)需要遍历350次,如果只是画边界线的话,30次就不错了。
下图是遍历30次的图像,明显没有收敛。
但是边界线的效果已经可以了:(边界线函数 $\theta_0 + \theta_1 x_1 + \theta_2 x_2 = 0$,即 $x_2 = -(\theta_0 + \theta_1 x_1)/\theta_2$
)
from sklearn.datasets.samples_generator import make_blobs
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from math import exp, log
import math
def Nomalize_data(data):
    """Standardize each column of ``data`` to zero mean and unit std.

    The array is modified in place and the same object is returned, so the
    caller's array (or the array it is a view of) sees the scaled values.

    Args:
        data: 2-D NumPy array; columns are scaled independently. It should
            have a floating dtype, because results are written back into it.

    Returns:
        The same ``data`` array after in-place standardization.
    """
    for col in range(data.shape[1]):
        column = data[:, col]
        centered = column - column.mean()
        spread = column.std()
        # A constant column has zero std; dividing would fill it with NaN,
        # so such a column is only centered (it becomes all zeros).
        data[:, col] = centered / spread if spread != 0 else centered
    return data
def sigmoid_function(z):
    """Return the logistic sigmoid ``1 / (1 + e**-z)`` of ``z``.

    The value is computed in a numerically stable way: the exponential is
    only ever taken of a non-positive number, so large negative inputs
    return 0.0 instead of raising ``OverflowError``.

    Args:
        z: A real scalar, or a NumPy array / sequence of reals.

    Returns:
        A Python float for scalar input, otherwise a float NumPy array of
        the same shape with the sigmoid applied element-wise.
    """
    if np.ndim(z) == 0:
        z = float(z)
        if z >= 0:
            return 1 / (1 + exp(-z))
        # For negative z use the equivalent form e**z / (1 + e**z) so that
        # exp() receives a negative argument and cannot overflow.
        e = exp(z)
        return e / (1 + e)

    z = np.asarray(z, dtype=float)
    out = np.empty_like(z)
    non_negative = z >= 0
    out[non_negative] = 1 / (1 + np.exp(-z[non_negative]))
    e = np.exp(z[~non_negative])
    out[~non_negative] = e / (1 + e)
    return out
def hypothesis(x, THETA):
    """Evaluate the logistic model for one sample.

    Args:
        x: Feature vector of a single sample.
        THETA: Parameter array; ``THETA.T`` is matrix-multiplied with ``x``.

    Returns:
        The sigmoid of the first element of ``THETA.T @ x``.
    """
    logits = np.matmul(THETA.T, x)
    # The product has one entry along its first axis; take it before
    # applying the sigmoid.
    return sigmoid_function(logits[0])
def compute_loss(X, Y, THETA):
    """Return the mean binary cross-entropy of the logistic model.

    For each sample with logit ``z = theta . x`` and label ``y`` the loss is
    ``-(y*log(h) + (1-y)*log(1-h))`` with ``h = sigmoid(z)``. It is evaluated
    in the algebraically equal form ``log(1 + e**z) - y*z``, which stays
    finite even when ``h`` rounds to exactly 0 or 1.

    Args:
        X: 2-D array of shape (samples, features), one sample per row.
        Y: Labels (0 or 1), one per row of ``X``.
        THETA: Parameter array with one entry per feature; any shape that
            flattens to ``features`` elements (e.g. ``(features, 1)``).

    Returns:
        The loss averaged over all samples.

    Raises:
        ValueError: If ``X`` contains no samples.
    """
    samples = np.asarray(X, dtype=float)
    labels = np.asarray(Y, dtype=float).reshape(-1)
    if samples.shape[0] == 0:
        raise ValueError("compute_loss requires at least one sample")

    logits = samples @ np.asarray(THETA, dtype=float).reshape(-1)
    # logaddexp(0, z) == log(1 + e**z) without overflow for large |z|.
    per_sample = np.logaddexp(0.0, logits) - labels * logits
    return per_sample.mean()
def update_parameters(THETA, LR, y, h_x, x):
    """Apply one stochastic-gradient step for a single sample.

    Args:
        THETA: Current parameter array.
        LR: Learning rate (step size).
        y: Label of the sample.
        h_x: Model output for the sample.
        x: Feature vector of the sample; reshaped to ``THETA.shape``.

    Returns:
        A new array ``THETA + LR * (y - h_x) * x``; ``THETA`` itself is not
        modified.
    """
    features = np.reshape(x, THETA.shape)
    step = LR * (y - h_x) * features
    return THETA + step
if __name__ == '__main__':
    # --- Hyper-parameters -------------------------------------------------
    LR = 0.003
    EPOCH = 30

    # --- Load and prepare the data ----------------------------------------
    # The first two columns are used as features, the third as the label.
    data = np.loadtxt('ex2data1.txt', delimiter=',')
    X, Y = data[:, :2], data[:, 2]
    X_train = Nomalize_data(X)
    Y_train = Y
    # Prepend a column of ones so THETA[0] acts as the intercept term.
    o_train = np.ones([X_train.shape[0], 1], dtype=X_train.dtype)
    X_train = np.concatenate((o_train, X_train), axis=1)
    # Holds the most recent model output for every sample.
    H_train = np.zeros([Y_train.shape[0], 1], dtype=Y_train.dtype)
    THETA = np.random.normal(0, 0.1, 3).reshape(3, 1)  # learnable parameters

    # --- Training loop and cost (J) curve ---------------------------------
    plt.figure(0)
    cost = []
    for epoch in range(EPOCH):
        for i, (x, y) in enumerate(zip(X_train, Y_train)):
            # Loss over the whole data set, recorded before each update.
            loss = compute_loss(X_train, Y_train, THETA)
            H_train[i] = hypothesis(x, THETA)
            THETA = update_parameters(THETA, LR, y, H_train[i], x)
            cost.append(loss)
    # NOTE(review): the original indentation was lost; this print is assumed
    # to run once after training rather than inside the loops - confirm.
    print(THETA)
    plt.scatter(range(len(cost)), cost)
    plt.show()

    # --- Decision-boundary plot -------------------------------------------
    plt.figure(1)
    x = np.linspace(-2, 1.5, 50)
    # Boundary: THETA[0] + THETA[1]*x + THETA[2]*y = 0, solved for y.
    # The 0.0 is the value of the original expression log(1, math.e).
    y = (0.0 - THETA[0] - THETA[1] * x) / THETA[2]
    plt.plot(x, y)
    plt.scatter(X_train[:, 1], X_train[:, 2], c=Y_train, edgecolors='white', marker='s')
    print(X_train[50:60, 1], X_train[50:60, 2], H_train[50:60, 0])
    plt.show()