西瓜3.0a数据集如下:
密度 含糖量 好瓜
0.697 0.46 1
0.774 0.376 1
0.634 0.264 1
0.608 0.318 1
0.556 0.215 1
0.403 0.237 1
0.481 0.149 1
0.437 0.211 1
0.666 0.091 0
0.243 0.267 0
0.245 0.057 0
0.343 0.099 0
0.639 0.161 0
0.657 0.198 0
0.36 0.37 0
0.593 0.042 0
0.719 0.103 0
由‘机器学习第三章’中的推导公式,将其转换为代码如下
将式 (3.23) $p_1(\hat{x};\beta)=\dfrac{e^{\beta^{T}\hat{x}}}{1+e^{\beta^{T}\hat{x}}}$ 转化为如下代码:
def P1(x_data, beta):
    """Return the logistic posterior p1 = e^z / (1 + e^z), with z = beta^T x.

    This is eq. (3.23). ``x_data`` is the sample vector and ``beta`` the
    parameter vector of the same length; the caller in this file appends a
    constant 1 to the sample so that the last entry of ``beta`` acts as bias.

    Returns a value in [0, 1].
    """
    z = np.dot(beta.T, x_data)
    # e^z / (1 + e^z) == exp(-log(1 + e^-z)).  logaddexp evaluates the log
    # term without overflow; the naive quotient turns into inf/inf = nan as
    # soon as e^z overflows.
    return np.exp(-np.logaddexp(0.0, -z))
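将一阶导数式 (3.30) $\dfrac{\partial \ell(\beta)}{\partial \beta}=-\sum_{i=1}^{m}\hat{x}_i\left(y_i-p_1(\hat{x}_i;\beta)\right)$ 与二阶导数式 (3.31) $\dfrac{\partial^{2} \ell(\beta)}{\partial \beta\,\partial \beta^{T}}=\sum_{i=1}^{m}\hat{x}_i\hat{x}_i^{T}\,p_1(\hat{x}_i;\beta)\left(1-p_1(\hat{x}_i;\beta)\right)$ 转化如下: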
def derivative(x_data, y_data, beta):
    """Return the Newton step H^-1 g of the logistic log-likelihood at beta.

    g is the gradient of eq. (3.30) and H the Hessian of eq. (3.31):

        g = -sum_i x_i * (y_i - p1_i)
        H =  sum_i x_i x_i^T * p1_i * (1 - p1_i)

    where x_i is sample i with a constant 1 appended and p1_i is the logistic
    probability of eq. (3.23).

    x_data: samples, one row per sample.
    y_data: 0/1 labels, one per sample.
    beta:   current parameters, one per feature plus the bias as last entry.

    Returns a vector with the same length as ``beta``.
    Raises numpy.linalg.LinAlgError when the Hessian is singular.
    """
    # Append the constant-1 column so the bias is handled like any weight.
    x_hat = np.column_stack((x_data, np.ones(len(x_data))))
    labels = np.asarray(y_data, dtype=float)

    # Overflow-free sigmoid: exp(-log(1 + e^-z)) == e^z / (1 + e^z).
    p1 = np.exp(-np.logaddexp(0.0, -x_hat.dot(beta)))

    gradient = -x_hat.T.dot(labels - p1)
    # H must be the full matrix of outer products x_i x_i^T.  An element-wise
    # x_i * x_i only yields its diagonal, which is not Newton's method.
    hessian = (x_hat.T * (p1 * (1.0 - p1))).dot(x_hat)

    # Solve H d = g instead of forming the inverse explicitly.
    return np.linalg.solve(hessian, gradient)
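将最后的牛顿法迭代更新式 (3.29) $\beta^{t+1}=\beta^{t}-\left(\dfrac{\partial^{2} \ell(\beta)}{\partial \beta\,\partial \beta^{T}}\right)^{-1}\dfrac{\partial \ell(\beta)}{\partial \beta}$ 进行转换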
def Newton_method(x_data, y_data, iterations, alpha=1):
    """Iterate the update of eq. (3.29) and return the final parameter vector.

    x_data:     samples, one row per sample.
    y_data:     labels, one per sample.
    iterations: maximum number of update steps.
    alpha:      factor applied to every step returned by ``derivative``.

    The parameters start at zero (one weight per feature plus a bias).  The
    loop stops early once ``converge`` accepts two successive estimates, and
    the index of that iteration is printed.
    """
    print('start')
    n_features = x_data.shape[1]
    current = np.append(np.zeros(n_features), 0)
    updated = current
    for step_index in range(iterations):
        step = derivative(x_data, y_data, current)
        updated = current - alpha * step  # (3.29)
        # Stop as soon as the estimate has settled.
        if converge(current, updated):
            print(step_index)
            break
        current = updated
    print('end')
    return updated
中间添加了 beta 值的收敛判定,代码如下:当 beta 各分量的相对变化量(|新值 - 旧值| / |新值|)均小于 0.001 时停止迭代
def converge(old_beta, beta, tol=0.001):
    """Return True when every component of beta has stopped changing.

    A component counts as settled when its relative change
    ``|beta - old_beta| / |beta|`` is below ``tol``.

    old_beta: parameter vector before the update.
    beta:     parameter vector after the update.
    tol:      relative tolerance (default 0.001).
    """
    for change, current in zip(beta - old_beta, beta):
        if current == 0:
            # The relative change is undefined here.  Only an unchanged
            # component may pass; a jump onto exactly 0 is not convergence.
            if change != 0:
                return False
        elif abs(change / current) >= tol:
            return False
    return True
全部工程代码如下:
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 21 21:58:19 2018
@author: Zch
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Load the watermelon 3.0a data set from a CSV file given by relative path.
df_train = pd.read_csv('wm3.0a.csv')
# Feature matrix: the density and sugar-content columns.
X = np.array(df_train.loc[:, ['密度', '含糖量']])
# Label vector: the good-melon column.
y = np.array(df_train.loc[:, '好瓜'])
def Newton_method(x_data, y_data, iterations, alpha=1):
    """Iterate the update of eq. (3.29), p. 59, and return the final parameters.

    x_data:     samples, one row per sample.
    y_data:     labels, one per sample.
    iterations: maximum number of update steps.
    alpha:      factor applied to every step returned by ``derivative``.

    The parameters start at zero (one weight per feature plus a bias).  The
    loop stops early once ``converge`` accepts two successive estimates, and
    the index of that iteration is printed.
    """
    print('start')
    n_features = x_data.shape[1]
    current = np.append(np.zeros(n_features), 0)
    updated = current
    for step_index in range(iterations):
        step = derivative(x_data, y_data, current)
        updated = current - alpha * step  # (3.29)
        # Stop as soon as the estimate has settled.
        if converge(current, updated):
            print(step_index)
            break
        current = updated
    print('end')
    return updated
def converge(old_beta, beta, tol=0.001):
    """Return True when every component of beta has stopped changing.

    A component counts as settled when its relative change
    ``|beta - old_beta| / |beta|`` is below ``tol``.

    old_beta: parameter vector before the update.
    beta:     parameter vector after the update.
    tol:      relative tolerance (default 0.001).
    """
    for change, current in zip(beta - old_beta, beta):
        if current == 0:
            # The relative change is undefined here.  Only an unchanged
            # component may pass; a jump onto exactly 0 is not convergence.
            if change != 0:
                return False
        elif abs(change / current) >= tol:
            return False
    return True
def derivative(x_data, y_data, beta):
    """Return the Newton step H^-1 g of the logistic log-likelihood at beta.

    g is the gradient of eq. (3.30) and H the Hessian of eq. (3.31), p. 60:

        g = -sum_i x_i * (y_i - p1_i)
        H =  sum_i x_i x_i^T * p1_i * (1 - p1_i)

    where x_i is sample i with a constant 1 appended and p1_i is the logistic
    probability of eq. (3.23).

    x_data: samples, one row per sample.
    y_data: 0/1 labels, one per sample.
    beta:   current parameters, one per feature plus the bias as last entry.

    Returns a vector with the same length as ``beta``.
    Raises numpy.linalg.LinAlgError when the Hessian is singular.
    """
    # Append the constant-1 column so the bias is handled like any weight.
    x_hat = np.column_stack((x_data, np.ones(len(x_data))))
    labels = np.asarray(y_data, dtype=float)

    # Overflow-free sigmoid: exp(-log(1 + e^-z)) == e^z / (1 + e^z).
    p1 = np.exp(-np.logaddexp(0.0, -x_hat.dot(beta)))

    gradient = -x_hat.T.dot(labels - p1)
    # H must be the full matrix of outer products x_i x_i^T.  An element-wise
    # x_i * x_i only yields its diagonal, which is not Newton's method.
    hessian = (x_hat.T * (p1 * (1.0 - p1))).dot(x_hat)

    # Solve H d = g instead of forming the inverse explicitly.
    return np.linalg.solve(hessian, gradient)
def P1(x_data, beta):
    """Return the logistic posterior p1 = e^z / (1 + e^z), with z = beta^T x.

    This is eq. (3.23), p. 59. ``x_data`` is the sample vector and ``beta``
    the parameter vector of the same length; the caller in this file appends
    a constant 1 to the sample so that the last entry of ``beta`` acts as bias.

    Returns a value in [0, 1].
    """
    z = np.dot(beta.T, x_data)
    # e^z / (1 + e^z) == exp(-log(1 + e^-z)).  logaddexp evaluates the log
    # term without overflow; the naive quotient turns into inf/inf = nan as
    # soon as e^z overflows.
    return np.exp(-np.logaddexp(0.0, -z))
def predic(x_data, beta):
    """Classify one sample with the fitted logistic model.

    x_data: a single sample vector of the same length as ``beta``.
    beta:   fitted parameter vector.

    Returns True when the predicted probability 1 / (1 + e^-z), with
    z = beta^T x, is at least 0.5 and False otherwise.  A sample exactly on
    the decision boundary is assigned to the positive class, so the result is
    always a bool and never None.
    """
    # sigmoid(z) >= 0.5 exactly when z >= 0.  Comparing the logit directly
    # avoids evaluating exp() and the overflow it can cause.
    return bool(np.dot(beta.T, x_data) >= 0)
if __name__ == '__main__':
    # Fit on the loaded data: at most 1000 updates, each step scaled by 0.05,
    # then show the resulting parameter vector.
    beta = Newton_method(X, y, iterations=1000, alpha=0.05)
    print(beta)
由于没有找到相关测试集,所以还没进行调优,路过的大佬,有相关测试集的,麻烦提供一下