# 运用感知机模型实现对鸢尾花分类
# 运用感知机实现对鸢尾花进行分类
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Matplotlib setup: use the SimHei font for sans-serif text (the plot labels
# are Chinese) and disable the Unicode minus so negative tick labels render.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Read the iris data set; the file has no header row, so columns are numbered.
df = pd.read_csv(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None,
)
class Perceptron():
    """Perceptron binary classifier trained with the primal update rule.

    Parameters
    ----------
    learning_rate : float
        Step size multiplied into every weight update.
    num_iter : int
        Number of passes over the training set performed by ``fit``.
    random_state : int
        Seed of the generator that draws the initial weights.

    Attributes
    ----------
    w : ndarray of shape (1 + n_features,)
        Bias in ``w[0]`` followed by one weight per feature; set by ``fit``.
    errors : list of int
        Number of samples that triggered an update in each pass; set by
        ``fit``.
    """

    def __init__(self, learning_rate=0.01, num_iter=50, random_state=1):
        self.learning_rate = learning_rate
        self.num_iter = num_iter
        self.random_state = random_state

    def fit(self, x, y):
        """Fit the weights to the training data and return ``self``.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,)
            Target labels; ``predict`` emits 1 or -1, so the labels are
            expected to use the same encoding.

        Raises
        ------
        ValueError
            If ``x`` and ``y`` do not contain the same number of samples.
        """
        # Convert up front so lists (or other array-likes) work: the code
        # below needs ``x.shape`` and element-wise arithmetic on each row.
        x = np.asarray(x)
        y = np.asarray(y)
        # zip() would silently drop the surplus samples of the longer input.
        if x.shape[0] != y.shape[0]:
            raise ValueError(
                "x and y must have the same number of samples, got "
                f"{x.shape[0]} and {y.shape[0]}"
            )

        rgen = np.random.RandomState(self.random_state)
        # Small normally distributed initial weights; index 0 is the bias.
        self.w = rgen.normal(loc=0.0, scale=0.01, size=1 + x.shape[1])
        self.errors = []
        for _ in range(self.num_iter):
            errors = 0
            for x_i, target in zip(x, y):
                # Zero when the sample is already classified correctly.
                update = self.learning_rate * (target - self.predict(x_i))
                self.w[1:] += update * x_i
                self.w[0] += update
                errors += int(update != 0.0)
            self.errors.append(errors)
        return self

    def predict_input(self, x):
        """Return the net input ``w . x + b`` for one sample or a batch."""
        return np.dot(x, self.w[1:]) + self.w[0]

    def predict(self, x):
        """Return 1 where the net input is >= 0 and -1 elsewhere."""
        return np.where(self.predict_input(x) >= 0.0, 1, -1)
# Label vector: the class name stored in the fifth column of the first 100
# rows, encoded as 1 for 'Iris-versicolor' and -1 for every other value.
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-versicolor', 1, -1)

# Feature matrix: the first and third columns of the same 100 rows
# (sepal length and petal length in the UCI iris file).
x = df.iloc[0:100, [0, 2]].values

# Scatter the two groups of 50 rows with distinct colours and markers.
plt.scatter(x[:50, 0], x[:50, 1], color='r', s=50, marker='x', label='山鸢尾')
plt.scatter(x[50:100, 0], x[50:100, 1], color='b', s=50, marker='o', label='杂色鸢尾')
# The axis labels name the plotted columns: sepal length on the horizontal
# axis and petal length on the vertical axis (they previously read
# "petal length" / "petal width", which did not match columns 0 and 2).
plt.xlabel('萼片长度(cm)')
plt.ylabel('花瓣长度(cm)')
plt.legend()
plt.show()
# Draw the decision regions of a fitted classifier.
def plot_decesion_regions(x, y, classifier, resolution=0.02):
    """Shade the classifier's decision regions and overlay the samples.

    Parameters
    ----------
    x : ndarray of shape (n_samples, 2)
        Feature matrix; column 0 goes on the horizontal axis and column 1
        on the vertical axis.
    y : ndarray of shape (n_samples,)
        Class label of every sample. At most three distinct labels are
        supported because only three markers and colours are defined.
    classifier : object
        Fitted model exposing ``predict(points)`` for an (n_points, 2)
        array and returning one label per point.
    resolution : float
        Spacing between neighbouring grid points along both axes.

    The function draws on the current matplotlib figure and neither creates
    nor shows it.
    """
    marker_list = ['o', 'x', 's']
    color_list = ['r', 'b', 'g']
    class_values = np.unique(y)
    # One colour per class present in ``y``.
    cmap = ListedColormap(color_list[:len(class_values)])

    # Grid covering the data range with a margin of one unit on every side.
    x1_min, x1_max = x[:, 0].min() - 1, x[:, 0].max() + 1
    x2_min, x2_max = x[:, 1].min() - 1, x[:, 1].max() + 1
    # The grid spacing now honours ``resolution``; it was previously ignored
    # in favour of a fixed 666 points per axis.
    x1, x2 = np.meshgrid(
        np.arange(x1_min, x1_max, resolution),
        np.arange(x2_min, x2_max, resolution),
    )

    # Classify every grid point, then restore the grid shape for contourf.
    grid_points = np.stack((x1.ravel(), x2.ravel()), axis=1)
    y_hat = classifier.predict(grid_points).reshape(x1.shape)

    # Filled contours of the predicted label mark the decision regions.
    plt.contourf(x1, x2, y_hat, alpha=0.2, cmap=cmap)
    plt.xlim(x1.min(), x1.max())
    plt.ylim(x2.min(), x2.max())

    # Overlay the samples, one scatter call (and legend entry) per class.
    for idx, classes in enumerate(class_values):
        mask = y == classes
        plt.scatter(
            x[mask, 0],
            x[mask, 1],
            alpha=0.8,
            s=50,
            c=color_list[idx],
            marker=marker_list[idx],
            label=classes,
        )
# Fit the perceptron on the two selected features.
perceptron = Perceptron(learning_rate=0.1, num_iter=10)
perceptron.fit(x, y)

# Learning curve: number of weight updates made in each pass over the data.
plt.plot(range(1, len(perceptron.errors) + 1), perceptron.errors, marker='o')
plt.xlabel('迭代次数')
plt.ylabel('更新次数')
plt.show()

# Decision regions of the fitted model over the feature plane.
plot_decesion_regions(x, y, classifier=perceptron)
# The axis labels name the plotted features: sepal length (column 0) and
# petal length (column 2). They previously read "petal length" /
# "petal width", which did not match the data.
plt.xlabel('萼片长度(cm)')
plt.ylabel('花瓣长度(cm)')
plt.legend()
plt.show()
# 感知机对偶形式(鸢尾花分类)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
# Matplotlib setup: use the SimHei font for sans-serif text (the plot labels
# are Chinese) and disable the Unicode minus so negative tick labels render.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
def get_data():
    """Download the iris data and build the dual-form training set.

    Returns
    -------
    train_data : list of list
        One ``[column 0, column 2, label]`` entry per row of the first 100
        rows; the label is -1 for rows 0-49 and 1 for rows 50-99.
    x : ndarray
        Columns 0 and 2 of the first 100 rows, used later for plotting.

    The assembled training list is also printed to stdout.
    """
    frame = pd.read_csv(
        'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None,
    )
    wanted_columns = [0, 2, 4]

    x = frame.iloc[0:100, [0, 2]].values

    # Rows 0-49 and 50-99 are handled separately so that the class-name
    # column of each half can be overwritten with its numeric label.
    first_half = frame.iloc[0:50, wanted_columns].values
    second_half = frame.iloc[50:100, wanted_columns].values
    first_half[:, 2] = -1
    second_half[:, 2] = 1

    # Plain nested lists, first half followed by second half.
    train_data = first_half.tolist() + second_half.tolist()
    print('train_data', train_data)
    return train_data, x
def train(num_iter, train_data, learning_rate, random_state=None):
    """Train a perceptron in dual form using randomly sampled updates.

    Each iteration draws one sample at random and, if it is misclassified
    (or lies exactly on the boundary), increases that sample's dual
    coefficient and adjusts the bias.

    Parameters
    ----------
    num_iter : int
        Number of randomly sampled update attempts.
    train_data : sequence of rows
        Every row holds the feature values followed by the label in the
        last position.
    learning_rate : float
        Increment applied to a coefficient and (times the label) to the
        bias on every update.
    random_state : int or None
        Seed for a private ``random.Random`` generator, making the run
        reproducible. With ``None`` (the default) the module-level
        ``random`` generator is used.

    Returns
    -------
    w : ndarray of shape (n_features,)
        Primal weights recovered as ``sum_i alpha_i * y_i * x_i``.
    b : float
        Bias term.
    alpha : list of float
        Dual coefficient of every training sample.
    gram : ndarray of shape (n_samples, n_samples)
        Gram matrix of pairwise feature inner products.
    """
    print(train_data)  # echoes the training data to stdout

    samples = np.array(train_data)
    features = samples[:, 0:-1]
    labels = samples[:, -1]
    data_length = len(samples)

    # Inner products are computed once; the dual form only needs these.
    gram = np.matmul(features, features.T)
    alpha = np.zeros(data_length)
    b = 0.0

    rng = random if random_state is None else random.Random(random_state)
    for _ in range(num_iter):
        i = rng.randint(0, data_length - 1)
        yi = labels[i]
        # Decision value of sample i: sum_j alpha_j * y_j * <x_j, x_i> + b.
        count = np.dot(alpha * labels, gram[i]) + b
        if yi * count <= 0:
            alpha[i] += learning_rate
            b = b + learning_rate * yi

    # Recover the primal weight vector from the dual coefficients.
    w = np.dot(alpha * labels, features)
    # A plain list is returned for alpha, as callers received before.
    return w, b, alpha.tolist(), gram
def plot_points(w, b, x):
    """Plot the separating line ``w[0]*x1 + w[1]*x2 + b = 0`` with the samples.

    Parameters
    ----------
    w : sequence of two numbers
        Weights of the separating line.
    b : number
        Bias of the separating line.
    x : ndarray
        Feature matrix; rows 0-49 and rows 50-99 are drawn as two separate
        groups using columns 0 and 1.

    A new figure is opened and shown.
    """
    plt.figure()

    # Solve the line equation for the vertical coordinate on a fixed
    # horizontal range; the tiny offset avoids dividing by exactly zero.
    line_x = np.linspace(4, 7, 100)
    denominator = w[1] + 1e-10
    line_y = (-b - w[0] * line_x) / denominator
    plt.plot(line_x, line_y, color='k')

    # (row range, colour, marker, legend label) for each plotted group.
    groups = (
        (slice(None, 50), 'r', 'o', '山鸢尾'),
        (slice(50, 100), 'b', 'x', '变色鸢尾'),
    )
    for rows, colour, marker, legend_label in groups:
        plt.scatter(
            x[rows, 0],
            x[rows, 1],
            color=colour,
            s=50,
            marker=marker,
            label=legend_label,
        )

    plt.xlabel('萼片长度(cm)')
    plt.ylabel('花瓣长度(cm)')
    plt.legend()
    plt.show()
# Dual-form demo: load the data, train with 1000 sampled updates at a
# learning rate of 0.1, then plot the resulting separating line.
train_data, x = get_data()
w, b, alpha, gram = train(
    train_data=train_data,
    learning_rate=0.1,
    num_iter=1000,
)
plot_points(w, b, x)