Key concepts:
- Computation graph
- Activation function (a quick derivative check follows this list)
- Gradient descent
- Chain rule
- Tensor differentiation
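As a quick check on the activation-function and chain-rule items above, here is a minimal, self-contained sketch (independent of the listing below; the step size eps is just an illustrative choice) that compares the analytic sigmoid derivative with a central finite difference:

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(z):
    # sigmoid'(z) = sigmoid(z) * (1 - sigmoid(z))
    return sigmoid(z) * (1 - sigmoid(z))

z = np.linspace(-3, 3, 7)
eps = 1e-6  # illustrative step size for the central difference
numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)
print(np.allclose(sigmoid_derivative(z), numeric))  # expected: True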
Derivation
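The network implemented below is a 2-2-1 sigmoid MLP. Assuming a squared-error loss (the code works directly with its derivative, -(label - a)), the chain rule gives the gradients that backward_calculate and update_weight compute. Writing layer 1 for the hidden layer and layer 2 for the output layer:

$$L = \tfrac{1}{2}\,(y - a^{(2)})^{2},\qquad a^{(l)} = \sigma(z^{(l)}),\qquad z^{(l)} = W^{(l)} a^{(l-1)} + b^{(l)}$$

$$\frac{\partial L}{\partial a^{(2)}} = -(y - a^{(2)}),\qquad \delta^{(l)} := \frac{\partial L}{\partial z^{(l)}} = \frac{\partial L}{\partial a^{(l)}}\,\sigma'(z^{(l)}),\qquad \frac{\partial L}{\partial a^{(1)}} = (W^{(2)})^{\top}\delta^{(2)}$$

$$\frac{\partial L}{\partial W^{(l)}} = \delta^{(l)}\,(a^{(l-1)})^{\top},\qquad \frac{\partial L}{\partial b^{(l)}} = \delta^{(l)},\qquad W^{(l)} \leftarrow W^{(l)} - \eta\,\frac{\partial L}{\partial W^{(l)}},\quad b^{(l)} \leftarrow b^{(l)} - \eta\,\frac{\partial L}{\partial b^{(l)}}$$

with learning rate η = 0.2 in the code. The 'a_b' entries in the listing store ∂L/∂a for each layer.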
Code
import numpy as np
import pickle
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def sigmoid_derivative(z):
    # Derivative of the sigmoid: sigmoid(z) * (1 - sigmoid(z))
    return sigmoid(z) * (1 - sigmoid(z))
def dmp(a, tag=''):
    # Debug helper: print an array together with its shape and an optional tag.
    if tag:
        tag += '---'
    print(tag, end='')
    print('shape:', a.shape, 'array:', a)
def forward_calculate(g, x):
    # Forward pass: the first entry of the graph is the input layer,
    # every later layer computes z = w.dot(a_prev) + b and a = sigmoid(z).
    if len(g) == 0:
        return
    for i, layer in enumerate(g):
        if i == 0:
            layer['a'] = x  # the input layer simply exposes the sample as its activation
        else:
            layer['z'] = layer['w'].dot(g[i - 1]['a']) + layer['b']
            layer['a'] = sigmoid(layer['z'])
    return g[-1]['a']
def backward_calculate(g, label):
    # Backward pass: walk the graph from the output layer towards the input,
    # storing dL/da for each layer under the key 'a_b'.
    for i in reversed(range(len(g))):
        layer = g[i]
        if i == 0:
            continue  # the input layer has no parameters and nothing below it
        if i == len(g) - 1:
            # Squared-error loss: dL/da = -(label - a)
            layer['a_b'] = -(label - layer['a'])
        # dL/dz = dL/da * sigmoid'(z); propagate dL/da to the previous layer
        pd_a2z = sigmoid_derivative(layer['z'])
        g[i - 1]['a_b'] = np.dot(layer['a_b'] * pd_a2z, layer['w'])
def update_weight(g):
    # Gradient-descent update with a fixed learning rate of 0.2.
    for i, layer in enumerate(g):
        if i == 0:
            continue  # the input layer has no weights or biases
        # dL/dz for this layer
        pd_l2z = layer['a_b'] * sigmoid_derivative(layer['z'])
        # dL/dw = outer(dL/dz, a_prev); dL/db = dL/dz
        pd_l2w = np.outer(pd_l2z, g[i - 1]['a'])
        layer['w'] = layer['w'] - 0.2 * pd_l2w
        layer['b'] = layer['b'] - 0.2 * pd_l2z
def predict(g, x):
    # Threshold the network output at 0.5 to get a 0/1 class label.
    output = forward_calculate(g, x)
    if output[0] > 0.5:
        return 1
    return 0

def accuracy(g, validation_set):
    # Fraction of samples whose prediction matches the label (last column).
    right_count = 0
    for di in validation_set:
        if predict(g, np.array(di[0:-1])) == di[-1]:
            right_count += 1
    return right_count / len(validation_set)
def train_model():
    # XOR truth table: the first two columns are inputs, the last is the label.
    train_set = [[0, 0, 0], [1, 1, 0], [1, 0, 1], [0, 1, 1]]
    validation_set = train_set
    epoch = 10000
    g = init_graph()
    for i in range(epoch):
        acc = accuracy(g, validation_set)
        print('epoch:', i, 'accuracy:', acc)
        if acc > 0.99999:
            print(g)
            with open('model-xor.dat', 'wb') as f:
                pickle.dump(g, f)
            break
        for di in train_set:
            forward_calculate(g, np.array(di[0:-1]))  # forward pass
            backward_calculate(g, np.array(di[-1]))   # backpropagation
            update_weight(g)                          # gradient-descent update
def init_graph():
    # The "computation graph" is a list of layer dicts: input, hidden, output.
    graph = []
    input_layer = {}
    input_layer['a'] = np.zeros((2,))             # input-layer activation (the sample itself)
    graph.append(input_layer)
    hidden_layer = {}
    hidden_layer['w'] = np.random.random((2, 2))  # hidden-layer weights
    hidden_layer['b'] = np.random.random((2,))    # hidden-layer bias
    hidden_layer['a'] = np.zeros((2,))            # hidden-layer activation
    graph.append(hidden_layer)
    output_layer = {}
    output_layer['w'] = np.random.random((1, 2))  # output-layer weights
    output_layer['b'] = np.random.random((1,))    # output-layer bias
    output_layer['a'] = np.zeros((1,))            # output-layer activation
    graph.append(output_layer)
    return graph
def test_model():
    # Reload the pickled graph and evaluate it on the XOR truth table.
    with open('model-xor.dat', 'rb') as f:
        g = pickle.load(f)
    test_set = [[0, 0, 0], [1, 1, 0], [1, 0, 1], [0, 1, 1]]
    print('test accuracy:', accuracy(g, test_set))

if __name__ == '__main__':
    train_model()
    test_model()
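Once train_model() has written model-xor.dat, the pickled graph can be reloaded and queried directly, e.g. from the same module (a minimal sketch reusing predict, pickle, and numpy from the listing above):

with open('model-xor.dat', 'rb') as f:
    g = pickle.load(f)
print(predict(g, np.array([1, 0])))  # expected: 1
print(predict(g, np.array([1, 1])))  # expected: 0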