# 代码参考了零基础入门深度学习(4) - 卷积神经网络这篇文章,我只对代码里可能存在的一些小错误进行了更改。至于卷积神经网络的原理以及代码里不清楚的地方可以结合该文章理解,十分浅显易懂。
import numpy as np
from functools import reduce
from DL.cnn import ReluActivator,IdentityActivator,element_wise_op
class RecurrentLayer():
    '''
    Fully-connected recurrent layer (vanilla RNN) trained with BPTT.

    State update: s_t = f(U x_t + W s_{t-1}), where f is the activator's
    element-wise forward function. Only the gradient of W is computed and
    updated here; U's update is identical to a fully-connected layer's and
    is intentionally omitted (see the original article).
    '''

    def __init__(self, input_width, state_width, activator, learning_rate):
        '''
        Args:
            input_width: dimension of each input column vector x_t.
            state_width: dimension of the hidden state s_t.
            activator: object providing element-wise forward/backward funcs.
            learning_rate: step size used by update().
        '''
        self.input_width = input_width
        self.state_width = state_width
        self.activator = activator
        self.learning_rate = learning_rate
        self.times = 0        # current time step, starts at 0
        self.state_list = []  # state vector for every time step
        # BUG FIX: the shape must be a tuple. np.zeros(state_width, 1)
        # passes 1 as the dtype argument and raises TypeError.
        self.state_list.append(np.zeros((state_width, 1)))  # initialize s0
        # initialize U and W with small uniform random values
        self.U = np.random.uniform(-1e-4, 1e-4, (state_width, input_width))
        self.W = np.random.uniform(-1e-4, 1e-4, (state_width, state_width))

    def forward(self, input_array):
        '''
        One forward step: s_t = f(U x_t + W s_{t-1}).
        Appends the new state to state_list.
        '''
        self.times += 1
        state = (np.dot(self.U, input_array) +
                 np.dot(self.W, self.state_list[-1]))
        element_wise_op(state, self.activator.forward)
        self.state_list.append(state)

    def backward(self, sensitivity_array, activator):
        '''
        BPTT: compute the error term (delta) for every time step, then
        accumulate the gradient of W.
        '''
        self.calc_delta(sensitivity_array, activator)
        self.calc_gradient()

    def calc_delta(self, sensitivity_array, activator):
        # delta_list[t] holds the error term of time step t;
        # delta_list[self.times] is the sensitivity fed from the layer above.
        self.delta_list = []
        for i in range(self.times):
            # BUG FIX: shape must be a tuple (see __init__).
            self.delta_list.append(np.zeros((self.state_width, 1)))
        self.delta_list.append(sensitivity_array)
        # Propagate the error backwards through time.
        for k in range(self.times - 1, 0, -1):
            self.calc_delta_k(k, activator)

    def calc_delta_k(self, k, activator):
        '''
        Compute the delta of time k from the delta of time k+1:
            delta_k^T = delta_{k+1}^T . W . diag(f'(net))

        BUG FIX: the original code applied activator.backward to
        self.state_list[k+1] *in place* (corrupting the stored states that
        calc_gradient reads afterwards) while the diag used the untouched
        copy, i.e. f(net) instead of the derivative f'(net). Apply backward
        to the copy and use the copy instead.
        '''
        state = self.state_list[k+1].copy()
        element_wise_op(state, activator.backward)
        self.delta_list[k] = np.dot(
            np.dot(self.delta_list[k+1].T, self.W),
            np.diag(state[:, 0])).T

    def calc_gradient(self):
        # gradient_list[t] holds the contribution of time step t to dE/dW.
        self.gradient_list = []
        for t in range(self.times + 1):
            self.gradient_list.append(
                np.zeros((self.state_width, self.state_width)))
        for t in range(self.times, 0, -1):
            self.calc_gradient_t(t)
        # The actual gradient is the sum over all time steps;
        # gradient_list[0] is all zeros and never modified.
        self.gradient = reduce(lambda a, b: a + b, self.gradient_list)

    def calc_gradient_t(self, t):
        '''
        Gradient of W contributed by time step t: delta_t . s_{t-1}^T.
        '''
        self.gradient_list[t] = np.dot(
            self.delta_list[t], self.state_list[t-1].T)

    def update(self):
        '''
        One gradient-descent step on W. (U is not updated here; its update
        works exactly like a fully-connected layer's.)
        '''
        self.W -= self.learning_rate * self.gradient

    def reset_state(self):
        '''
        Reset the internal state. forward() mutates the layer, which makes
        gradient checking awkward; this restores a fresh s0 so repeated
        forward passes start from the same point.
        '''
        self.times = 0
        self.state_list = []
        self.state_list.append(np.zeros((self.state_width, 1)))
def data_set():
    '''
    Return a tiny fixed dataset for gradient checking.

    BUG FIX: the original stub returned None, but gradient_check unpacks
    `x, d = data_set()`, which raised TypeError. The shapes match the
    RecurrentLayer(3, 2, ...) built in gradient_check.

    Returns:
        x: list of two input column vectors of shape (3, 1).
        d: target column vector of shape (2, 1); unused by gradient_check.
    '''
    x = [np.array([[1], [2], [3]]),
         np.array([[2], [3], [4]])]
    d = np.array([[1], [2]])
    return x, d
def gradient_check():
    '''
    Gradient check: compare the analytic gradient of W computed by
    backward() against a numeric central-difference estimate, element
    by element, and print both for inspection.
    '''
    # Error function: sum of all output elements, so dE/do is all-ones.
    error_function = lambda o: o.sum()
    rl = RecurrentLayer(3, 2, IdentityActivator(), 1e-3)
    # Run two forward steps to build up internal state.
    x, d = data_set()
    rl.forward(x[0])
    rl.forward(x[1])
    # Sensitivity map: gradient of the error w.r.t. the final state.
    sensitivity_array = np.ones(rl.state_list[-1].shape, dtype=np.float64)
    # Analytic gradient via BPTT.
    rl.backward(sensitivity_array, IdentityActivator())
    # Numeric gradient by central differences on each element of W.
    epsilon = 1e-4
    for i in range(rl.W.shape[0]):
        for j in range(rl.W.shape[1]):
            rl.W[i][j] += epsilon
            rl.reset_state()
            rl.forward(x[0])
            rl.forward(x[1])
            err1 = error_function(rl.state_list[-1])
            rl.W[i][j] -= 2 * epsilon
            rl.reset_state()
            rl.forward(x[0])
            rl.forward(x[1])
            err2 = error_function(rl.state_list[-1])
            expected_grad = (err1 - err2) / (2 * epsilon)
            rl.W[i][j] += epsilon  # restore the original weight
            # Typo fix: "actural" -> "actual"; added separators for clarity.
            print('weights(%d,%d): expected - actual %f - %f' % (
                i, j, expected_grad, rl.gradient[i][j]))