参照大佬,完成的代码。做一下记录
一、 Neural Networks Learning
- checkNNGradients.py
- computeNumericalGradient.py
- debugInitializeWeights.py 前三个文件都用于梯度检验:用数值方法近似计算梯度,验证反向传播得到的梯度是否正确
- displayData.py 可视化数据
- ML_Exe_04.py 主函数,实现其他函数的统筹调度
- predict.py 预测函数,主要算法实现
- sigmoid.py s型函数及其导数计算
checkNNGradients.py
from debugInitializeWeights import *
from computeNumericalGradient import *
def checkNNGradients(lamb):
    """Compare the backpropagation gradient with a numerical estimate.

    A small fixed network (3 inputs, 5 hidden units, 3 labels, 5 examples)
    is built from deterministic debug weights and random labels. The
    gradient returned by ``nnGradient`` is compared with the one returned
    by ``computeNumericalGradient`` and the relative difference is printed.

    Args:
        lamb: Regularization strength passed through to both gradient
            routines. (Previously it was overwritten with 1 inside the
            function, so the caller's value was ignored.)

    Returns:
        The relative difference
        ``norm(numGrad - grad) / norm(numGrad + grad)``.
    """
    # Dimensions of the small test network.
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5
    # Shapes of the two weight matrices (rows, columns incl. bias column).
    sizeList = {'theta1_x': hidden_layer_size,
                'theta1_y': input_layer_size + 1,
                'theta2_x': num_labels,
                'theta2_y': hidden_layer_size + 1}
    theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
    theta = np.r_[theta1.flatten(), theta2.flatten()]
    # debugInitializeWeights adds one column, so this yields an
    # m x input_layer_size matrix of inputs.
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.random.randint(0, num_labels, (m, 1))
    # Rewrite y as an m x num_labels one-hot matrix.
    Y = np.zeros((m, num_labels))
    for i in range(m):
        Y[i, y[i, 0]] = 1
    grad = nnGradient(theta, X, Y, lamb, sizeList)
    numGrad = computeNumericalGradient(theta, X, Y, lamb, sizeList)
    diff = np.linalg.norm(numGrad - grad) / np.linalg.norm(numGrad + grad)
    print('check NN Gradient: diff = ', diff)
    return diff
computeNumericalGradient.py
from predict import *
import numpy as np
def computeNumericalGradient(theta, X, Y, lamb, sizeList):
    """Estimate the gradient of ``nnCostFunction`` by central differences.

    Each parameter is shifted by ``-e`` and ``+e`` in turn (``e = 1e-4``)
    and the slope between the two cost values is stored.

    Args:
        theta: Flat parameter vector.
        X, Y, lamb, sizeList: Forwarded unchanged to ``nnCostFunction``.

    Returns:
        A 1-D array with one estimated partial derivative per parameter.
    """
    step = 1e-4
    n_params = np.size(theta)
    numgrad = np.zeros(n_params)
    offset = np.zeros(n_params)
    for idx in range(n_params):
        # Perturb only the current parameter.
        offset[idx] = step
        cost_below = nnCostFunction(theta - offset, X, Y, lamb, sizeList)
        cost_above = nnCostFunction(theta + offset, X, Y, lamb, sizeList)
        numgrad[idx] = (cost_above - cost_below) / (2 * step)
        # Restore the shared offset vector for the next parameter.
        offset[idx] = 0
    return numgrad
debugInitializeWeights.py
import numpy as np
def debugInitializeWeights(L_out, L_in):
    """Build a deterministic weight matrix for debugging.

    The entries are ``sin(1), sin(2), ...`` filled row by row and divided
    by 10, so the result is the same on every call.

    Args:
        L_out: Number of rows.
        L_in: Number of columns minus one (one extra column is added).

    Returns:
        An array of shape ``(L_out, L_in + 1)``.
    """
    n_cols = L_in + 1
    values = np.sin(np.arange(1, L_out * n_cols + 1))
    return values.reshape(L_out, n_cols) / 10
displayData.py
import numpy as np
import matplotlib.pyplot as plt
def displayData(X):
    """Tile the rows of ``X`` as grayscale images in one figure and show it.

    Each row is reshaped to ``example_height x example_width``, scaled by
    its largest absolute value and copied into a padded grid whose
    background is -1. The assembled grid is transposed before display.

    Args:
        X: 2-D array with one flattened image per row. It must contain at
            least one row, and the row length must equal
            ``example_height * example_width`` as computed below.
    """
    m = np.size(X, 0)  # number of rows, i.e. number of examples
    n = np.size(X, 1)  # number of columns, i.e. values per example
    example_width = int(np.round(np.sqrt(n)))  # width of one tile
    example_height = int(np.floor(n / example_width))  # height of one tile
    display_rows = int(np.floor(np.sqrt(m)))  # tiles along the first axis
    display_cols = int(np.ceil(m / display_rows))  # tiles along the second axis
    pad = 1  # gap between tiles
    display_array = -np.ones((pad + display_rows * (example_height + pad),
                              pad + display_cols * (example_width + pad)))
    # display_rows * display_cols >= m, so walking the examples in order
    # and deriving the grid cell with divmod visits the same cells as a
    # nested row/column loop that stops after m examples.
    for curr_ex in range(m):
        j, i = divmod(curr_ex, display_cols)
        max_val = np.max(abs(X[curr_ex, :]))
        if max_val == 0:
            # An all-zero example would otherwise be divided 0/0 and
            # show up as NaN; keep it as zeros instead.
            max_val = 1
        jstart = pad + j * (example_height + pad)
        istart = pad + i * (example_width + pad)
        display_array[jstart:jstart + example_height,
                      istart:istart + example_width] = \
            np.array(X[curr_ex, :]).reshape(example_height, example_width) / max_val
    display_array = display_array.T
    plt.imshow(display_array, cmap=plt.cm.gray)
    plt.axis('off')
    plt.show()
ML_Exe_04.py
import numpy as np
import scipy.optimize as op
import scipy.io as sio
from displayData import *
from predict import *
from checkNNGradients import *
# Load the training set.
data = sio.loadmat('ex4data1.mat')
X, y = data['X'], data['y']
# Number of labels.
label_size = 10
# Number of examples.
m = y.shape[0]
print(y.shape)
# Rewrite y as an m x 10 one-hot matrix; positions 0-9 stand for the
# labels 1, 2, ..., 9, 0 respectively.
Y = np.zeros((m, label_size))
for i, label in enumerate(y[:, 0]):
    Y[i, label - 1] = 1
# Visualise 100 randomly chosen examples.
rand = np.random.randint(0, m, 100)
sx = X[rand, ...]
displayData(sx)
# Row and column counts of the hidden-layer and output-layer theta.
sizeList = {
    'theta1_x': 25,
    'theta1_y': 401,
    'theta2_x': 10,
    'theta2_y': 26,
}
lamb = 1
nn_params = randInitializeWeights(sizeList)
# Train the model.
res = op.minimize(
    nnCostFunction,
    nn_params,
    args=(X, Y, lamb, sizeList),
    method='TNC',
    jac=nnGradient,
    options={'maxiter': 100},
)
print(res)
# Gradient check.
checkNNGradients(lamb)
# Compute the accuracy on the training set.
all_theta = changeForm(
    res.x,
    sizeList['theta1_x'],
    sizeList['theta1_y'],
    sizeList['theta2_x'],
    sizeList['theta2_y'],
)
res_theta1, res_theta2 = all_theta['Theta1'], all_theta['Theta2']
pred = predict(res_theta1, res_theta2, X)
acc = np.mean(pred == y.flatten()) * 100
print('Accuracy:', acc, '%')
# Show the hidden layer weights (bias column dropped).
displayData(res_theta1[:, 1:])
predict.py
import numpy as np
from sigmoid import *
def randInitializeWeights(sizeList):
    """Draw a flat vector of initial weights uniformly from [-0.12, 0.12).

    Args:
        sizeList: Dict with keys ``'theta1_x'``, ``'theta1_y'``,
            ``'theta2_x'`` and ``'theta2_y'`` giving the row and column
            counts of the two weight matrices.

    Returns:
        A 1-D array with ``theta1_x * theta1_y + theta2_x * theta2_y``
        random values.
    """
    epsilon_init = 0.12
    first_count = sizeList['theta1_x'] * sizeList['theta1_y']
    second_count = sizeList['theta2_x'] * sizeList['theta2_y']
    return np.random.uniform(-epsilon_init, epsilon_init,
                             first_count + second_count)
def changeForm(theta_vector, theta1_x, theta1_y, theta2_x, theta2_y):
    """Split a flat parameter vector into the two weight matrices.

    Args:
        theta_vector: Flat sequence holding the entries of the first
            matrix followed by the entries of the second.
        theta1_x, theta1_y: Row and column count of the first matrix.
        theta2_x, theta2_y: Row and column count of the second matrix.

    Returns:
        A dict ``{'Theta1': ..., 'Theta2': ...}`` with the reshaped arrays.
    """
    split = theta1_x * theta1_y
    end = split + theta2_x * theta2_y
    first = np.array(theta_vector[:split]).reshape(theta1_x, theta1_y)
    second = np.array(theta_vector[split:end]).reshape(theta2_x, theta2_y)
    return {'Theta1': first, 'Theta2': second}
def nnCostFunction(nn_params, X, Y, lamb, sizeList):
    """Return the regularized cross-entropy cost of the two-layer network.

    Args:
        nn_params: Flat vector holding both weight matrices.
        X: Input matrix with one example per row.
        Y: Target matrix with one row per example (same shape as the
            network output).
        lamb: Regularization strength.
        sizeList: Dict with the weight-matrix shapes, see ``changeForm``.

    Returns:
        The scalar cost J.
    """
    weights = changeForm(nn_params,
                         sizeList['theta1_x'], sizeList['theta1_y'],
                         sizeList['theta2_x'], sizeList['theta2_y'])
    theta1, theta2 = weights['Theta1'], weights['Theta2']
    # Number of examples.
    m = Y.shape[0]
    # Forward pass: prepend the bias column before each layer.
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    hidden = sigmoid(np.dot(a1, theta1.T))
    a2 = np.insert(hidden, 0, values=np.ones(hidden.shape[0]), axis=1)
    a3 = sigmoid(np.dot(a2, theta2.T))
    # Unregularized cross-entropy cost.
    cross_entropy = (-np.multiply(Y, np.log(a3))
                     - np.multiply((1 - Y), np.log(1 - a3)))
    J = 1 / m * np.sum(cross_entropy)
    # The bias column (column 0) is excluded from the penalty.
    penalty = np.sum(theta1[:, 1:] ** 2) + np.sum(theta2[:, 1:] ** 2)
    return J + lamb / (2 * m) * penalty
def nnGradient(nn_params, X, Y, lamb, sizeList):
    """Return the gradient of the regularized cost via backpropagation.

    Args:
        nn_params: Flat vector holding both weight matrices.
        X: Input matrix with one example per row.
        Y: Target matrix with one row per example.
        lamb: Regularization strength.
        sizeList: Dict with the weight-matrix shapes, see ``changeForm``.

    Returns:
        A flat array: the gradient for the first weight matrix followed
        by the gradient for the second, in the same layout as
        ``nn_params``.
    """
    weights = changeForm(nn_params,
                         sizeList['theta1_x'], sizeList['theta1_y'],
                         sizeList['theta2_x'], sizeList['theta2_y'])
    theta1, theta2 = weights['Theta1'], weights['Theta2']
    # Number of examples.
    m = Y.shape[0]
    # Forward pass: prepend the bias column before each layer.
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    z2 = np.dot(a1, theta1.T)
    hidden = sigmoid(z2)
    a2 = np.insert(hidden, 0, values=np.ones(hidden.shape[0]), axis=1)
    a3 = sigmoid(np.dot(a2, theta2.T))
    # Backward pass: layer errors (the bias column of theta2 is skipped).
    delta3 = a3 - Y
    delta2 = np.multiply(np.dot(delta3, theta2[:, 1:]), sigmoidGradient(z2))
    # Accumulated gradients over all examples.
    accum1 = np.dot(delta2.T, a1)
    accum2 = np.dot(delta3.T, a2)
    # Average, then regularize every column except the bias column.
    theta1_grad = accum1 / m
    theta2_grad = accum2 / m
    theta1_grad[:, 1:] = theta1_grad[:, 1:] + lamb / m * theta1[:, 1:]
    theta2_grad[:, 1:] = theta2_grad[:, 1:] + lamb / m * theta2[:, 1:]
    # Optimizers expect a single flat vector, so unroll both matrices.
    return np.r_[theta1_grad.flatten(), theta2_grad.flatten()]
def predict(theta1, theta2, X):
    """Predict a label in 1..K for every row of ``X``.

    Args:
        theta1: Weight matrix of the hidden layer (bias column first).
        theta2: Weight matrix of the output layer (bias column first).
        X: Input matrix with one example per row.

    Returns:
        A 1-D array with, for each example, the index of the largest
        output unit plus one.
    """
    # Number of examples.
    m = X.shape[0]
    # Prepend the bias column and compute the hidden layer.
    a1 = np.insert(X, 0, np.ones(m), axis=1)
    hidden = sigmoid(np.dot(a1, theta1.T))
    # Prepend the bias column and compute the output layer.
    a2 = np.insert(hidden, 0, np.ones(hidden.shape[0]), axis=1)
    scores = sigmoid(np.dot(a2, theta2.T))
    # Shift the 0-based argmax to the 1-based labels.
    labels = scores.argmax(axis=1) + 1
    return labels.flatten()
sigmoid.py
import numpy as np
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator
def sigmoidGradient(z):
    """Return the derivative of the sigmoid, ``s * (1 - s)``, element-wise."""
    activation = sigmoid(z)
    return np.multiply(activation, 1 - activation)