Andrew NG机器学习练习四 Neural Networks Learning PYTHON实现

参照大佬，完成的代码。做一下记录

一、 Neural Networks Learning

checkNNGradients.py
computeNumericalGradient.py
debugInitializeWeights.py 前三个文件都用于梯度检验：用数值梯度验证反向传播算出的梯度是否正确
displayData.py 可视化数据
ML_Exe_04.py 主函数，实现其他函数的统筹调度
predict.py 预测函数，主要算法实现
sigmoid.py s型函数及其导数计算

checkNNGradients.py

from debugInitializeWeights import *
from computeNumericalGradient import *

'''
梯度检测函数,使用梯度下降检测得到的神经网络参数解
是否正确
直接提供
'''
def checkNNGradients(lamb):
    """Compare the backpropagation gradient with a numerical estimate.

    A tiny network (3 inputs, 5 hidden units, 3 labels, 5 samples) is built
    from the deterministic debug weights, then the gradient returned by
    ``nnGradient`` is compared with the finite-difference estimate returned
    by ``computeNumericalGradient``.

    Args:
        lamb: Regularization strength forwarded to both gradient routines.

    Returns:
        The relative difference
        ``norm(numGrad - grad) / norm(numGrad + grad)``, which is also
        printed.  A correct backpropagation implementation is expected to
        give a very small value.
    """
    # Dimensions of the small test network.
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5
    # Row/column counts of the two weight matrices, under the keys that
    # nnGradient and nnCostFunction read.
    sizeList = {'theta1_x': hidden_layer_size,
                'theta1_y': input_layer_size + 1,
                'theta2_x': num_labels,
                'theta2_y': hidden_layer_size + 1}
    theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
    theta = np.r_[theta1.flatten(), theta2.flatten()]
    # The helper adds one column itself, so this yields an
    # (m, input_layer_size) sample matrix.
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.random.randint(0, num_labels, (m, 1))
    # One-hot encode the labels into an (m, num_labels) matrix.
    Y = np.zeros((m, num_labels))
    Y[np.arange(m), y[:, 0]] = 1
    # NOTE: the caller's ``lamb`` is used as given; it is no longer
    # overwritten with a hard-coded 1.
    grad = nnGradient(theta, X, Y, lamb, sizeList)
    numGrad = computeNumericalGradient(theta, X, Y, lamb, sizeList)
    diff = np.linalg.norm(numGrad - grad) / np.linalg.norm(numGrad + grad)
    print('check NN Gradient: diff = ', diff)
    return diff

computeNumericalGradient.py

from predict import *
import numpy as np

#数值方法计算梯度，直接提供
def computeNumericalGradient(theta, X, Y, lamb, sizeList):
    """Estimate the gradient of ``nnCostFunction`` by central differences.

    Each parameter is shifted by ``-step`` and ``+step`` in turn (all other
    parameters untouched) and the slope between the two costs is stored.

    Args:
        theta: Flat parameter vector.
        X, Y, lamb, sizeList: Passed through unchanged to ``nnCostFunction``.

    Returns:
        A 1-D array with one slope per entry of ``theta``.
    """
    step = 1e-4
    param_count = np.size(theta)
    numgrad = np.zeros(param_count)
    offset = np.zeros(param_count)
    for idx in range(param_count):
        # Activate the perturbation for this single coordinate only.
        offset[idx] = step
        cost_below = nnCostFunction(theta - offset, X, Y, lamb, sizeList)
        cost_above = nnCostFunction(theta + offset, X, Y, lamb, sizeList)
        numgrad[idx] = (cost_above - cost_below) / (2 * step)
        # Reset so the next iteration perturbs only its own coordinate.
        offset[idx] = 0
    return numgrad

debugInitializeWeights.py


import numpy as np

#测试参数的初始化
def debugInitializeWeights(L_out, L_in):
    """Build a deterministic ``(L_out, L_in + 1)`` weight matrix for testing.

    The entries are ``sin(1), sin(2), ...`` laid out row by row and scaled
    by 1/10, so repeated calls always give the same values.

    Args:
        L_out: Number of rows of the result.
        L_in: Number of columns of the result minus one.

    Returns:
        The ``(L_out, L_in + 1)`` array of scaled sine values.
    """
    n_cols = L_in + 1
    indices = np.arange(1, L_out * n_cols + 1)
    sines = np.array(np.sin(indices))
    return sines.reshape(L_out, n_cols) / 10

displayData.py

import numpy as np
import matplotlib.pyplot as plt


#显示图片数据
def displayData(X):
    """Show the rows of ``X`` as a grid of grayscale tiles.

    Every row is reshaped into one tile, divided by its largest absolute
    value, and copied into a canvas pre-filled with -1 so that a one-pixel
    border separates the tiles.  The canvas is transposed before being
    drawn with matplotlib.

    Args:
        X: 2-D array; one sample per row.
    """
    sample_count = np.size(X, 0)
    pixel_count = np.size(X, 1)

    # Tile size, derived from the number of values per sample.
    tile_w = int(np.round(np.sqrt(pixel_count)))
    tile_h = int(np.floor(pixel_count / tile_w))
    # Grid layout, derived from the number of samples.
    grid_rows = int(np.floor(np.sqrt(sample_count)))
    grid_cols = int(np.ceil(sample_count / grid_rows))
    gap = 1  # border width between tiles

    canvas = -np.ones((gap + grid_rows * (tile_h + gap),
                       gap + grid_cols * (tile_w + gap)))

    # Fill the grid row by row; grid_rows * grid_cols >= sample_count, so
    # every sample gets a slot.
    for index in range(sample_count):
        row, col = divmod(index, grid_cols)
        top = gap + row * (tile_h + gap)
        left = gap + col * (tile_w + gap)
        sample = X[index, :]
        peak = np.max(abs(sample))
        tile = np.array(sample).reshape(tile_h, tile_w) / peak
        canvas[top: (top + tile_h), left: (left + tile_w)] = tile

    plt.imshow(canvas.T, cmap=plt.cm.gray)
    plt.axis('off')
    plt.show()

ML_Exe_04.py

import numpy as np
import scipy.optimize as op
import scipy.io as sio
from displayData import *
from predict import *
from checkNNGradients import *



# Load the training set.
dataset = sio.loadmat('ex4data1.mat')
X = dataset['X']
y = dataset['y']
# Number of labels and number of samples.
label_size = 10
m = y.shape[0]
print(y.shape)

# One-hot encode y into an (m, label_size) matrix: label k sets column k - 1.
# (Per the original note, columns 0-9 stand for the digits 1, 2, ..., 9, 0.)
Y = np.zeros((m, label_size))
Y[np.arange(m), y[:, 0] - 1] = 1

# Visualize 100 randomly chosen samples.
sample_idx = np.random.randint(0, m, 100)
displayData(X[sample_idx, ...])

# Row (x) and column (y) counts of the hidden-layer and output-layer weights.
sizeList = {'theta1_x': 25,
            'theta1_y': 401,
            'theta2_x': 10,
            'theta2_y': 26}
lamb = 1
nn_params = randInitializeWeights(sizeList)

# Train the model.
# NOTE(review): newer SciPy releases document `maxfun` rather than `maxiter`
# for the TNC solver -- confirm the installed version still honors 'maxiter'.
res = op.minimize(fun=nnCostFunction,
                  x0=nn_params,
                  args=(X, Y, lamb, sizeList),
                  method='TNC',
                  jac=nnGradient,
                  options={'maxiter': 100})
print(res)

# Gradient check.
checkNNGradients(lamb)

# Compute the training-set accuracy.
all_theta = changeForm(res.x,
                       sizeList['theta1_x'], sizeList['theta1_y'],
                       sizeList['theta2_x'], sizeList['theta2_y'])
res_theta1, res_theta2 = all_theta['Theta1'], all_theta['Theta2']
pred = predict(res_theta1, res_theta2, X)
acc = np.mean(pred == y.flatten()) * 100
print('Accuracy:',acc,'%')

# Visualize the hidden layer (bias column dropped).
displayData(res_theta1[:, 1:])

predict.py

import numpy as np
from sigmoid import *

#随机初始化theta,在[-epsilon,epsilon]
def randInitializeWeights(sizeList):
    """Draw a flat vector of random initial weights for both layers.

    Args:
        sizeList: Dict with the keys ``'theta1_x'``, ``'theta1_y'``,
            ``'theta2_x'`` and ``'theta2_y'`` (row and column counts of
            the two weight matrices).

    Returns:
        A 1-D array of ``theta1_x * theta1_y + theta2_x * theta2_y`` values
        drawn with ``np.random.uniform`` between -0.12 and 0.12.
    """
    epsilon_init = 0.12
    rows1, cols1 = sizeList['theta1_x'], sizeList['theta1_y']
    rows2, cols2 = sizeList['theta2_x'], sizeList['theta2_y']
    weight_count = rows1 * cols1 + rows2 * cols2
    return np.random.uniform(-epsilon_init, epsilon_init, weight_count)

#将初始化时的向量，改写成矩阵型theta
def changeForm(theta_vector,theta1_x,theta1_y,theta2_x,theta2_y):
    """Split a flat parameter vector into the two weight matrices.

    Args:
        theta_vector: Flat sequence holding Theta1 followed by Theta2.
        theta1_x, theta1_y: Rows and columns of Theta1.
        theta2_x, theta2_y: Rows and columns of Theta2.

    Returns:
        Dict with the keys ``'Theta1'`` and ``'Theta2'`` mapping to the
        reshaped arrays.
    """
    split = theta1_x * theta1_y
    stop = split + theta2_x * theta2_y
    first = np.array(theta_vector[0:split]).reshape(theta1_x, theta1_y)
    second = np.array(theta_vector[split:stop]).reshape(theta2_x, theta2_y)
    return {'Theta1': first, 'Theta2': second}

#计算各层的unit值
def nnCostFunction(nn_params,X,Y,lamb,sizeList):
    """Return the regularized cross-entropy cost of the two-layer network.

    Args:
        nn_params: Flat vector holding both weight matrices.
        X: Sample matrix, one sample per row (without the bias column).
        Y: One-hot label matrix with one row per sample.
        lamb: Regularization strength.
        sizeList: Dict with the weight-matrix sizes (``'theta1_x'``,
            ``'theta1_y'``, ``'theta2_x'``, ``'theta2_y'``).

    Returns:
        The scalar cost ``J``.
    """
    weights = changeForm(nn_params,
                         sizeList['theta1_x'], sizeList['theta1_y'],
                         sizeList['theta2_x'], sizeList['theta2_y'])
    w_hidden = weights['Theta1']
    w_output = weights['Theta2']
    sample_count = Y.shape[0]

    # Forward pass: prepend a column of ones (bias unit) before each layer.
    inputs = np.insert(X, 0, values=np.ones(sample_count), axis=1)
    hidden = sigmoid(np.dot(inputs, w_hidden.T))
    hidden = np.insert(hidden, 0, values=np.ones(hidden.shape[0]), axis=1)
    output = sigmoid(np.dot(hidden, w_output.T))

    # Cross-entropy summed over all samples and labels.
    hit_term = np.multiply(Y, np.log(output))
    miss_term = np.multiply((1 - Y), np.log(1 - output))
    data_cost = 1 / sample_count * np.sum(-hit_term - miss_term)

    # The first column of each weight matrix (bias weights) is not penalized.
    penalty = np.sum(w_hidden[:, 1:] ** 2) + np.sum(w_output[:, 1:] ** 2)
    return data_cost + lamb / (2 * sample_count) * penalty

def nnGradient(nn_params,X,Y,lamb,sizeList):
    """Return the backpropagation gradient of the regularized cost.

    Args:
        nn_params: Flat vector holding both weight matrices.
        X: Sample matrix, one sample per row (without the bias column).
        Y: One-hot label matrix with one row per sample.
        lamb: Regularization strength.
        sizeList: Dict with the weight-matrix sizes (``'theta1_x'``,
            ``'theta1_y'``, ``'theta2_x'``, ``'theta2_y'``).

    Returns:
        A flat array: the gradient for Theta1 followed by the gradient for
        Theta2, in the same layout as ``nn_params``.
    """
    theta = changeForm(nn_params, sizeList['theta1_x'], sizeList['theta1_y'],
                       sizeList['theta2_x'], sizeList['theta2_y'])
    theta1 = theta['Theta1']
    theta2 = theta['Theta2']
    m = Y.shape[0]

    # Forward pass.  z2 is kept so that it is not recomputed for the
    # sigmoid derivative below.
    a1 = np.insert(X, 0, values=np.ones(m), axis=1)
    z2 = np.dot(a1, theta1.T)
    a2 = sigmoid(z2)
    a2 = np.insert(a2, 0, values=np.ones(a2.shape[0]), axis=1)
    a3 = sigmoid(np.dot(a2, theta2.T))

    # Backward pass: output error, then hidden-layer error.  The bias
    # column of theta2 is skipped because the bias unit has no incoming
    # weights to propagate to.
    delta3 = a3 - Y
    delta2 = np.multiply(np.dot(delta3, theta2[:, 1:]), sigmoidGradient(z2))

    # Accumulated gradients, averaged over the samples.
    theta1_grad = np.dot(delta2.T, a1) / m
    theta2_grad = np.dot(delta3.T, a2) / m
    # Regularize every column except the first (bias weights).
    theta1_grad[:, 1:] += lamb / m * theta1[:, 1:]
    theta2_grad[:, 1:] += lamb / m * theta2[:, 1:]

    # The optimizer works on a single flat vector, so unroll both matrices.
    grad = np.r_[theta1_grad.flatten(), theta2_grad.flatten()]
    return grad

#使用模型进行预测
#预测
def predict(theta1,theta2,X):
    """Predict a 1-based label for every row of ``X``.

    Args:
        theta1: Weights of the hidden layer (bias weights in column 0).
        theta2: Weights of the output layer (bias weights in column 0).
        X: Sample matrix, one sample per row (without the bias column).

    Returns:
        A flat array with, for each sample, the index of the largest output
        unit plus one.
    """
    sample_count = X.shape[0]

    # Hidden layer: prepend the bias column, then apply the sigmoid.
    with_bias = np.insert(X, 0, np.ones(sample_count), axis=1)
    hidden = sigmoid(np.dot(with_bias, theta1.T))

    # Output layer: same procedure on the hidden activations.
    hidden = np.insert(hidden, 0, np.ones(hidden.shape[0]), axis=1)
    scores = sigmoid(np.dot(hidden, theta2.T))

    # Shift the 0-based argmax to the 1-based label convention.
    labels = scores.argmax(axis=1) + 1
    return labels.flatten()

sigmoid.py

import numpy as np

#计算激励函数
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))``, element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator


#计算激励函数(S型函数)的导数
def sigmoidGradient(z):
    """Return ``sigmoid(z) * (1 - sigmoid(z))``, element-wise."""
    activation = sigmoid(z)
    return np.multiply(activation, 1 - activation)

头秃的女程序员

发布了141 篇原创文章 · 获赞 65 · 访问量 4万+

私信关注

Andrew NG机器学习 练习四 Neural Networks Learning PYTHON实现

一、 Neural Networks Learning

猜你喜欢

Andrew NG机器学习练习四 Neural Networks Learning PYTHON实现