Deep Learning Notes 01 -- Implementing a Deep Neural Network Model in Code


This series of posts records my process of learning deep learning.

1. The Basic Neural Network Model

[Figure: network structure diagram]

For the four-layer network in the figure, the input X holds one training sample per column (n samples in total), and g(·) denotes the activation function:

$$A_0 = X = \begin{bmatrix} x_{11} & \cdots & x_{n1}\\ x_{12} & \cdots & x_{n2}\\ x_{13} & \cdots & x_{n3} \end{bmatrix},\quad W_1 = \begin{bmatrix} w_{111} & w_{112} & w_{113}\\ w_{121} & w_{122} & w_{123}\\ w_{131} & w_{132} & w_{133} \end{bmatrix},\quad B_1 = \begin{bmatrix} b_{11}\\ b_{12}\\ b_{13} \end{bmatrix}$$

$$A_1 = g(W_1 X + B_1) = \begin{bmatrix} A_{111} & \cdots & A_{1n1}\\ A_{112} & \cdots & A_{1n2}\\ A_{113} & \cdots & A_{1n3} \end{bmatrix},\quad W_2 = \begin{bmatrix} w_{211} & w_{212} & w_{213}\\ w_{221} & w_{222} & w_{223}\\ w_{231} & w_{232} & w_{233} \end{bmatrix},\quad B_2 = \begin{bmatrix} b_{21}\\ b_{22}\\ b_{23} \end{bmatrix}$$

$$A_2 = g(W_2 A_1 + B_2) = \begin{bmatrix} A_{211} & \cdots & A_{2n1}\\ A_{212} & \cdots & A_{2n2}\\ A_{213} & \cdots & A_{2n3} \end{bmatrix},\quad W_3 = \begin{bmatrix} w_{311} & w_{312} & w_{313}\\ w_{321} & w_{322} & w_{323}\\ w_{331} & w_{332} & w_{333} \end{bmatrix},\quad B_3 = \begin{bmatrix} b_{31}\\ b_{32}\\ b_{33} \end{bmatrix}$$

$$A_3 = g(W_3 A_2 + B_3) = \begin{bmatrix} A_{311} & \cdots & A_{3n1}\\ A_{312} & \cdots & A_{3n2}\\ A_{313} & \cdots & A_{3n3} \end{bmatrix},\quad W_4 = \begin{bmatrix} w_{411} & w_{412} & w_{413}\\ w_{421} & w_{422} & w_{423} \end{bmatrix},\quad B_4 = \begin{bmatrix} b_{41}\\ b_{42} \end{bmatrix}$$

Output:

$$Y_P = A_4 = g(W_4 A_3 + B_4) = \begin{bmatrix} A_{411} & \cdots & A_{4n1}\\ A_{412} & \cdots & A_{4n2} \end{bmatrix} = \begin{bmatrix} Y_{P11} & \cdots & Y_{Pn1}\\ Y_{P12} & \cdots & Y_{Pn2} \end{bmatrix}$$
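To make the shapes concrete, here is a minimal sketch of this forward pass in NumPy. The layer sizes 3-3-3-3-2 follow the matrices above, while the sample count m and the choice of sigmoid for g are illustrative assumptions:

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

m = 5                        # number of training samples (hypothetical)
A = np.random.randn(3, m)    # A0 = X: one column per sample
layer_dims = [3, 3, 3, 3, 2]
for l in range(1, len(layer_dims)):
    W = np.random.randn(layer_dims[l], layer_dims[l - 1])  # W_l
    B = np.zeros((layer_dims[l], 1))                       # B_l, broadcast over columns
    A = sigmoid(W @ A + B)                                 # A_l = g(W_l A_{l-1} + B_l)
print(A.shape)  # (2, m): YP holds one 2-dimensional prediction per sample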

Training minimizes the average loss over the m samples:

$$J = \frac{1}{m}\sum_{i=1}^{m} \mathrm{Loss}(Y_P^{(i)}, Y^{(i)})$$

Backpropagation, with g' the derivative of the activation function and ⊙ elementwise multiplication:

$$dY_P = \frac{\partial J}{\partial Y_P},\quad dZ_4 = dY_P \odot g'(Z_4)\ \text{where}\ Z_4 = W_4 A_3 + B_4,\quad dW_4 = dZ_4\,A_3^{T},\quad dB_4 = dZ_4,\quad dA_3 = W_4^{T}\,dZ_4$$

$$dZ_3 = dA_3 \odot g'(Z_3)\ \text{where}\ Z_3 = W_3 A_2 + B_3,\quad dW_3 = dZ_3\,A_2^{T},\quad dB_3 = dZ_3,\quad dA_2 = W_3^{T}\,dZ_3$$

$$\vdots$$

Gradient descent then updates every layer (in the code below, dW and dB are additionally divided by m to average over the samples):

$$W = W - \alpha\,dW,\quad B = B - \alpha\,dB$$
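The layer gradients above can be verified numerically. Below is a minimal sketch of a gradient check for a single sigmoid layer; the squared-error loss, the shapes, and all variable names here are illustrative assumptions, not part of the original derivation:

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

np.random.seed(0)
m = 4
X = np.random.randn(3, m)
Y = np.random.randn(2, m)
W = np.random.randn(2, 3)
B = np.zeros((2, 1))

def loss(W):
    A = sigmoid(W @ X + B)
    return np.sum((A - Y) ** 2) / (2 * m)  # J = (1/m) * sum of 0.5*||A - Y||^2

# analytic gradient: dZ = dA ⊙ g'(Z), dW = dZ A^T (the 1/m is folded into dA here)
Z = W @ X + B
A = sigmoid(Z)
dA = (A - Y) / m
dZ = dA * A * (1 - A)        # g'(Z) = sigmoid(Z) * (1 - sigmoid(Z))
dW = dZ @ X.T

# numerical gradient for one entry of W via central differences
eps = 1e-6
W_plus, W_minus = W.copy(), W.copy()
W_plus[0, 0] += eps
W_minus[0, 0] -= eps
numeric = (loss(W_plus) - loss(W_minus)) / (2 * eps)
print(dW[0, 0], numeric)     # the two values should agree closely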

2. Code Implementation

import numpy as np
import random


class Neuron_Net:
    """
    A from-scratch implementation of a fully connected neural network.
    """

    def __init__(self):
        self.alfa = 0.01           # learning rate
        self.layer_shape = []      # (input_dim, output_dim) of each layer
        self.W = []                # weight matrices
        self.dW = []               # weight gradients (stored in reverse layer order)
        self.B = []                # bias vectors
        self.dB = []               # bias gradients (reverse layer order)
        self.Z = []                # pre-activations Z = WA + B
        self.dZ = []               # pre-activation gradients (reverse layer order)
        self.A = [0]               # activations; A[0] holds the input X
        self.dA = [0]              # activation gradients (reverse layer order)
        self.activity_infos = []   # activation function info per layer
        self.dropout = []          # keep probability per layer (1 = no dropout)
        self.loss_func = None
        self.debug = False
        self.print_accuracy = True
        self.accuracy_error = 0.5  # tolerance when counting a prediction as correct

    def addLayer(self, input_dim: int, output_dim: int, activity_info, dropout=1, W_init=1):
        """
        Add a fully connected layer.

        :param input_dim: input dimension
        :param output_dim: output dimension
        :param activity_info: activation info dict {'main_func': (Z) -> f(Z), 'dA/dZ': (Z) -> dA/dZ}
        :param dropout: keep probability for dropout regularization; 1 disables it
        :param W_init: scale factor for the W initialization
        :return: None
        """
        # initialize W with scaled random values and B with zeros
        self.W.append(np.random.randn(output_dim, input_dim) * np.sqrt(W_init / input_dim))
        self.B.append(np.zeros((output_dim, 1)))
        self.activity_infos.append(activity_info)
        self.layer_shape.append((input_dim, output_dim))
        self.dropout.append(dropout)
        # reserve a slot in each bookkeeping list for this layer
        self.A.append(0)
        self.dA.append(0)
        self.dW.append(0)
        self.dB.append(0)
        self.Z.append(0)
        self.dZ.append(0)

    def compile(self, loss_func, alfa, debug=False, print_accuracy=True, accuracy_error=0.5):
        """
        Check layer compatibility and store the training configuration.

        :param loss_func: loss function (An, Y, accuracy_error) -> (dAn, Loss, accuracy)
        :param alfa: learning rate
        :param debug: whether to enable debug mode (not implemented)
        :param print_accuracy: whether to print the accuracy
        :param accuracy_error: tolerance for counting a prediction as correct
        """
        for index, item in enumerate(self.layer_shape):
            if not self.W[index].shape[1] == item[0]:
                raise Exception(f'W[{index}] has the wrong number of columns!')
            if not self.B[index].shape[0] == item[1]:
                raise Exception(f'B[{index}] has the wrong number of rows!')
            if index > 0 and not self.layer_shape[index - 1][1] == item[0]:
                raise Exception(f'Output of layer {index - 1} does not match input of layer {index}')
        self.loss_func = loss_func
        self.debug = debug
        self.print_accuracy = print_accuracy
        self.accuracy_error = accuracy_error
        self.alfa = alfa

    def train(self, X, Y, epoch):
        """
        Train the model.

        :param X: training set (one column per sample)
        :param Y: label set
        :param epoch: number of iterations
        """
        self.A[0] = X
        for i in range(epoch):
            print('Epoch', i + 1)
            # forward propagation
            for index in range(len(self.layer_shape)):
                Z = np.dot(self.W[index], self.A[index]) + self.B[index]
                self.A[index + 1] = self.activity_infos[index]['main_func'](Z)
                self.Z[index] = Z
                # inverted dropout regularization: randomly drop units, then rescale
                d = np.random.rand(self.A[index + 1].shape[0], self.A[index + 1].shape[1]) <= self.dropout[index]
                self.A[index + 1] = np.multiply(self.A[index + 1], d)
                self.A[index + 1] /= self.dropout[index]
            dAn, Loss, accuracy = self.loss_func(self.A[-1], Y, self.accuracy_error)
            # backward propagation
            # gradients are stored in reverse layer order, e.g. dA = [dAn, dA(n-1), ..., dA0]
            self.dA[0] = dAn
            for index in range(len(self.layer_shape)):
                # dZ of the (index+1)-th layer from the back; the activation info
                # must be indexed in reverse order, matching Z[-index - 1]
                self.dZ[index] = np.multiply(self.dA[index], self.activity_infos[-index - 1]['dA/dZ'](self.Z[-index - 1]))
                dW = np.dot(self.dZ[index], self.A[-index - 2].T)
                self.dW[index] = dW / self.A[-index - 2].shape[1]  # average over the m samples
                self.dB[index] = self.dZ[index].sum(axis=1, keepdims=True) / self.A[-index - 2].shape[1]
                self.dA[index + 1] = np.dot(self.W[-index - 1].T, self.dZ[index])
            self.update_W_B()
            if self.print_accuracy:
                print('accuracy:', accuracy)
            print('Loss:', Loss)

    def update_W_B(self):
        # update W and B; dW/dB are stored in reverse layer order, hence the negative index
        for index in range(len(self.W)):
            self.W[index] -= self.alfa * self.dW[-index - 1]
            self.B[index] -= self.alfa * self.dB[-index - 1]

    def predict(self, X):
        # forward pass only; dropout is not applied at prediction time
        temp_arr = [X, 0]
        for index in range(len(self.layer_shape)):
            temp_arr[1] = np.dot(self.W[index], temp_arr[0]) + self.B[index]
            temp_arr[0] = self.activity_infos[index]['main_func'](temp_arr[1])
        return temp_arr[0]

Activation and Loss Functions

def activity_func(a):
    # sigmoid function
    yp = 1 / (1 + np.exp(-1 * a))
    return yp


def dA_dZ(Z):
    # derivative of the sigmoid function: dA/dZ = sigmoid(Z) * (1 - sigmoid(Z))
    return np.multiply((1 / (1 + np.exp(-1 * Z))), (1 - (1 / (1 + np.exp(-1 * Z)))))


def loss_func(YP, Y, error):
    size = YP.shape[1]  # number of predictions
    Y_arr = Y.tolist()
    YP_arr = YP.tolist()
    # clip YP away from 0 and 1 to avoid division by zero / log(0) when computing dA
    YP = np.clip(YP, 1e-12, 1 - 1e-12)
    dA = -(np.true_divide(Y, YP) + np.true_divide((Y - 1), (1 - YP)))
    # cross-entropy loss summed over the samples
    loss_part1 = np.dot(Y, np.log(YP).T)
    loss_part2 = np.dot(1 - Y, np.log(1 - YP).T)
    loss = -(loss_part1 + loss_part2).item()
    # a prediction counts as correct if it is within `error` of the label
    count = 0
    for index, item in enumerate(Y_arr[0]):
        if abs(item - YP_arr[0][index]) < error:
            count += 1
    accuracy = count / size
    return dA, loss / size if not np.isnan(loss / size) else 0, accuracy


def relu(z):
    # ReLU: max(0, z)
    return np.multiply(z, z > 0)


def relu_dA_dZ(z):
    # derivative of ReLU: 1 where z > 0, else 0
    return (z > 0).astype(float)
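The dA returned by loss_func is the derivative of the cross-entropy loss with respect to YP. For a single prediction,

$$L = -\big(Y\log Y_P + (1-Y)\log(1-Y_P)\big) \quad\Rightarrow\quad \frac{\partial L}{\partial Y_P} = -\left(\frac{Y}{Y_P} - \frac{1-Y}{1-Y_P}\right) = -\left(\frac{Y}{Y_P} + \frac{Y-1}{1-Y_P}\right)$$

which matches the dA expression in the code. The activity_info dictionary interface also makes it easy to plug in other activations; for example, a tanh entry in the same {'main_func', 'dA/dZ'} format could look like this (a sketch reusing the numpy import above; not part of the original code):

def tanh_func(z):
    # tanh activation
    return np.tanh(z)


def tanh_dA_dZ(z):
    # derivative of tanh: dA/dZ = 1 - tanh(z)^2
    return 1 - np.tanh(z) ** 2


tanh_info = {'main_func': tanh_func, 'dA/dZ': tanh_dA_dZ}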

Test Script


if __name__ == '__main__':
    # build a toy 2D dataset: the first 100 points are class 1, the rest class 0
    X_list = []
    for i in range(200):
        if i < 100:
            temp1 = random.random() * 100
            temp2 = 100 + random.random() * 100
        else:
            temp1 = 100 + random.random() * 100
            temp2 = random.random() * 100
        X_list.append([temp1, temp2])
    Y_list = []
    for i in range(200):
        Y_list.append(1 if i < 100 else 0)
    X = np.array(X_list).T
    Y = np.array(Y_list).reshape(1, 200)
    alfa = 0.01
    net = Neuron_Net()
    activity_info = {'main_func': activity_func, 'dA/dZ': dA_dZ}
    relu_info = {'main_func': relu, 'dA/dZ': relu_dA_dZ}
    net.addLayer(2, 3, activity_info)
    net.addLayer(3, 1, activity_info)
    net.compile(loss_func, alfa, accuracy_error=0.5)
    net.train(X, Y, 500)
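As a quick sanity check, predict can be run on the training data at the end of the __main__ block above (a sketch; thresholding the single sigmoid output at 0.5 to obtain class labels is an assumption, not part of the original script):

    # threshold the sigmoid output at 0.5 to obtain class labels
    YP = net.predict(X)
    predictions = (YP > 0.5).astype(int)
    print('training accuracy:', np.mean(predictions == Y))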

Reposted from blog.csdn.net/YmgmY/article/details/107437661