Deep Learning Notes 01 -- Implementing a Deep Neural Network Model in Code


This series of posts records my process of learning deep learning.

1. The Basic Neural Network Model

[Figure: network structure diagram]

For the four-layer network in the figure, the input X holds one training sample per column (n samples in total), and g(·) denotes the activation function:

$$A_0 = X = \begin{bmatrix} x_{11} & \cdots & x_{n1}\\ x_{12} & \cdots & x_{n2}\\ x_{13} & \cdots & x_{n3} \end{bmatrix},\quad W_1 = \begin{bmatrix} w_{111} & w_{112} & w_{113}\\ w_{121} & w_{122} & w_{123}\\ w_{131} & w_{132} & w_{133} \end{bmatrix},\quad B_1 = \begin{bmatrix} b_{11}\\ b_{12}\\ b_{13} \end{bmatrix}$$

$$A_1 = g(W_1 X + B_1) = \begin{bmatrix} A_{111} & \cdots & A_{1n1}\\ A_{112} & \cdots & A_{1n2}\\ A_{113} & \cdots & A_{1n3} \end{bmatrix},\quad W_2 = \begin{bmatrix} w_{211} & w_{212} & w_{213}\\ w_{221} & w_{222} & w_{223}\\ w_{231} & w_{232} & w_{233} \end{bmatrix},\quad B_2 = \begin{bmatrix} b_{21}\\ b_{22}\\ b_{23} \end{bmatrix}$$

$$A_2 = g(W_2 A_1 + B_2) = \begin{bmatrix} A_{211} & \cdots & A_{2n1}\\ A_{212} & \cdots & A_{2n2}\\ A_{213} & \cdots & A_{2n3} \end{bmatrix},\quad W_3 = \begin{bmatrix} w_{311} & w_{312} & w_{313}\\ w_{321} & w_{322} & w_{323}\\ w_{331} & w_{332} & w_{333} \end{bmatrix},\quad B_3 = \begin{bmatrix} b_{31}\\ b_{32}\\ b_{33} \end{bmatrix}$$

$$A_3 = g(W_3 A_2 + B_3) = \begin{bmatrix} A_{311} & \cdots & A_{3n1}\\ A_{312} & \cdots & A_{3n2}\\ A_{313} & \cdots & A_{3n3} \end{bmatrix},\quad W_4 = \begin{bmatrix} w_{411} & w_{412} & w_{413}\\ w_{421} & w_{422} & w_{423} \end{bmatrix},\quad B_4 = \begin{bmatrix} b_{41}\\ b_{42} \end{bmatrix}$$

Output:

$$Y_P = A_4 = g(W_4 A_3 + B_4) = \begin{bmatrix} A_{411} & \cdots & A_{4n1}\\ A_{412} & \cdots & A_{4n2} \end{bmatrix} = \begin{bmatrix} Y_{P11} & \cdots & Y_{Pn1}\\ Y_{P12} & \cdots & Y_{Pn2} \end{bmatrix}$$
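To make the shapes concrete, here is a minimal sketch of this forward pass in NumPy. The layer sizes 3-3-3-3-2 follow the matrices above, while the sample count m and the choice of sigmoid for g are illustrative assumptions:

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

m = 5                        # number of training samples (hypothetical)
A = np.random.randn(3, m)    # A0 = X: one column per sample
layer_dims = [3, 3, 3, 3, 2]
for l in range(1, len(layer_dims)):
    W = np.random.randn(layer_dims[l], layer_dims[l - 1])  # W_l
    B = np.zeros((layer_dims[l], 1))                       # B_l, broadcast over columns
    A = sigmoid(W @ A + B)                                 # A_l = g(W_l A_{l-1} + B_l)
print(A.shape)  # (2, m): YP holds one 2-dimensional prediction per sample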

Training minimizes the average loss over the m samples:

$$J = \frac{1}{m}\sum_{i=1}^{m} \mathrm{Loss}(Y_P^{(i)}, Y^{(i)})$$

Backpropagation, with g' the derivative of the activation function and ⊙ elementwise multiplication:

$$dY_P = \frac{\partial J}{\partial Y_P},\quad dZ_4 = dY_P \odot g'(Z_4)\ \text{where}\ Z_4 = W_4 A_3 + B_4,\quad dW_4 = dZ_4\,A_3^{T},\quad dB_4 = dZ_4,\quad dA_3 = W_4^{T}\,dZ_4$$

$$dZ_3 = dA_3 \odot g'(Z_3)\ \text{where}\ Z_3 = W_3 A_2 + B_3,\quad dW_3 = dZ_3\,A_2^{T},\quad dB_3 = dZ_3,\quad dA_2 = W_3^{T}\,dZ_3$$

$$\vdots$$

Gradient descent then updates every layer (in the code below, dW and dB are additionally divided by m to average over the samples):

$$W = W - \alpha\,dW,\quad B = B - \alpha\,dB$$
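The layer gradients above can be verified numerically. Below is a minimal sketch of a gradient check for a single sigmoid layer; the squared-error loss, the shapes, and all variable names here are illustrative assumptions, not part of the original derivation:

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

np.random.seed(0)
m = 4
X = np.random.randn(3, m)
Y = np.random.randn(2, m)
W = np.random.randn(2, 3)
B = np.zeros((2, 1))

def loss(W):
    A = sigmoid(W @ X + B)
    return np.sum((A - Y) ** 2) / (2 * m)  # J = (1/m) * sum of 0.5*||A - Y||^2

# analytic gradient: dZ = dA ⊙ g'(Z), dW = dZ A^T (the 1/m is folded into dA here)
Z = W @ X + B
A = sigmoid(Z)
dA = (A - Y) / m
dZ = dA * A * (1 - A)        # g'(Z) = sigmoid(Z) * (1 - sigmoid(Z))
dW = dZ @ X.T

# numerical gradient for one entry of W via central differences
eps = 1e-6
W_plus, W_minus = W.copy(), W.copy()
W_plus[0, 0] += eps
W_minus[0, 0] -= eps
numeric = (loss(W_plus) - loss(W_minus)) / (2 * eps)
print(dW[0, 0], numeric)     # the two values should agree closely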

2. Code Implementation

import numpy as np
import random


class Neuron_Net:
    """
    A from-scratch implementation of a fully connected neural network.
    """

    def __init__(self):
        self.alfa = 0.01           # learning rate
        self.layer_shape = []      # (input_dim, output_dim) of each layer
        self.W = []                # weight matrices
        self.dW = []               # weight gradients (stored in reverse layer order)
        self.B = []                # bias vectors
        self.dB = []               # bias gradients (reverse layer order)
        self.Z = []                # pre-activations Z = WA + B
        self.dZ = []               # pre-activation gradients (reverse layer order)
        self.A = [0]               # activations; A[0] holds the input X
        self.dA = [0]              # activation gradients (reverse layer order)
        self.activity_infos = []   # activation function info per layer
        self.dropout = []          # keep probability per layer (1 = no dropout)
        self.loss_func = None
        self.debug = False
        self.print_accuracy = True
        self.accuracy_error = 0.5  # tolerance when counting a prediction as correct

    def addLayer(self, input_dim: int, output_dim: int, activity_info, dropout=1, W_init=1):
        """
        Add a fully connected layer.

        :param input_dim: input dimension
        :param output_dim: output dimension
        :param activity_info: activation info dict {'main_func': (Z) -> f(Z), 'dA/dZ': (Z) -> dA/dZ}
        :param dropout: keep probability for dropout regularization; 1 disables it
        :param W_init: scale factor for the W initialization
        :return: None
        """
        # initialize W with scaled random values and B with zeros
        self.W.append(np.random.randn(output_dim, input_dim) * np.sqrt(W_init / input_dim))
        self.B.append(np.zeros((output_dim, 1)))
        self.activity_infos.append(activity_info)
        self.layer_shape.append((input_dim, output_dim))
        self.dropout.append(dropout)
        # reserve a slot in each bookkeeping list for this layer
        self.A.append(0)
        self.dA.append(0)
        self.dW.append(0)
        self.dB.append(0)
        self.Z.append(0)
        self.dZ.append(0)

    def compile(self, loss_func, alfa, debug=False, print_accuracy=True, accuracy_error=0.5):
        """
        Check layer compatibility and store the training configuration.

        :param loss_func: loss function (An, Y, accuracy_error) -> (dAn, Loss, accuracy)
        :param alfa: learning rate
        :param debug: whether to enable debug mode (not implemented)
        :param print_accuracy: whether to print the accuracy
        :param accuracy_error: tolerance for counting a prediction as correct
        """
        for index, item in enumerate(self.layer_shape):
            if not self.W[index].shape[1] == item[0]:
                raise Exception(f'W[{index}] has the wrong number of columns!')
            if not self.B[index].shape[0] == item[1]:
                raise Exception(f'B[{index}] has the wrong number of rows!')
            if index > 0 and not self.layer_shape[index - 1][1] == item[0]:
                raise Exception(f'Output of layer {index - 1} does not match input of layer {index}')
        self.loss_func = loss_func
        self.debug = debug
        self.print_accuracy = print_accuracy
        self.accuracy_error = accuracy_error
        self.alfa = alfa

    def train(self, X, Y, epoch):
        """
        Train the model.

        :param X: training set (one column per sample)
        :param Y: label set
        :param epoch: number of iterations
        """
        self.A[0] = X
        for i in range(epoch):
            print('Epoch', i + 1)
            # forward propagation
            for index in range(len(self.layer_shape)):
                Z = np.dot(self.W[index], self.A[index]) + self.B[index]
                self.A[index + 1] = self.activity_infos[index]['main_func'](Z)
                self.Z[index] = Z
                # inverted dropout regularization: randomly drop units, then rescale
                d = np.random.rand(self.A[index + 1].shape[0], self.A[index + 1].shape[1]) <= self.dropout[index]
                self.A[index + 1] = np.multiply(self.A[index + 1], d)
                self.A[index + 1] /= self.dropout[index]
            dAn, Loss, accuracy = self.loss_func(self.A[-1], Y, self.accuracy_error)
            # backward propagation
            # gradients are stored in reverse layer order, e.g. dA = [dAn, dA(n-1), ..., dA0]
            self.dA[0] = dAn
            for index in range(len(self.layer_shape)):
                # dZ of the (index+1)-th layer from the back; the activation info
                # must be indexed in reverse order, matching Z[-index - 1]
                self.dZ[index] = np.multiply(self.dA[index], self.activity_infos[-index - 1]['dA/dZ'](self.Z[-index - 1]))
                dW = np.dot(self.dZ[index], self.A[-index - 2].T)
                self.dW[index] = dW / self.A[-index - 2].shape[1]  # average over the m samples
                self.dB[index] = self.dZ[index].sum(axis=1, keepdims=True) / self.A[-index - 2].shape[1]
                self.dA[index + 1] = np.dot(self.W[-index - 1].T, self.dZ[index])
            self.update_W_B()
            if self.print_accuracy:
                print('accuracy:', accuracy)
            print('Loss:', Loss)

    def update_W_B(self):
        # update W and B; dW/dB are stored in reverse layer order, hence the negative index
        for index in range(len(self.W)):
            self.W[index] -= self.alfa * self.dW[-index - 1]
            self.B[index] -= self.alfa * self.dB[-index - 1]

    def predict(self, X):
        # forward pass only; dropout is not applied at prediction time
        temp_arr = [X, 0]
        for index in range(len(self.layer_shape)):
            temp_arr[1] = np.dot(self.W[index], temp_arr[0]) + self.B[index]
            temp_arr[0] = self.activity_infos[index]['main_func'](temp_arr[1])
        return temp_arr[0]

Activation and Loss Functions

def activity_func(a):
    # sigmoid function
    yp = 1 / (1 + np.exp(-1 * a))
    return yp


def dA_dZ(Z):
    # derivative of the sigmoid function: dA/dZ = sigmoid(Z) * (1 - sigmoid(Z))
    return np.multiply((1 / (1 + np.exp(-1 * Z))), (1 - (1 / (1 + np.exp(-1 * Z)))))


def loss_func(YP, Y, error):
    size = YP.shape[1]  # number of predictions
    Y_arr = Y.tolist()
    YP_arr = YP.tolist()
    # clip YP away from 0 and 1 to avoid division by zero / log(0) when computing dA
    YP = np.clip(YP, 1e-12, 1 - 1e-12)
    dA = -(np.true_divide(Y, YP) + np.true_divide((Y - 1), (1 - YP)))
    # cross-entropy loss summed over the samples
    loss_part1 = np.dot(Y, np.log(YP).T)
    loss_part2 = np.dot(1 - Y, np.log(1 - YP).T)
    loss = -(loss_part1 + loss_part2).item()
    # a prediction counts as correct if it is within `error` of the label
    count = 0
    for index, item in enumerate(Y_arr[0]):
        if abs(item - YP_arr[0][index]) < error:
            count += 1
    accuracy = count / size
    return dA, loss / size if not np.isnan(loss / size) else 0, accuracy


def relu(z):
    # ReLU: max(0, z)
    return np.multiply(z, z > 0)


def relu_dA_dZ(z):
    # derivative of ReLU: 1 where z > 0, else 0
    return (z > 0).astype(float)
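The dA returned by loss_func is the derivative of the cross-entropy loss with respect to YP. For a single prediction,

$$L = -\big(Y\log Y_P + (1-Y)\log(1-Y_P)\big) \quad\Rightarrow\quad \frac{\partial L}{\partial Y_P} = -\left(\frac{Y}{Y_P} - \frac{1-Y}{1-Y_P}\right) = -\left(\frac{Y}{Y_P} + \frac{Y-1}{1-Y_P}\right)$$

which matches the dA expression in the code. The activity_info dictionary interface also makes it easy to plug in other activations; for example, a tanh entry in the same {'main_func', 'dA/dZ'} format could look like this (a sketch reusing the numpy import above; not part of the original code):

def tanh_func(z):
    # tanh activation
    return np.tanh(z)


def tanh_dA_dZ(z):
    # derivative of tanh: dA/dZ = 1 - tanh(z)^2
    return 1 - np.tanh(z) ** 2


tanh_info = {'main_func': tanh_func, 'dA/dZ': tanh_dA_dZ}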

Test Script


if __name__ == '__main__':
    # build a toy 2D dataset: the first 100 points are class 1, the rest class 0
    X_list = []
    for i in range(200):
        if i < 100:
            temp1 = random.random() * 100
            temp2 = 100 + random.random() * 100
        else:
            temp1 = 100 + random.random() * 100
            temp2 = random.random() * 100
        X_list.append([temp1, temp2])
    Y_list = []
    for i in range(200):
        Y_list.append(1 if i < 100 else 0)
    X = np.array(X_list).T
    Y = np.array(Y_list).reshape(1, 200)
    alfa = 0.01
    net = Neuron_Net()
    activity_info = {'main_func': activity_func, 'dA/dZ': dA_dZ}
    relu_info = {'main_func': relu, 'dA/dZ': relu_dA_dZ}
    net.addLayer(2, 3, activity_info)
    net.addLayer(3, 1, activity_info)
    net.compile(loss_func, alfa, accuracy_error=0.5)
    net.train(X, Y, 500)
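As a quick sanity check, predict can be run on the training data at the end of the __main__ block above (a sketch; thresholding the single sigmoid output at 0.5 to obtain class labels is an assumption, not part of the original script):

    # threshold the sigmoid output at 0.5 to obtain class labels
    YP = net.predict(X)
    predictions = (YP > 0.5).astype(int)
    print('training accuracy:', np.mean(predictions == Y))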

Reposted from blog.csdn.net/YmgmY/article/details/107437661