TensorFlow Learning - Google Practice Book (Chapter 6)

6.1 Some Notes on Convolutional Neural Networks

  1. A fully connected design has far too many parameters; the large parameter count makes computation expensive, and as the number of layers grows the network quickly becomes computationally intractable.
  2. Key terms: convolution kernel (filter), local connectivity, receptive field, weight sharing, stride, edge padding, number of kernels.
  3. Local connectivity and weight sharing drastically reduce the number of parameters (see the parameter-count sketch after this list).
  4. The pooling layer (downsampling) strengthens features and compresses the data.
  5. Convolution is essentially a feature-extraction step.
  6. A convolutional network (CNN) classifies more accurately than a purely fully connected network (NN). The parameters themselves contribute "noise" to the decision; as the parameter count grows, once the loss becomes small enough to be comparable to this parameter-induced noise, the prediction is swamped by it. A CNN's parameter count is small relative to the amount of data, so this noise effect is far weaker than in a fully connected NN, and the CNN's predictions are correspondingly more accurate.
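A back-of-the-envelope sketch of why weight sharing matters (my own illustrative numbers, not the book's): the parameter count of one fully connected layer on a flattened 28*28 grayscale image versus one 5*5 convolutional layer with 32 filters.

# Parameter-count comparison (illustrative; a 500-unit hidden layer is assumed for the fully connected case)
IMAGE_SIZE, CHANNELS = 28, 1
HIDDEN_UNITS = 500                     # assumed width of the fully connected hidden layer
FILTER_SIZE, FILTER_COUNT = 5, 32      # one 5x5 kernel per output channel, 32 channels

fc_params = IMAGE_SIZE * IMAGE_SIZE * CHANNELS * HIDDEN_UNITS + HIDDEN_UNITS
conv_params = FILTER_SIZE * FILTER_SIZE * CHANNELS * FILTER_COUNT + FILTER_COUNT  # independent of image size

print(fc_params)    # 392500
print(conv_params)  # 832

Because the kernel is shared across every spatial position, the convolutional layer's parameter count does not grow with the image size, which is the point of item 3 above.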

6.2 Convolutional and Pooling Layer Examples

The book mainly presents an implementation of LeNet-5, split into three .py files that are run one after another.
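Before the full LeNet-5 code, here is a minimal sketch (the shapes and variable names are my own, not the book's) of how a single tf.nn.conv2d plus tf.nn.max_pool pair fits together:

import tensorflow as tf

# One 28x28 grayscale image in NHWC layout.
x = tf.placeholder(tf.float32, [1, 28, 28, 1], name='toy-input')
# A 5x5 kernel producing 32 output channels.
w = tf.get_variable("toy_weight", [5, 5, 1, 32], initializer=tf.truncated_normal_initializer(stddev=0.1))
b = tf.get_variable("toy_bias", [32], initializer=tf.constant_initializer(0.0))

conv = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME'), b))
pool = tf.nn.max_pool(conv, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

print(conv.get_shape())  # (1, 28, 28, 32): 'SAME' padding keeps the spatial size
print(pool.get_shape())  # (1, 14, 14, 32): 2x2 pooling with stride 2 halves height and width

The LeNet-5 code below repeats this conv-then-pool pattern twice before the fully connected layers.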

Part 1: LeNet5_infernece.py (the file name must match the import LeNet5_infernece statements used by the other two scripts)

# LeNet-5 forward propagation (inference)

import tensorflow as tf

# 1. Network parameters
INPUT_NODE = 784     # number of input nodes: 28*28 pixels flattened
OUTPUT_NODE = 10     # the output labels are the digits 0-9

IMAGE_SIZE = 28      # images are 28*28 pixels
NUM_CHANNELS = 1     # grayscale images, so a single channel
NUM_LABELS = 10

CONV1_DEEP = 32      # depth (number of filters) of the first convolutional layer
CONV1_SIZE = 5       # the first convolutional layer uses 5*5 filters

CONV2_DEEP = 64      # depth of the second convolutional layer
CONV2_SIZE = 5       # the second convolutional layer uses 5*5 filters

FC_SIZE = 512        # the fully connected layer has 512 nodes

# 2. Define the forward-propagation process
def inference(input_tensor, train, regularizer):
    with tf.variable_scope('layer1-conv1'):
        conv1_weights = tf.get_variable(
            "weight", [CONV1_SIZE, CONV1_SIZE, NUM_CHANNELS, CONV1_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases = tf.get_variable("bias", [CONV1_DEEP], initializer=tf.constant_initializer(0.0))
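        # strides=[1, 1, 1, 1]: stride 1 in both spatial dimensions (the first and last entries are always 1,
        # for the batch and channel dimensions); padding='SAME' zero-pads so the 28x28 spatial size is preserved.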
        conv1 = tf.nn.conv2d(input_tensor, conv1_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu1 = tf.nn.relu(tf.nn.bias_add(conv1, conv1_biases))

    with tf.name_scope("layer2-pool1"):
        # 2x2 max pooling with stride 2 halves the spatial dimensions (28x28 -> 14x14)
        pool1 = tf.nn.max_pool(relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

    with tf.variable_scope("layer3-conv2"):
        conv2_weights = tf.get_variable(
            "weight", [CONV2_SIZE, CONV2_SIZE, CONV1_DEEP, CONV2_DEEP],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases = tf.get_variable("bias", [CONV2_DEEP], initializer=tf.constant_initializer(0.0))
        conv2 = tf.nn.conv2d(pool1, conv2_weights, strides=[1, 1, 1, 1], padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))

    with tf.name_scope("layer4-pool2"):
        pool2 = tf.nn.max_pool(relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        # pool2.get_shape() returns the dimensions of the fourth layer's output, so no manual calculation is needed.
        pool_shape = pool2.get_shape().as_list()
        # To feed this output into the fifth (fully connected) layer it must be flattened into a vector whose
        # length is height * width * depth of the 4-D output; pool_shape[0] is the batch size.
        nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
        # tf.reshape() turns the fourth layer's output into a batch of flat vectors.
        reshaped = tf.reshape(pool2, [pool_shape[0], nodes])
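        # For the 28x28 MNIST input, two rounds of 2x2 / stride-2 pooling leave a 7x7x64 tensor,
        # so nodes = 7 * 7 * 64 = 3136 and the fc1 weight matrix below has shape [3136, 512].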

    with tf.variable_scope('layer5-fc1'):  # fifth layer: fully connected; declare its variables and run the forward pass
        fc1_weights = tf.get_variable("weight", [nodes, FC_SIZE],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        # Only the fully connected layers' weights are regularized.
        if regularizer is not None: tf.add_to_collection('losses', regularizer(fc1_weights))
        fc1_biases = tf.get_variable("bias", [FC_SIZE], initializer=tf.constant_initializer(0.1))
        fc1 = tf.nn.relu(tf.matmul(reshaped, fc1_weights) + fc1_biases)
        if train: fc1 = tf.nn.dropout(fc1, 0.5)

    with tf.variable_scope('layer6-fc2'):  # sixth layer: fully connected; declare its variables and run the forward pass
        fc2_weights = tf.get_variable("weight", [FC_SIZE, NUM_LABELS],
                                      initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None: tf.add_to_collection('losses', regularizer(fc2_weights))
        fc2_biases = tf.get_variable("bias", [NUM_LABELS], initializer=tf.constant_initializer(0.1))
        logit = tf.matmul(fc1, fc2_weights) + fc2_biases

    return logit  # logit holds the final unscaled prediction scores

# Dropout is generally used only in the fully connected layers, not in the convolutional or pooling layers;
# likewise, only the fully connected layers' weights need regularization.

Part 2: LeNet5_train.py

# LeNet-5 training

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import LeNet5_infernece
import os
import numpy as np

# 1. Define the network-related parameters
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.01
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 55000
MOVING_AVERAGE_DECAY = 0.99
MODEL_SAVE_PATH = "LeNet5_model/"  # the LeNet5_model subdirectory must already exist under the current directory
MODEL_NAME = "LeNet5_model"

# 2. Define the training process
def train(mnist):
    # Define the input placeholder as a 4-D tensor
    x = tf.placeholder(tf.float32, [
        BATCH_SIZE,
        LeNet5_infernece.IMAGE_SIZE,
        LeNet5_infernece.IMAGE_SIZE,
        LeNet5_infernece.NUM_CHANNELS],
                       name='x-input')  # input: a batch of 28*28 pixel images
    y_ = tf.placeholder(tf.float32, [None, LeNet5_infernece.OUTPUT_NODE], name='y-input')  # labels are one-hot rows of length 10, one example per row

    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)  # L2 regularization
    y = LeNet5_infernece.inference(x, True, regularizer)  # forward pass: input tensor, train flag (bool), regularizer
    global_step = tf.Variable(0, trainable=False)

    # Define the loss function, learning rate, moving-average op and the training step.
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())  # apply moving averages to all trainable variables
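    # Each shadow value is updated as shadow = decay * shadow + (1 - decay) * variable, where
    # decay = min(MOVING_AVERAGE_DECAY, (1 + global_step) / (10 + global_step)) because num_updates is supplied.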
    # sparse_softmax_cross_entropy_with_logits expects class indices, so tf.argmax(y_, 1) converts the one-hot labels back
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY,
        staircase=True)  # staircase=True makes the learning rate decay in discrete steps; False gives continuous decay
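    # With staircase=True: learning_rate = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step // decay_steps),
    # where decay_steps = mnist.train.num_examples / BATCH_SIZE; with staircase=False the exponent is the
    # continuous ratio global_step / decay_steps.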

    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)  # train by gradient-descent backpropagation
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # Initialize the TensorFlow Saver for persisting the model.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            
            reshaped_xs = np.reshape(xs, (
                BATCH_SIZE,
                LeNet5_infernece.IMAGE_SIZE,
                LeNet5_infernece.IMAGE_SIZE,
                LeNet5_infernece.NUM_CHANNELS))
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: reshaped_xs, y_: ys})
            # tf.control_dependencies controls execution order in the graph: the listed control inputs must finish
            # before the ops defined inside the context run. Training proceeds batch by batch, so the variables
            # defined above must be refreshed on every iteration; the control dependency guarantees they are
            # updated before the dependent op executes. Here, with tf.control_dependencies([train_step,
            # variables_averages_op]) ensures the gradient step and the moving-average update both run before
            # each sess.run of train_op returns.
 
            if i % 1000 == 0:
                print("After %d training step(s), loss on training batch is %g." % (step, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)

# 3. Main entry point
def main(argv=None):
    mnist = input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True)
    train(mnist)

if __name__ == '__main__':
    main()
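As a standalone illustration of the tf.control_dependencies note inside the training loop above, here is a toy sketch (the counter variable is my own, not part of the book's code): reading the value forces the increment to run first.

import tensorflow as tf

counter = tf.Variable(0, name='counter')
increment = tf.assign_add(counter, 1)        # the op that must run first

with tf.control_dependencies([increment]):
    # tf.identity creates a new op inside the context, so it carries a control edge to increment
    read_counter = tf.identity(counter)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(read_counter))  # 1: the increment ran before the read
    print(sess.run(read_counter))  # 2

This mirrors the training script: train_op = tf.no_op() does nothing by itself, but running it forces both train_step and variables_averages_op to execute on every iteration.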

Results:

After 1 training step(s), loss on training batch is 2.93483.
After 1001 training step(s), loss on training batch is 0.217289.
After 2001 training step(s), loss on training batch is 0.142817.
After 3001 training step(s), loss on training batch is 0.140553.
After 4001 training step(s), loss on training batch is 0.115699.
After 5001 training step(s), loss on training batch is 0.106968.
After 6001 training step(s), loss on training batch is 0.103355.
After 7001 training step(s), loss on training batch is 0.0896177.
After 8001 training step(s), loss on training batch is 0.0786352.
After 9001 training step(s), loss on training batch is 0.0765136.
After 10001 training step(s), loss on training batch is 0.069799.
After 11001 training step(s), loss on training batch is 0.0626096.
After 12001 training step(s), loss on training batch is 0.0571934.
After 13001 training step(s), loss on training batch is 0.058789.
After 14001 training step(s), loss on training batch is 0.0535299.
After 15001 training step(s), loss on training batch is 0.048936.
After 16001 training step(s), loss on training batch is 0.0476011.
After 17001 training step(s), loss on training batch is 0.0489079.
After 18001 training step(s), loss on training batch is 0.0458619.
After 19001 training step(s), loss on training batch is 0.0403256.
After 20001 training step(s), loss on training batch is 0.0414163.
After 21001 training step(s), loss on training batch is 0.0379971.
After 22001 training step(s), loss on training batch is 0.0373656.
After 23001 training step(s), loss on training batch is 0.0384955.
After 24001 training step(s), loss on training batch is 0.0396963.
After 25001 training step(s), loss on training batch is 0.0412267.
After 26001 training step(s), loss on training batch is 0.0370816.
After 27001 training step(s), loss on training batch is 0.0400256.
After 28001 training step(s), loss on training batch is 0.0365092.
After 29001 training step(s), loss on training batch is 0.0397724.

Part 3: LeNet5_eval.py

# Evaluate the trained model on the MNIST test set

import time
import math
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import LeNet5_infernece
import LeNet5_train

def evaluate(mnist):
    with tf.Graph().as_default() as g:
        # Define the input placeholder as a 4-D tensor
        x = tf.placeholder(tf.float32, [
            mnist.test.num_examples,
            #LeNet5_train.BATCH_SIZE,
            LeNet5_infernece.IMAGE_SIZE,
            LeNet5_infernece.IMAGE_SIZE,
            LeNet5_infernece.NUM_CHANNELS],
                           name='x-input')
        y_ = tf.placeholder(tf.float32, [None, LeNet5_infernece.OUTPUT_NODE], name='y-input')
        validate_feed = {x: mnist.test.images, y_: mnist.test.labels}
        global_step = tf.Variable(0, trainable=False)

        regularizer = tf.contrib.layers.l2_regularizer(LeNet5_train.REGULARIZATION_RATE)
        y = LeNet5_infernece.inference(x, False, regularizer)
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Restore the moving-average (shadow) values of the weights for evaluation:
        # variables_to_restore() maps each shadow-variable name to its variable, so the Saver loads the averaged values.
        variable_averages = tf.train.ExponentialMovingAverage(LeNet5_train.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        #n = math.ceil(mnist.test.num_examples / LeNet5_train.BATCH_SIZE)
        # The whole test set is evaluated in a single pass, so n is always 1 here
        # (the commented-out lines keep the original batch-by-batch alternative).
        n = math.ceil(mnist.test.num_examples / mnist.test.num_examples)
        for i in range(n):
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(LeNet5_train.MODEL_SAVE_PATH)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]  # recover the step count from the checkpoint file name
                    xs, ys = mnist.test.next_batch(mnist.test.num_examples)
                    #xs, ys = mnist.test.next_batch(LeNet5_train.BATCH_SIZE)
                    reshaped_xs = np.reshape(xs, (
                        mnist.test.num_examples,
                        #LeNet5_train.BATCH_SIZE,
                        LeNet5_infernece.IMAGE_SIZE,
                        LeNet5_infernece.IMAGE_SIZE,
                        LeNet5_infernece.NUM_CHANNELS))
                    accuracy_score = sess.run(accuracy, feed_dict={x:reshaped_xs, y_:ys})
                    print("After %s training step(s), test accuracy = %g" % (global_step, accuracy_score))
                else:
                    print('No checkpoint file found')
                    return

# Main program
def main(argv=None):
    mnist = input_data.read_data_sets("../../../datasets/MNIST_data", one_hot=True)
    evaluate(mnist)

if __name__ == '__main__':
    main()

Results:

Extracting ../../../datasets/MNIST_data\train-images-idx3-ubyte.gz
Extracting ../../../datasets/MNIST_data\train-labels-idx1-ubyte.gz
Extracting ../../../datasets/MNIST_data\t10k-images-idx3-ubyte.gz
Extracting ../../../datasets/MNIST_data\t10k-labels-idx1-ubyte.gz
INFO:tensorflow:Restoring parameters from LeNet5_model/LeNet5_model-54001
After 54001 training step(s), test accuracy = 0.9925
Compared with the fully connected MNIST code of the previous chapter, which reached about 98.4% accuracy, the convolutional model reaches roughly 99.4% (99.25% in the run above), an improvement of about one percentage point.

A classic convolutional network follows the pattern: input -> (convolutional layer+ -> pooling layer?)+ -> fully connected layer+
"convolutional layer+" means one or more consecutive convolutional layers, usually no more than three in a row;
"pooling layer?" means an optional pooling layer (zero or one); pooling also helps reduce overfitting;
a convolutional network generally ends with a few fully connected layers.

LeNet-5 model: input layer - conv layer - pooling layer - conv layer - pooling layer - fully connected layer - fully connected layer - output layer.
The pattern above can be read as a regular expression that most classic convolutional architectures, LeNet-5 included, match.

Distinction: the LeNet-5 model chains its convolutional structures in series,
while the Inception-v3 model combines different convolutional structures in parallel (filters of several sizes are applied to the same layer's input).
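To make the series-versus-parallel distinction concrete, here is a minimal sketch of an Inception-style block (my own toy shapes and names, not the book's Inception-v3 code): 1x1, 3x3 and 5x5 filters are applied to the same input in parallel and their outputs are concatenated along the channel axis.

import tensorflow as tf

def inception_branch(net, depth, ksize, name):
    # One parallel branch: a single ksize*ksize convolution with `depth` output channels.
    with tf.variable_scope(name):
        in_channels = net.get_shape().as_list()[-1]
        weights = tf.get_variable("weight", [ksize, ksize, in_channels, depth],
                                  initializer=tf.truncated_normal_initializer(stddev=0.1))
        return tf.nn.relu(tf.nn.conv2d(net, weights, strides=[1, 1, 1, 1], padding='SAME'))

x = tf.placeholder(tf.float32, [None, 28, 28, 32], name='block-input')
with tf.variable_scope('toy-inception-block'):
    branches = [inception_branch(x, 16, k, 'branch%dx%d' % (k, k)) for k in (1, 3, 5)]
    block_output = tf.concat(branches, axis=3)  # join the parallel outputs along the channel axis

print(block_output.get_shape())  # (?, 28, 28, 48): 16 channels from each of the three branches

In the real Inception-v3 each branch is itself a small stack of convolutions, but the series-versus-parallel contrast is the same.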

Reposted from blog.csdn.net/jasminexjf/article/details/80019720