Tensorflow 入门学习14.TensorFlow实现LeNet实例

本文学习内容来自《TensorFlow深度学习应用实践》

本节逐步对LeNet中的每一层进行分解，会对神经元的个数、隐藏层的层数以及学习率等神经网络关键参数做出调整，观察模型训练的时间。

代码：

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time

# Inputs: flattened 28x28 images (784 values) and one-hot labels over 10 classes.
x = tf.placeholder('float', [None, 784])
y_ = tf.placeholder('float', [None, 10])

# Restore the [batch, height, width, channels] layout that conv2d expects.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First convolutional layer.
# 5x5 kernels, 1 input channel -> 6 output channels.
filter1 = tf.Variable(tf.truncated_normal([5, 5, 1, 6]))
# One bias per output channel, added to the convolution result.
bias1 = tf.Variable(tf.truncated_normal([6]))
conv1 = tf.nn.conv2d(x_image, filter1, strides=[1, 1, 1, 1], padding='SAME')
h_conv1 = tf.nn.sigmoid(conv1 + bias1)  # activation of the first conv layer

# Max pooling: keep the largest value in each 2x2 window (28x28 -> 14x14).
maxPool2 = tf.nn.max_pool(h_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Second convolutional layer: 5x5 kernels, 6 -> 16 channels.
filter2 = tf.Variable(tf.truncated_normal([5, 5, 6, 16]))
bias2 = tf.Variable(tf.truncated_normal([16]))
conv2 = tf.nn.conv2d(maxPool2, filter2, strides=[1, 1, 1, 1], padding='SAME')
h_conv2 = tf.nn.sigmoid(conv2 + bias2)

# Second max pooling (14x14 -> 7x7).
maxPool3 = tf.nn.max_pool(h_conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

# Third convolutional layer: 5x5 kernels, 16 -> 120 channels. With SAME padding
# and stride 1 the spatial size stays 7x7, so each image yields a [7, 7, 120]
# feature map; no pooling layer follows it.
filter3 = tf.Variable(tf.truncated_normal([5, 5, 16, 120]))
bias3 = tf.Variable(tf.truncated_normal([120]))
conv3 = tf.nn.conv2d(maxPool3, filter3, strides=[1, 1, 1, 1], padding='SAME')
h_conv3 = tf.nn.sigmoid(conv3 + bias3)

# Two fully connected layers follow. They act as the "classifier" of the
# network, mapping the learned feature representation to the label space.

# Weights of the first fully connected layer (7*7*120 inputs -> 80 units).
W_fc1 = tf.Variable(tf.truncated_normal([7 * 7 * 120, 80]))
# Biases of the first fully connected layer.
b_fc1 = tf.Variable(tf.truncated_normal([80]))
# Flatten the convolution output to one row per image.
h_pool2_flat = tf.reshape(h_conv3, [-1, 7 * 7 * 120])
# Affine transform followed by the sigmoid activation.
h_fc1 = tf.nn.sigmoid(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Output layer: 80 units -> 10 class scores, turned into probabilities by softmax.
W_fc2 = tf.Variable(tf.truncated_normal([80, 10]))
b_fc2 = tf.Variable(tf.truncated_normal([10]))
y_conv = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2)

# Cross entropy (summed over the batch) is the loss; plain gradient descent
# with a learning rate of 0.001 minimises it.
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(cross_entropy)

# Accuracy: fraction of samples whose most probable class equals the label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Load MNIST before opening the session so a failed download cannot leak it.
mnist_data_set = input_data.read_data_sets('MNIST_data', one_hot=True)

# InteractiveSession installs itself as the default session, which is what
# `accuracy.eval(...)` and `train_step.run(...)` below rely on.
sess = tf.InteractiveSession()
try:
    # Initialise every variable (replaces the deprecated
    # tf.initialize_all_variables()).
    sess.run(tf.global_variables_initializer())

    start_time = time.time()
    for i in range(20000):
        # Fetch the next training batch.
        batch_xs, batch_ys = mnist_data_set.train.next_batch(200)

        # Every 2nd iteration, report the accuracy on the current batch
        # (measured before that batch is trained on).
        if i % 2 == 0:
            train_accuracy = accuracy.eval(feed_dict={x: batch_xs, y_: batch_ys})
            print("step %d, training accuracy %g" % (i, train_accuracy))
            # Wall-clock time elapsed since the previous report.
            end_time = time.time()
            print('time:', (end_time - start_time))
            start_time = end_time

        # Train on every batch. This call used to sit inside the `if` above,
        # so every other batch was fetched and then thrown away untrained.
        train_step.run(feed_dict={x: batch_xs, y_: batch_ys})
finally:
    # Always release the session, even if training is interrupted.
    sess.close()

运行结果：
在这里插入图片描述

使用ReLU激活函数代替sigmoid

对于神经网络模型来说，首先重要的一个目标就是能够达到最好的准确率，这需要通过设计不同的模型和算法完成。其次在模型的训练过程中一般要求能够在最短的时间内达到收敛。

相较于sigmoid和tanh 函数，ReLU主要有以下优点：

收敛快：对于SGD的收敛有巨大的加速作用，可以看到对于达到阈值的数据其激活力度是随数值的加大而增大，且呈现一个线性关系。
计算简单：ReLU的算法较为简单，单纯一个值的输入输出不需要进行一系列的复杂计算，从而获得激活值。
不易过拟合：使用ReLU进行模型计算时，一部分神经元在计算时如果有一个过大的梯度经过，则此神经元的梯度会被强行设置为0，而在其后的整个训练过程中这个神经元都不会再被激活，这会导致数据多样化的丢失，但是也能防止过拟合，这个现象一般不被注意到。

把上面程序里的sigmoid换为ReLU，可以看到训练准确度并没有提高，反而是在比较低的水平。不同的学习率对ReLU模型的训练会有很大影响，学习率设置不当会造成大量的神经元被锁死。这里需要减小模型的学习率。

函数重构——模块化设计

上面的程序里为了模型的正常使用，在图计算过程中需要使用大量的权重值和偏置量。这些都是由TensorFlow变量所设置。而变量带来的问题就是每次图会话计算过程中都要被反复初始化和赋予新值。

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time
import matplotlib.pyplot as plt


def weight_variable(shape):
    """Create a variable of the given shape for use as layer weights.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


# Initialise the bias values that go with a layer's kernels.
def bias_variable(shape):
    """Create a variable of the given shape with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))


# Convolve the input features x with the kernel W. `strides` is the step by
# which the kernel moves; `padding` says whether border pixels are padded so
# that the output keeps the size of the input.
def conv2d(x, W):
    """Return the stride-1, SAME-padded 2-D convolution of `x` with `W`."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')


# Max-pool x; `ksize` is the extent of the pooling window.
def max_pool_2x2(x):
    """Apply max pooling to `x` with a 2x2 window, stride 2 and SAME padding."""
    window = [1, 2, 2, 1]  # used for both the window size and the stride
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')


# Inputs: flattened 28x28 images (784 values) and one-hot labels over 10 classes.
x = tf.placeholder('float', [None, 784])
y_ = tf.placeholder('float', [None, 10])
# Restore the [batch, height, width, channels] image layout.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First convolution + pooling stage: 5x5 kernels, 1 -> 6 channels.
W_conv1 = weight_variable([5, 5, 1, 6])
b_conv1 = bias_variable([6])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolution + pooling stage: 5x5 kernels, 6 -> 16 channels.
W_conv2 = weight_variable([5, 5, 6, 16])
b_conv2 = bias_variable([16])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# First fully connected layer: 7*7*16 inputs -> 120 units.
W_fc1 = weight_variable([7 * 7 * 16, 120])
b_fc1 = bias_variable([120])
# Flatten the pooled feature maps to one row per image.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 16])
# Affine transform followed by the ReLU activation.
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Output layer: 120 units -> 10 class scores; softmax gives probabilities.
W_fc2 = weight_variable([120, 10])
b_fc2 = bias_variable([10])
logits = tf.matmul(h_fc1, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits)

# Loss: cross entropy summed over the batch. It is computed from the logits
# because -sum(y_ * log(softmax)) evaluates 0 * log(0) = NaN as soon as any
# predicted probability underflows to zero; the value is otherwise the same.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
# Plain gradient descent with a learning rate of 1e-4.
# NOTE(review): the earlier comment on this line said "Adam", which did not
# match the code; confirm which optimizer is actually intended.
train_step = tf.train.GradientDescentOptimizer(1e-4).minimize(cross_entropy)

# Accuracy: fraction of samples whose most probable class equals the label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float32"))

# Load MNIST before opening the session so a failed download cannot leak it.
mnist_data_set = input_data.read_data_sets('MNIST_data', one_hot=True)
c = []  # accuracy history, plotted after training

# InteractiveSession installs itself as the default session, which is what
# `accuracy.eval(...)` and `train_step.run(...)` below rely on.
sess = tf.InteractiveSession()
try:
    # Initialise every variable (replaces the deprecated
    # tf.initialize_all_variables()).
    sess.run(tf.global_variables_initializer())

    start_time = time.time()
    for i in range(1000):
        # Fetch the next training batch.
        batch_xs, batch_ys = mnist_data_set.train.next_batch(200)
        # Every 2nd iteration, record and print the accuracy on the current
        # batch (measured before that batch is trained on).
        if i % 2 == 0:
            train_accuracy = accuracy.eval(feed_dict={x: batch_xs, y_: batch_ys})
            c.append(train_accuracy)
            print("step %d , training accuracy %g" % (i, train_accuracy))
            # Wall-clock time elapsed since the previous report.
            end_time = time.time()
            print('time:', (end_time - start_time))
            start_time = end_time
        # Train on the batch.
        train_step.run(feed_dict={x: batch_xs, y_: batch_ys})
finally:
    # Always release the session, even if training is interrupted.
    sess.close()

# Plot the recorded accuracy curve and save it to disk.
plt.plot(c)
plt.tight_layout()
plt.savefig('cnn-tf-cifar10-2.png', dpi=200)

在这里插入图片描述
输出准确率：

卷积核和隐藏层参数的修改

通过调整激活函数和学习率程序学习效率会有非常大的提高。除此之外深度学习中有不同的隐藏层和每层包含的神经元，而通过调节这些神经元和隐藏层的数目，也可以改善神经网络模型的设计。
下面修改每个隐藏层中神经元的数目，即第一层生成了32个通道的卷积层，第二层为64，而在全连接阶段使用了1024个神经元作为学习参数。

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import time
import matplotlib.pyplot as plt


def weight_variable(shape):
    """Return a new weight variable of the requested shape.

    The starting values come from a truncated normal distribution whose
    standard deviation is 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


# Initialise the bias values that go with a layer's kernels.
def bias_variable(shape):
    """Return a new bias variable of the requested shape, filled with 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))


# Convolve the input features x with the kernel W. `strides` is the step by
# which the kernel moves; `padding` says whether border pixels are padded so
# that the output keeps the size of the input.
def conv2d(x, W):
    """Convolve `x` with kernel `W` using stride 1 and SAME padding."""
    step = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=step, padding='SAME')


# Max-pool x; `ksize` is the extent of the pooling window.
def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2 and SAME padding."""
    two_by_two = [1, 2, 2, 1]  # shared by the window size and the stride
    return tf.nn.max_pool(x, ksize=two_by_two, strides=two_by_two, padding='SAME')


# Inputs: flattened 28x28 images (784 values) and one-hot labels over 10 classes.
x = tf.placeholder('float', [None, 784])
y_ = tf.placeholder('float', [None, 10])
# Restore the [batch, height, width, channels] image layout.
x_image = tf.reshape(x, [-1, 28, 28, 1])

# First convolution + pooling stage: 5x5 kernels, 1 -> 32 channels.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolution + pooling stage: 5x5 kernels, 32 -> 64 channels.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# First fully connected layer: 7*7*64 inputs -> 1024 units.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
# Flatten the pooled feature maps to one row per image.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
# Affine transform followed by the ReLU activation.
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

# Output layer: 1024 units -> 10 class scores; softmax gives probabilities.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
logits = tf.matmul(h_fc1, W_fc2) + b_fc2
y_conv = tf.nn.softmax(logits)

# Loss: cross entropy summed over the batch. It is computed from the logits
# because -sum(y_ * log(softmax)) evaluates 0 * log(0) = NaN as soon as any
# predicted probability underflows to zero; the value is otherwise the same.
cross_entropy = tf.reduce_sum(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logits))
# Adjust the parameters with the Adam optimizer, as this comment always stated.
# The code previously used GradientDescentOptimizer(1e-4) here.
# NOTE(review): plain gradient descent on this summed loss is presumably what
# made the wider network fail to train; confirm by re-running.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Accuracy: fraction of samples whose most probable class equals the label.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float32"))

# Load MNIST before opening the session so a failed download cannot leak it.
mnist_data_set = input_data.read_data_sets('MNIST_data', one_hot=True)
c = []  # accuracy history, plotted after training

# InteractiveSession installs itself as the default session, which is what
# `accuracy.eval(...)` and `train_step.run(...)` below rely on.
sess = tf.InteractiveSession()
try:
    # Initialise every variable, including the optimizer's own state
    # (replaces the deprecated tf.initialize_all_variables()).
    sess.run(tf.global_variables_initializer())

    start_time = time.time()
    for i in range(1000):
        # Fetch the next training batch.
        batch_xs, batch_ys = mnist_data_set.train.next_batch(200)
        # Every 2nd iteration, record and print the accuracy on the current
        # batch (measured before that batch is trained on).
        if i % 2 == 0:
            train_accuracy = accuracy.eval(feed_dict={x: batch_xs, y_: batch_ys})
            c.append(train_accuracy)
            print("step %d , training accuracy %g" % (i, train_accuracy))
            # Wall-clock time elapsed since the previous report.
            end_time = time.time()
            print('time:', (end_time - start_time))
            start_time = end_time
        # Train on the batch.
        train_step.run(feed_dict={x: batch_xs, y_: batch_ys})
finally:
    # Always release the session, even if training is interrupted.
    sess.close()

# Plot the recorded accuracy curve and save it to disk.
plt.plot(c)
plt.tight_layout()
plt.savefig('cnn-tf-cifar10-1.png', dpi=200)

（此代码未调试成功，实际运行时准确率相当低）
随着卷积核数目的增加，准确率上升的速度也非常快，但训练的速度明显变慢了。