MNIST手写数字识别之TensorFlow实现——CNN实现

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.examples.tutorials.mnist import mnist

import tensorflow as tf
import os

# Make only GPU device 0 visible to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Let TensorFlow grow its GPU memory allocation on demand instead of
# reserving everything up front.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.InteractiveSession(config=config)

# 'MNIST_data' is a directory, relative to the working directory, that holds
# the MNIST files. If the data is not there yet it is downloaded into that
# directory; otherwise the existing files are read. Labels are one-hot encoded.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)


# print('训练集信息：', mnist.train.images.shape, mnist.train.labels.shape)
# print('测试集信息：', mnist.test.images.shape, mnist.test.labels.shape)
# print('验证集信息：', mnist.validation.images.shape, mnist.validation.labels.shape)

# 获取第5张图片，标签为[0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
# image = mnist.train.images[5]
# 将图像数据还原成28*28的分辨率
# image = image.reshape(28, 28)
# 打印对应的标签
# print(mnist.train.labels[5])
#
# plt.figure()
# plt.imshow(image)
# plt.show()

# Weight initialisation
def weight_variable(shape):
    """Return a trainable variable of the given shape.

    Initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


# Bias initialisation
def bias_variable(shape):
    """Return a trainable bias variable of the given shape, filled with 0.1.

    A small positive starting bias is used because the layers in this
    script are activated with ReLU.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))


# `input` is the tensor to convolve, `filter` is the convolution kernel.
def conv2d(input, filter):
    """Apply a 2-D convolution with stride 1 in every dimension and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(input, filter, strides=unit_strides, padding='SAME')


# 2x2 max-pooling layer
def max_pool(input):
    """Apply 2x2 max pooling with stride 2 and SAME padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(input, ksize=window, strides=step, padding='SAME')


# Input layer: each example is a flat vector of 784 values, each label a
# vector of 10 values.
x_input = tf.placeholder(tf.float32, [None, 784], name="x_input")
# Reshape the flat input to [batch, 28, 28, 1] so the convolution can be applied.
input_image = tf.reshape(x_input, [-1, 28, 28, 1])
y_input = tf.placeholder(tf.float32, [None, 10], name="y_input")

# Convolution kernel shape: [filter_height, filter_width, in_channels, out_channels].
# Named explicitly rather than `filter` so the builtin is not shadowed.
conv1_filter_shape = [3, 3, 1, 32]

filter_conv1 = weight_variable(conv1_filter_shape)
b_conv1 = bias_variable([32])

# Convolution layer: convolve, add bias, apply ReLU, then 2x2 max-pool.
h_conv1 = tf.nn.relu(conv2d(input_image, filter_conv1) + b_conv1)
h_pool1 = max_pool(h_conv1)

# Flatten the pooled feature maps (14 x 14 x 32 per example) into one row per
# example so they can be multiplied with the fully-connected weights.
h_flat = tf.reshape(h_pool1, [-1, 14 * 14 * 32])

# Hidden fully-connected layer. The ReLU is required: without a non-linearity
# between fc1 and fc2 the two matmuls collapse into a single affine map and
# the hidden layer adds parameters without adding capacity.
W_fc1 = weight_variable([14 * 14 * 32, 784])
b_fc1 = bias_variable([784])
h_fc1 = tf.nn.relu(tf.matmul(h_flat, W_fc1) + b_fc1)

# Output layer: one unnormalised score (logit) per class; the softmax is
# applied inside the loss.
W_fc2 = weight_variable([784, 10])
b_fc2 = bias_variable([10])

y_hat = tf.matmul(h_fc1, W_fc2) + b_fc2

# 1.损失函数loss：cross_entropy
# cross_entropy = tf.reduce_mean(
#     tf.nn.softmax_cross_entropy_with_logits(labels=y_input, logits=y_hat))

# Group the loss ops under the "loss-model" name scope.
with tf.name_scope("loss-model"):
    # 1. Loss: softmax cross-entropy between the labels and the logits,
    #    averaged over the batch.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=y_input, logits=y_hat)
    cross_entropy = tf.reduce_mean(per_example_loss)
    # Record the loss as a scalar summary so its curve can be inspected.
    tf.summary.scalar("loss", cross_entropy)

# 2. Optimiser: plain gradient descent with a learning rate of 0.001.
# Equivalent to the chained form
#   tf.train.GradientDescentOptimizer(0.001).minimize(cross_entropy)
optimizer = tf.train.GradientDescentOptimizer(0.001)
train_step = optimizer.minimize(cross_entropy)

# 3. Evaluation of the predictions.
# A prediction is correct when the index of the largest logit equals the index
# of the 1 in the one-hot label. y_hat and y_input hold one row per example,
# so the number of rows is the batch size.
with tf.name_scope("accuracy-model"):
    # Use tf.argmax for both operands; tf.arg_max is a deprecated alias.
    predicted_class = tf.argmax(y_hat, 1)
    true_class = tf.argmax(y_input, 1)
    correct_prediction = tf.equal(predicted_class, true_class)
    # Mean of the 0/1 correctness flags is the fraction of correct predictions.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # Record the accuracy as a scalar summary. The tag is "test_acc", but the
    # value reflects whichever data is fed when the summary is evaluated.
    tf.summary.scalar("test_acc", accuracy)

# 开始运行
# Pass the module-level `config` so the session that actually runs training
# uses the on-demand GPU memory setting instead of the defaults.
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())

    # Collect every registered summary op into a single op.
    merged = tf.summary.merge_all()

    # Write the graph and the summaries to ./tensorboard for TensorBoard.
    writer = tf.summary.FileWriter('./tensorboard', sess.graph, filename_suffix="_mnist")

    # Close the writer even if training is interrupted, so buffered events
    # are flushed to disk.
    try:
        # 55000 steps with a batch size of 100.
        for i in range(55000):
            x_batch, y_batch = mnist.train.next_batch(batch_size=100)
            batch_feed = {x_input: x_batch, y_input: y_batch}

            # One run evaluates the summaries, applies the update and fetches
            # the loss for this batch.
            summary, _, train_loss = sess.run([merged, train_step, cross_entropy],
                                              feed_dict=batch_feed)

            # Every 100 steps: accuracy over the full training set.
            if (i + 1) % 100 == 0:
                train_accuracy = sess.run(
                    accuracy,
                    feed_dict={x_input: mnist.train.images, y_input: mnist.train.labels})
                print("step %d,\ttrain_loss %s,\ttrain acc %g" % (i + 1, train_loss, train_accuracy))

            # Every 500 steps: accuracy over the test set.
            if (i + 1) % 500 == 0:
                test_accuracy = sess.run(
                    accuracy,
                    feed_dict={x_input: mnist.test.images, y_input: mnist.test.labels})
                print("= " * 10, "step %d,\ttest acc %g" % (i + 1, test_accuracy))

            writer.add_summary(summary, i)
    finally:
        writer.close()

    # Final accuracy over the validation set.
    val_accuracy = sess.run(
        accuracy,
        feed_dict={x_input: mnist.validation.images, y_input: mnist.validation.labels})
    print("* " * 10, "step %d, validation acc %g" % (i + 1, val_accuracy))
手动下载 MNIST 数据集的地址：http://yann.lecun.com/exdb/mnist/
本程序使用一个卷积池化层和一个全连接层实现手写数字识别分类，模型框架如图所示：
MNIST手写数字识别之Tensorflow实现---CNN实现

猜你喜欢