本文将以库函数的方式实现 MNIST 手写数字识别,共三个程序。第一个是 mnist_inference.py,它定义了前向传播的过程以及神经网络中的参数。第二个是 mnist_train.py,它定义了神经网络的训练过程。第三个是 mnist_eval.py,它定义了测试过程。
#coding:utf-8
#mnist_inference.py
"""Forward-propagation definition and layer parameters for the MNIST network."""
import tensorflow as tf

# Network layout parameters: sizes of the input, output and hidden layers.
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500


def get_weight_variable(shape, regularizer):
    """Create (or fetch) the "weights" variable of the current variable scope.

    The variable is obtained through ``tf.get_variable``: it is created while
    training, and at test time its value is loaded from the saved model.
    Because variables can be renamed when they are loaded, training can use
    the variable itself while testing uses its moving average.

    Args:
        shape: Dimensions of the weight matrix.
        regularizer: Callable mapping the weights to a regularization loss,
            or ``None`` to skip regularization.

    Returns:
        The weight variable.
    """
    # Truncated normal initialization: values that fall more than two
    # standard deviations from the mean are re-drawn.
    weights = tf.get_variable(
        "weights", shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1))

    # Add this variable's regularization loss to the 'losses' collection.
    # Identity comparison is the correct way to test for None.
    if regularizer is not None:
        tf.add_to_collection('losses', regularizer(weights))
    return weights


def inference(input_tensor, regularizer):
    """Build the forward pass: one ReLU hidden layer and a linear output layer.

    Args:
        input_tensor: Input batch fed to the first layer; it is multiplied
            by a weight matrix of shape [INPUT_NODE, LAYER1_NODE].
        regularizer: Passed to ``get_weight_variable`` for both layers;
            may be ``None``.

    Returns:
        The output of the second layer (no activation applied).
    """
    # First layer: declare its variables and run the forward pass.
    with tf.variable_scope('layer1'):
        weights = get_weight_variable([INPUT_NODE, LAYER1_NODE], regularizer)
        biases = tf.get_variable(
            "biases", [LAYER1_NODE],
            initializer=tf.constant_initializer(0.0))
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights) + biases)

    # Second layer: declare its variables and run the forward pass.
    with tf.variable_scope('layer2'):
        weights = get_weight_variable([LAYER1_NODE, OUTPUT_NODE], regularizer)
        biases = tf.get_variable(
            "biases", [OUTPUT_NODE],
            initializer=tf.constant_initializer(0.0))
        layer2 = tf.matmul(layer1, weights) + biases

    return layer2
#coding:utf-8
#mnist_train.py
"""Training procedure for the MNIST network defined in mnist_inference.py."""
import os

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Constants and the forward-propagation function defined in mnist_inference.py.
import mnist_inference

# Network training parameters.
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARAZTION_RATE = 0.0001
TRAINING_STEPS = 9001
MOVING_AVERAGE_DECAY = 0.99

# Directory and file name used when saving the model.
MODEL_SAVE_PATH = "/home/sun/AI/DNN/handWrite2/model"
MODEL_NAME = "model.ckpt"


def train(mnist):
    """Build the training graph and run TRAINING_STEPS optimization steps.

    Every 1000 iterations the current batch loss is printed and a checkpoint
    is written under MODEL_SAVE_PATH.

    Args:
        mnist: Dataset object providing ``train.next_batch`` and
            ``train.num_examples`` (as returned by
            ``input_data.read_data_sets`` in ``main``).
    """
    x = tf.placeholder(
        tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
    y_ = tf.placeholder(
        tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y_input')

    # L2 regularizer applied to the weights of both layers.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)

    # Forward pass as defined in mnist_inference.py.
    y = mnist_inference.inference(x, regularizer)

    # Step counter; not trainable, incremented by the optimizer.
    global_step = tf.Variable(0, trainable=False)

    # Moving averages of all trainable variables, controlled by the decay
    # rate and the current step.
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Cross-entropy loss. Keyword arguments are required here: TensorFlow
    # >= 1.0 rejects positional arguments for this function, and keywords
    # also remove any ambiguity about the (logits, labels) order.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(y_, 1), logits=y)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # Total loss = mean cross entropy + regularization losses.
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

    # Exponentially decaying learning rate; the decay period is the number
    # of batches in the training set.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY)

    # Back-propagation step: minimizes the loss over the trainable variables
    # and increments global_step.
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # A single op that both updates the parameters and their moving averages.
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    saver = tf.train.Saver()

    # Start the session and run the training loop.
    with tf.Session() as sess:
        tf.initialize_all_variables().run()

        for i in range(TRAINING_STEPS):
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value, step = sess.run(
                [train_op, loss, global_step], feed_dict={x: xs, y_: ys})

            if i % 1000 == 0:
                print ("After %d training step(s), loss on training batch is %g." % (step, loss_value))
                saver.save(
                    sess,
                    os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                    global_step=global_step)


def main(argv=None):
    """Load the MNIST data set and train the network."""
    mnist = input_data.read_data_sets(
        "/home/sun/AI/DNN/handWrite2/data", one_hot=True)
    train(mnist)


# Guarded so that importing this module for its constants (as mnist_eval.py
# does) does not start a full training run.
if __name__ == '__main__':
    main()
#coding:utf-8
#mnist_eval.py
"""Periodic evaluation of the latest saved MNIST model on validation data."""
import time

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

import mnist_inference
import mnist_train

# Load the newest model every 10 seconds and measure its accuracy.
EVAL_INTERVAL_SECS = 10


def evaluate(mnist):
    """Repeatedly restore the latest checkpoint and print validation accuracy.

    Loops forever, sleeping EVAL_INTERVAL_SECS between evaluations, and
    returns as soon as no checkpoint can be found.

    Args:
        mnist: Dataset object providing ``validation.images`` and
            ``validation.labels``.
    """
    with tf.Graph().as_default():
        # Input and label placeholders.
        x = tf.placeholder(
            tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
        y_ = tf.placeholder(
            tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')
        validate_feed = {
            x: mnist.validation.images,
            y_: mnist.validation.labels,
        }

        # Forward pass. The regularization loss is irrelevant at test time,
        # so no regularizer is passed.
        y = mnist_inference.inference(x, None)

        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Restore each variable from its moving-average value stored in the
        # checkpoint.
        variable_averages = tf.train.ExponentialMovingAverage(
            mnist_train.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        while True:
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(
                    mnist_train.MODEL_SAVE_PATH)

                if ckpt and ckpt.model_checkpoint_path:
                    # Only touch ckpt's attributes after the check above:
                    # ckpt is None when no checkpoint exists yet.
                    print(ckpt.model_checkpoint_path)

                    # Load the model.
                    saver.restore(sess, ckpt.model_checkpoint_path)

                    # Recover the training step count from the file name.
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    accuracy_score = sess.run(accuracy, feed_dict=validate_feed)
                    print("After %s training step(s), validation accuracy = %g" % (global_step, accuracy_score))
                else:
                    print('No checkpoint file found')
                    return

            time.sleep(EVAL_INTERVAL_SECS)


def main(argv=None):
    """Load the MNIST data set and start the evaluation loop."""
    mnist = input_data.read_data_sets(
        "/home/sun/AI/DNN/handWrite2/data", one_hot=True)
    evaluate(mnist)


if __name__ == '__main__':
    tf.app.run()