CIFAR-10 code

Some corrections have been made on top of the original source code.

A standard machine learning program should contain four major parts: data input, model definition, model training, and model evaluation, and can accordingly be split into four .py files.
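
For reference, the four files discussed below are:

input_dataset.py   # data input and augmentation pipeline
forward_prop.py    # model definition, loss, and training op
train.py           # training loop
evaluate.py        # test-set evaluation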

(1) Data input (input_dataset.py)

#coding:utf-8
import os
import tensorflow as tf
# The original images are 32*32; since the informative content usually lies near the center, fixed_height and fixed_width define the size of the cropped images
fixed_height = 24
fixed_width = 24
# CIFAR-10 dataset sizes: 50k training examples and 10k test examples
train_samples_per_epoch = 50000
test_samples_per_epoch = 10000
data_dir='./cifar-10-batches-bin' # directory containing the CIFAR-10 binary files
batch_size=128 # batch size used for each parameter update

def read_cifar10(filename_queue):
    # Define an empty class, used like a C struct, to hold the image fields
    class Image(object):
        pass
    image = Image()
    image.height=32
    image.width=32
    image.depth=3
    label_bytes = 1
    image_bytes = image.height*image.width*image.depth
    Bytes_to_read = label_bytes+image_bytes
    # A reader that reads a fixed number of bytes from the file each time
    reader = tf.FixedLengthRecordReader(record_bytes=Bytes_to_read)
    # read() returns a (key, value) pair from filename_queue; both are string tensors, and once a file has been fully read its name is dequeued
    image.key, value_str = reader.read(filename_queue)
    # Decoding is like reading a binary file: it turns the byte string into a vector of numbers, one byte per value in [0, 255], so out_type must be uint8
    value = tf.decode_raw(bytes=value_str, out_type=tf.uint8)
    # Take a slice of the 1-D tensor, like selecting a sub-vector: value contains both the label and the pixels, so it has to be parsed
    image.label = tf.slice(input_=value, begin=[0], size=[label_bytes]) # begin and size give the start and length of the slice
    data_mat = tf.slice(input_=value, begin=[label_bytes], size=[image_bytes])
    data_mat = tf.reshape(data_mat, (image.depth, image.height, image.width)) # this dimension order follows the CIFAR binary file format
    transposed_value = tf.transpose(data_mat, perm=[1, 2, 0]) # permute the dimensions: dimension i of the result is dimension perm[i] of data_mat
    image.mat = transposed_value
    return image
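
# Record layout: each CIFAR-10 binary record is 1 label byte followed by 32*32*3 = 3072
# image bytes stored plane by plane (all R, then all G, then all B), which is why data_mat
# is reshaped to (depth, height, width) above before being transposed to height-width-channel order.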

def get_batch_samples(img_obj, min_samples_in_queue, batch_size, shuffle_flag):
    # tf.train.shuffle_batch() builds batches by randomly shuffling the tensors in a queue
    # (so a single batch can mix examples from several data files). It adds the following to the current graph:
    # * a shuffling queue into which the tensors are enqueued;
    # * a dequeue_many op that assembles a batch from the queue;
    # * a QueueRunner that starts threads to fill the queue.
    # capacity bounds the length of the shuffling queue; min_after_dequeue is the minimum number of elements
    # left in the queue after a dequeue, which guarantees some randomness within each batch; num_threads is the
    # number of threads enqueuing tensors; enqueue_many says whether each tensor in `tensors` is a single example.
    # tf.train.batch() is similar but dequeues in order (each batch comes from a single data file), so there is no shuffling.

    if shuffle_flag == False:
        image_batch, label_batch = tf.train.batch(tensors=img_obj,
                                                  batch_size=batch_size,
                                                  num_threads=4,
                                                  capacity=min_samples_in_queue+3*batch_size)
    else:
        image_batch, label_batch = tf.train.shuffle_batch(tensors=img_obj,
                                                          batch_size=batch_size,
                                                          num_threads=4,
                                                          min_after_dequeue=min_samples_in_queue,
                                                          capacity=min_samples_in_queue+3*batch_size)
    tf.summary.image('input_image', image_batch) # summary of the preprocessed images, written to the event file from the session (e.g. tf.summary.image('input_image', image_batch, max_outputs=6))
    return image_batch, tf.reshape(label_batch, shape=[batch_size])

def preprocess_input_data():
    # Data augmentation for the training set: enlarging the effective training set helps prevent overfitting
    filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i) for i in range(1, 6)]
    #filenames =[os.path.join(data_dir, 'test_batch.bin')]
    for f in filenames: # check that the training data files exist
        if not tf.gfile.Exists(f):
            raise ValueError('failed to find file: ' + f)
    filename_queue = tf.train.string_input_producer(string_tensor=filenames) # enqueue the file names; the first stage of the data pipeline
    image = read_cifar10(filename_queue) # read one image tensor from the filename queue
    new_img = tf.cast(image.mat, tf.float32)
    tf.summary.image('raw_input_image', tf.reshape(new_img, [1, 32, 32, 3])) # summary of the raw image before preprocessing
    new_img = tf.random_crop(new_img, size=(fixed_height, fixed_width, 3)) # randomly crop a sub-image out of the original image
    new_img = tf.image.random_brightness(new_img, max_delta=63) # randomly adjust the brightness
    new_img = tf.image.random_flip_left_right(new_img) # randomly flip the image left/right
    new_img = tf.image.random_contrast(new_img, lower=0.2, upper=1.8) # randomly adjust the contrast
    final_img = tf.image.per_image_standardization(new_img) # per-image standardization ('whitening'): reduces redundancy and correlation between input features

    min_samples_ratio_in_queue = 0.4  # ensures enough randomness in each batch so that it covers more classes and more data files
    min_samples_in_queue = int(min_samples_ratio_in_queue*train_samples_per_epoch)
    return get_batch_samples([final_img, image.label], min_samples_in_queue, batch_size, shuffle_flag=True)
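
train.py (in a commented-out line) and evaluate.py below also call input_dataset.input_data(eval_flag=...), which is not part of the listing above. A minimal sketch of what it could look like is given here; the function name and the eval_flag argument come from those call sites, while the deterministic center crop and the unshuffled batching are assumptions modeled on preprocess_input_data:

def input_data(eval_flag=False):
    # Sketch (assumption): build an input pipeline without random augmentation,
    # reading the test batch when eval_flag is True and the training batches otherwise.
    if eval_flag:
        filenames = [os.path.join(data_dir, 'test_batch.bin')]
        samples_per_epoch = test_samples_per_epoch
    else:
        filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i) for i in range(1, 6)]
        samples_per_epoch = train_samples_per_epoch
    for f in filenames:
        if not tf.gfile.Exists(f):
            raise ValueError('failed to find file: ' + f)
    filename_queue = tf.train.string_input_producer(string_tensor=filenames)
    image = read_cifar10(filename_queue)
    new_img = tf.cast(image.mat, tf.float32)
    # deterministic center crop instead of the random crop used for training
    new_img = tf.image.resize_image_with_crop_or_pad(new_img, fixed_height, fixed_width)
    final_img = tf.image.per_image_standardization(new_img)
    min_samples_in_queue = int(0.4 * samples_per_epoch)
    return get_batch_samples([final_img, image.label], min_samples_in_queue,
                             batch_size, shuffle_flag=False)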

(2) Model definition (forward_prop.py)

# input -> conv -> pool -> norm -> conv -> norm -> pool -> fully connected -> fully connected -> softmax output
#coding:utf-8
import tensorflow as tf
import input_dataset
# hyperparameters defined in input_dataset
height = input_dataset.fixed_height
width = input_dataset.fixed_width
train_samples_per_epoch = input_dataset.train_samples_per_epoch
test_samples_per_epoch = input_dataset.test_samples_per_epoch

# Constants describing the training process
moving_average_decay = 0.9999     # decay to use for the moving averages
num_epochs_per_decay = 350.0      # the decay is a staircase function; this controls the decay period (step width)
learning_rate_decay_factor = 0.1  # learning rate decay factor
initial_learning_rate = 0.1       # initial learning rate

def variable_on_cpu(name, shape, dtype, initializer):
   #with tf.device("/cpu:0"):  # a context manager that pins newly created ops to a given device
        return tf.get_variable(name=name,
                               shape=shape,
                               initializer=initializer,
                               dtype=dtype)

def variable_on_cpu_with_collection(name, shape, dtype, stddev, wd):
    #with tf.device("/cpu:0"):
        weight = tf.get_variable(name=name,
                                 shape=shape,
                                 initializer=tf.truncated_normal_initializer(stddev=stddev, dtype=dtype))
        if wd is not None:
            weight_decay = tf.multiply(tf.nn.l2_loss(weight), wd, name='weight_loss')
            tf.add_to_collection(name='losses', value=weight_decay)
        return weight

def losses_summary(total_loss):
    # ExponentialMovingAverage maintains moving averages of variables via exponential decay. When training a model,
    # it helps to keep moving averages of the trained parameters: evaluating with the averaged parameters instead of
    # the final trained values often improves the real performance (accuracy). apply() creates shadow copies of the
    # trained variables and adds ops that maintain their moving averages; average() gives access to the shadow
    # variables, which is useful when building the evaluation model.
    # The moving average is computed by exponential decay: a shadow variable is initialized to the same value as the
    # trained variable and is updated as
    # shadow_variable = decay * shadow_variable + (1 - decay) * variable
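    # For example, with decay = 0.9, a shadow value of 2.0 and a next observed loss of 1.0,
    # the shadow becomes 0.9 * 2.0 + 0.1 * 1.0 = 1.9.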
    average_op = tf.train.ExponentialMovingAverage(decay=0.9) # create an exponential moving average object
    losses = tf.get_collection(key='losses') # all tensors in the 'losses' collection: the cross-entropy loss plus the regularization losses
    # create the shadow variables and add the ops that maintain their moving averages
    maintain_averages_op = average_op.apply(losses+[total_loss]) # returns an op that updates the shadow variables
    for i in losses+[total_loss]:
        tf.summary.scalar(i.op.name+'_raw', i) # write the raw value to a summary so it can be saved to the event file
        tf.summary.scalar(i.op.name, average_op.average(i)) # average() returns the shadow variable for a given variable
    return maintain_averages_op  # return the op that updates the loss moving averages

def one_step_train(total_loss, step):
    batch_count = int(train_samples_per_epoch/input_dataset.batch_size) # number of batches per epoch
    decay_step = batch_count*num_epochs_per_decay # decay the learning rate every decay_step training steps
    lr = tf.train.exponential_decay(learning_rate=initial_learning_rate,
                                    global_step=step,
                                    decay_steps=decay_step,
                                    decay_rate=learning_rate_decay_factor,
                                    staircase=True)
    tf.summary.scalar('learning_rate', lr)
    losses_movingaverage_op = losses_summary(total_loss)
    # tf.control_dependencies is a context manager that orders execution: the ops in control_inputs run before the ops created inside the context
    with tf.control_dependencies(control_inputs=[losses_movingaverage_op]):
        trainer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        gradient_pairs = trainer.compute_gradients(loss=total_loss) # returns the computed (gradient, variable) pairs
    gradient_update = trainer.apply_gradients(grads_and_vars=gradient_pairs, global_step=step) # one gradient-update op
    # num_updates adjusts the decay rate dynamically: the effective decay_rate = min(decay, (1 + num_updates) / (10 + num_updates))
    variables_average_op = tf.train.ExponentialMovingAverage(decay=moving_average_decay, num_updates=step)
    # tf.trainable_variables() returns a list of all variables created with trainable=True
    maintain_variable_average_op = variables_average_op.apply(var_list=tf.trainable_variables()) # op that updates the moving averages of the model parameters
    with tf.control_dependencies(control_inputs=[gradient_update, maintain_variable_average_op]):
        gradient_update_optimizor = tf.no_op() #Does nothing. Only useful as a placeholder for control edges
    return gradient_update_optimizor

def network(images):
    with tf.variable_scope(name_or_scope='conv1') as scope:
        weight = variable_on_cpu_with_collection(name='weight',
                                                 shape=(5, 5, 3, 64),
                                                 dtype=tf.float32,
                                                 stddev=0.05,
                                                 wd = 0.0)
        bias = variable_on_cpu(name='bias', shape=(64), dtype=tf.float32, initializer=tf.constant_initializer(value=0.0))
        conv1_in = tf.nn.conv2d(input=images, filter=weight, strides=(1, 1, 1, 1), padding='SAME')
        conv1_in = tf.nn.bias_add(value=conv1_in, bias=bias)
        conv1_out = tf.nn.relu(conv1_in)

    pool1 = tf.nn.max_pool(value=conv1_out, ksize=(1, 3, 3, 1), strides=(1, 2, 2, 1), padding='SAME')

    norm1 = tf.nn.lrn(input=pool1, depth_radius=4, bias=1.0, alpha=0.001/9.0, beta=0.75)

    with tf.variable_scope(name_or_scope='conv2') as scope:
        weight = variable_on_cpu_with_collection(name='weight',
                                 shape=(5, 5, 64, 64),
                                 dtype=tf.float32,
                                 stddev=0.05,
                                 wd=0.0)
        bias = variable_on_cpu(name='bias', shape=(64), dtype=tf.float32, initializer=tf.constant_initializer(value=0.1))
        conv2_in = tf.nn.conv2d(norm1, weight, strides=(1, 1, 1, 1), padding='SAME')
        conv2_in = tf.nn.bias_add(conv2_in, bias)
        conv2_out = tf.nn.relu(conv2_in)

    norm2 = tf.nn.lrn(input=conv2_out, depth_radius=4, bias=1.0, alpha=0.001/9.0, beta=0.75)

    pool2 = tf.nn.max_pool(value=norm2, ksize=(1, 3, 3, 1), strides=(1, 2, 2, 1), padding='SAME')
    # pool2 has shape [batch, in_height, in_width, in_channels]; after two stride-2 pools the 24*24 input is 6*6, hence 6*6*64 features per example
    reshaped_pool2 = tf.reshape(tensor=pool2, shape=(-1, 6*6*64))

    with tf.variable_scope(name_or_scope='fully_connected_layer1') as scope:
        weight = variable_on_cpu_with_collection(name='weight',
                                                 shape=(6*6*64, 384),
                                                 dtype=tf.float32,
                                                 stddev=0.04,
                                                 wd = 0.004)
        bias = variable_on_cpu(name='bias', shape=(384), dtype=tf.float32, initializer=tf.constant_initializer(value=0.1))
        fc1_in = tf.matmul(reshaped_pool2, weight)+bias
        fc1_out = tf.nn.relu(fc1_in)

    with tf.variable_scope(name_or_scope='fully_connected_layer2') as scope:
        weight = variable_on_cpu_with_collection(name='weight',
                                                 shape=(384, 192),
                                                 dtype=tf.float32,
                                                 stddev=0.04,
                                                 wd=0.004)
        bias = variable_on_cpu(name='bias', shape=(192), dtype=tf.float32, initializer=tf.constant_initializer(value=0.1))
        fc2_in = tf.matmul(fc1_out, weight)+bias
        fc2_out = tf.nn.relu(fc2_in)

    with tf.variable_scope(name_or_scope='softmax_layer') as scope:
        weight = variable_on_cpu_with_collection(name='weight',
                                                 shape=(192, 10),
                                                 dtype=tf.float32,
                                                 stddev=1/192.0,
                                                 wd=0.0)
        bias = variable_on_cpu(name='bias', shape=(10), dtype=tf.float32, initializer=tf.constant_initializer(value=0.0))
        classifier_in = tf.matmul(fc2_out, weight)+bias
        # no tf.nn.softmax here: loss() uses sparse_softmax_cross_entropy_with_logits, which expects unnormalized logits and applies softmax internally
        classifier_out = classifier_in
    return classifier_out

def loss(logits, labels):
    labels = tf.cast(x=labels, dtype=tf.int32)  # cast so the labels match the type expected by sparse_softmax_cross_entropy_with_logits
    cross_entropy_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='likelihood_loss')
    cross_entropy_loss = tf.reduce_mean(cross_entropy_loss, name='cross_entropy_loss') # average the per-example losses over the batch
    tf.add_to_collection(name='losses', value=cross_entropy_loss) # add cross_entropy_loss to the collection keyed by 'losses'
    return tf.add_n(inputs=tf.get_collection(key='losses'), name='total_loss') # return the sum of all entries in the 'losses' collection
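
As a quick sanity check on the learning-rate schedule built in one_step_train, the staircase exponential decay can be reproduced in plain Python (a sketch using the constants above; the function name staircase_lr is just for illustration):

# Sketch of the schedule produced by tf.train.exponential_decay with staircase=True.
def staircase_lr(step,
                 initial_lr=0.1,                        # initial_learning_rate
                 decay_factor=0.1,                      # learning_rate_decay_factor
                 decay_steps=int(50000 / 128) * 350):   # batch_count * num_epochs_per_decay = 136500
    return initial_lr * decay_factor ** (step // decay_steps)

# With these constants decay_steps = 136500, so during the 10000 iterations used in
# train.py the learning rate never decays and stays at 0.1; it would drop to 0.01
# after step 136500 and to 0.001 after step 273000.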

(3) Model training (train.py)

#coding:utf-8
import input_dataset
import forward_prop
import tensorflow as tf
import os
import numpy as np

max_iter_num = 10000 # number of training iterations
checkpoint_path = './checkpoint' # directory for the model checkpoint files
event_log_path = './event-log' # directory for the event files used to periodically store serialized summaries

def train():
    with tf.Graph().as_default():    # use this graph as the default graph
        global_step = tf.Variable(initial_value=0, trainable=False) # trainable=False so the moving-average update does not also touch global_step
        img_batch, label_batch = input_dataset.preprocess_input_data() # input preprocessing: brightness, contrast, flips, etc.
        # img_batch, label_batch = input_dataset.input_data(eval_flag=False)
        logits = forward_prop.network(img_batch) # forward pass
        total_loss = forward_prop.loss(logits, label_batch) # compute the loss
        one_step_gradient_update = forward_prop.one_step_train(total_loss, global_step) # one gradient-update op
        # create a Saver to write the parameters to files
        saver = tf.train.Saver(var_list=tf.global_variables()) # tf.global_variables() returns a list of `Variable` objects
        all_summary_obj = tf.summary.merge_all() # a string tensor with all summaries merged and serialized
        initiate_variables = tf.global_variables_initializer()
        # log_device_placement would record which device each op runs on; there are many ops here, so it is left off
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))
        sess.run(initiate_variables)  # initialize the variables
        tf.train.start_queue_runners(sess=sess) # start all queue runners
        Event_writer = tf.summary.FileWriter(logdir=event_log_path, graph=sess.graph)
        for step in range(max_iter_num):
            _, loss_value = sess.run(fetches=[one_step_gradient_update, total_loss])
            assert not np.isnan(loss_value) # make sure the loss computed in this iteration is not NaN
            if step%10 == 0:
                print('step %d, the loss_value is %.2f' % (step, loss_value))
            if step%100 == 0:
                # add serialized Summary protocol buffers to the event file; total_loss itself cannot be written directly because it is a plain tensor, not a summary
                all_summaries = sess.run(all_summary_obj)
                Event_writer.add_summary(summary=all_summaries, global_step=step)
            if step%1000 == 0 or (step+1)==max_iter_num:
                if not os.path.exists(checkpoint_path):
                    os.makedirs(checkpoint_path) # make sure the checkpoint directory exists
                variables_save_path = os.path.join(checkpoint_path, 'model-parameters.bin') # join the directory and the file name
                saver.save(sess, variables_save_path, global_step=step) # save all variables (including the moving-average shadow copies of the model parameters) under variables_save_path
if __name__ == '__main__':
    train()                  
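
The summaries written by train.py end up under ./event-log and can be inspected with TensorBoard, for example:

tensorboard --logdir=./event-log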


(4) Model evaluation (evaluate.py)

#coding:utf-8
import tensorflow as tf
import input_dataset
import forward_prop
import train
import math
import numpy as np

def eval_once(summary_op, summary_writer, saver, predict_true_or_false):
    with tf.Session() as sess:
        # get a CheckpointState proto from the checkpoint directory
        checkpoint_proto = tf.train.get_checkpoint_state(checkpoint_dir=train.checkpoint_path)
        if checkpoint_proto and checkpoint_proto.model_checkpoint_path:
            saver.restore(sess, checkpoint_proto.model_checkpoint_path) # restore the model variables into the current session
        else:
            print('checkpoint file not found!')
            return
        # start a number of threads and pass the coordinator to each of them
        coord = tf.train.Coordinator() # a Coordinator implements a simple mechanism to coordinate the termination of many threads
        try:
            threads = [] # manage all threads through the coordinator
            for queue_runner in tf.get_collection(key=tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(queue_runner.create_threads(sess, coord=coord, daemon=True, start=True))
            # number of test batches, rounded up
            test_batch_num = int(math.ceil(input_dataset.test_samples_per_epoch/float(input_dataset.batch_size)))
            iter_num = 0
            true_test_num = 0
            # the rounded-up batch count is used here, so the denominator is slightly larger than the real 10k test set
            # and the measured precision is therefore a slight underestimate
            total_test_num = test_batch_num*input_dataset.batch_size

            while iter_num<test_batch_num and not coord.should_stop():
                result_judge = sess.run([predict_true_or_false])
                true_test_num += np.sum(result_judge)
                iter_num += 1
            precision = true_test_num/float(total_test_num)
            print("The test precision is %.3f"  % precision)
        except Exception as e:
            coord.request_stop(e)
        coord.request_stop()
        coord.join(threads)

def evaluate():
    with tf.Graph().as_default() as g:
        img_batch, labels = input_dataset.input_data(eval_flag=True) # read the test dataset
        logits = forward_prop.network(img_batch) # compute the model output (the moving-averaged parameters are restored below)
        # in_top_k checks whether the target is among the top-k predictions; k=1 is the usual accuracy measure, and sess.run(predict_true_or_false) performs the actual computation
        predict_true_or_false = tf.nn.in_top_k(predictions=logits, targets=labels, k=1)
        # restore the moving-averaged versions of the model parameters
        moving_average_op = tf.train.ExponentialMovingAverage(decay=forward_prop.moving_average_decay)
        # variables_to_restore() returns a map from names to the Variables to restore: if a variable has a moving
        # average, the moving-average name is used as the restore name; otherwise the variable's own name is used
        variables_to_restore = moving_average_op.variables_to_restore()
        saver = tf.train.Saver(var_list=variables_to_restore)

        summary_op = tf.summary.merge_all() # merged, serialized summary op
        # create an event file under logdir for writing summary objects later
        summary_writer = tf.summary.FileWriter(logdir='./event-log-test', graph=g)
        eval_once(summary_op, summary_writer, saver, predict_true_or_false)
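
Note that the listing above never actually invokes evaluate(); assuming it is meant to run as a script like train.py, a minimal entry point would be:

if __name__ == '__main__':
    evaluate()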





Reposted from blog.csdn.net/qq_32841011/article/details/79058907