《实战Google深度学习框架》第七章一个图像预处理样例

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt 

# Randomly adjust the colours of an image. The order in which brightness,
# contrast, saturation and hue are adjusted affects the final result, so
# several orderings are defined; the caller can pick one at random during
# training-data preprocessing to further reduce the influence of irrelevant
# factors on the model.
def distort_color(image,color_ordering=0):
    """Apply random colour adjustments to ``image`` and clip the result.

    Args:
        image: Image tensor to adjust. The final clip to [0.0, 1.0] suggests
            float pixel values in that range -- TODO confirm with callers.
        color_ordering: Selects the order of the adjustments.
            0: brightness, saturation, hue, contrast.
            1: saturation, contrast, hue, brightness.
            Any other value applies no adjustment (the image is only clipped).

    Returns:
        The adjusted image with every value clipped to [0.0, 1.0].
    """
    def adjust_brightness(img):
        return tf.image.random_brightness(img, max_delta=0.5)

    def adjust_saturation(img):
        return tf.image.random_saturation(img, lower=0.5, upper=1.5)

    def adjust_hue(img):
        return tf.image.random_hue(img, max_delta=0.2)

    def adjust_contrast(img):
        return tf.image.random_contrast(img, lower=0.5, upper=1.5)

    # Further orderings could be added here as extra branches.
    if color_ordering == 0:
        pipeline = (adjust_brightness, adjust_saturation,
                    adjust_hue, adjust_contrast)
    elif color_ordering == 1:
        pipeline = (adjust_saturation, adjust_contrast,
                    adjust_hue, adjust_brightness)
    else:
        pipeline = ()

    for adjust in pipeline:
        image = adjust(image)

    # The adjustments above can push values outside [0.0, 1.0]; clamp them
    # back into that range before returning.
    return tf.clip_by_value(image, 0.0, 1.0)

# Given a decoded image, a target size and the bounding boxes annotated on the
# image, preprocess the image for training: the input is a raw training image
# and the output is what gets fed to the network's input layer. Only training
# data is handled here; prediction data normally skips the random transforms.
def preprocess_for_train(image,height,width,bbox):
    """Randomly crop, resize, flip and colour-distort one training image.

    The resize method and the colour ordering are drawn with ``np.random`` in
    Python, i.e. once per call of this function, not once per evaluation of
    the returned tensor.

    Args:
        image: Decoded image tensor; converted to ``tf.float32`` if it has
            another dtype.
        height: Target height passed to ``tf.image.resize_images``.
        width: Target width passed to ``tf.image.resize_images``.
        bbox: Bounding boxes passed to
            ``tf.image.sample_distorted_bounding_box``, or ``None`` to treat
            the whole image as the region of interest.

    Returns:
        The distorted image tensor produced by ``distort_color``.
    """
    # Without annotations, the single box [0, 0, 1, 1] covers the full image.
    if bbox is None:
        bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32,
                           shape=[1, 1, 4])

    # Work on float pixels from here on.
    if image.dtype != tf.float32:
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)

    # Sample one randomly distorted box. The op returns (begin, size, bboxes);
    # begin/size feed tf.slice, and the third value is not needed here.
    # Random cropping reduces the influence of object size on the model.
    crop_begin, crop_size, _ = tf.image.sample_distorted_bounding_box(
        tf.shape(image), bounding_boxes=bbox)
    cropped = tf.slice(image, crop_begin, crop_size)

    # Resize the crop to the network input size with a randomly chosen
    # method (an integer in [0, 4) passed as ``method``).
    resize_method = np.random.randint(4)
    resized = tf.image.resize_images(
        cropped, (height, width), method=resize_method)

    # Random horizontal flip.
    flipped = tf.image.random_flip_left_right(resized)

    # Adjust colours using one of the two orderings, picked at random.
    ordering = np.random.randint(2)
    return distort_color(flipped, ordering)

# Read the encoded image as raw bytes ('rb' opens the file in binary mode so
# no text decoding is applied). The context manager closes the file handle as
# soon as the bytes are in memory.
with tf.gfile.FastGFile(r'F:\学校事务\论文资料\tensorflow\timg.jpg','rb') as image_file:
    image_raw_data = image_file.read()

# Every PNG file starts with this 8-byte signature.
_PNG_SIGNATURE = b'\x89PNG\r\n\x1a\n'

with tf.Session() as sess:
    # Decode the encoded bytes into a 3-D image tensor. The decoder is picked
    # from the file contents rather than the file name: PNG data goes to
    # decode_png, anything else is treated as JPEG (the file is named .jpg).
    # The result is a tensor, so it must be evaluated before its values can
    # be used.
    if image_raw_data.startswith(_PNG_SIGNATURE):
        img_data = tf.image.decode_png(image_raw_data)
    else:
        img_data = tf.image.decode_jpeg(image_raw_data)

    # Two annotated bounding boxes for the image.
    boxes = tf.constant([[[0.05,0.05,0.9,0.7],[0.35,0.47,0.5,0.56]]])

    # Build and evaluate the preprocessing pipeline 6 times to get 6
    # differently distorted images.
    for _ in range(6):
        # Resize each result to 599x599.
        result = preprocess_for_train(img_data,599,599,boxes)

        plt.imshow(result.eval())
        plt.show()
输入图像为：
在这里插入图片描述
输出的6种图像为：
《实战Google深度学习框架》第七章 一个图像预处理样例

猜你喜欢

《实战Google深度学习框架》第七章一个图像预处理样例