基于tensorflow的数字体识别

import numpy as np

import struct

import matplotlib.pyplot as plt

# 训练集文件

train_images_idx3_ubyte_file = 'minist_data/train-images.idx3-ubyte'

# 训练集标签文件

train_labels_idx1_ubyte_file = 'minist_data/train-labels.idx1-ubyte'

# 测试集文件

test_images_idx3_ubyte_file = 'minist_data/t10k-images.idx3-ubyte'

# 测试集标签文件

test_labels_idx1_ubyte_file = 'minist_data/t10k-labels.idx1-ubyte'

def decode_idx3_ubyte(idx3_ubyte_file):

    """

    解析idx3文件的通用函数

    :param idx3_ubyte_file: idx3文件路径

    :return: 数据集

    """

    # 读取二进制数据

    bin_data = open(idx3_ubyte_file, 'rb').read()

    # 解析文件头信息,依次为魔数、图片数量、每张图片高、每张图片宽

    offset = 0

    fmt_header = '>iiii' #因为数据结构中前4行的数据类型都是32位整型,所以采用i格式,但我们需要读取前4行数据,所以需要4i。我们后面会看到标签集中,只使用2ii

    magic_number, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, offset)

    # print('魔数:%d, 图片数量: %d, 图片大小: %d*%d' % (magic_number, num_images, num_rows, num_cols))

    # 解析数据集

    image_size = num_rows * num_cols

    offset += struct.calcsize(fmt_header)  #获得数据在缓存中的指针位置,从前面介绍的数据结构可以看出,读取了前4行之后,指针位置(即偏移位置offset)指向0016

    # print(offset)

    fmt_image = '>' + str(image_size) + 'B'  #图像数据像素值的类型为unsigned char型,对应的format格式为B。这里还有加上图像大小784,是为了读取784B格式数据,如果没有则只会读取一个值(即一副图像中的一个像素值)

    # print(fmt_image,offset,struct.calcsize(fmt_image))

    images = np.empty((num_images, num_rows, num_cols))

    #plt.figure()

    for i in range(num_images):

        # if (i + 1) % 10000 == 0:

        #     print('已解析 %d' % (i + 1) + '')

        #     print(offset)

        images[i] = np.array(struct.unpack_from(fmt_image, bin_data, offset)).reshape((num_rows, num_cols))

        #print(images[i])

        offset += struct.calcsize(fmt_image)

#        plt.imshow(images[i],'gray')

#        plt.pause(0.00001)

#        plt.show()

    #plt.show()

    return images

def decode_idx1_ubyte(idx1_ubyte_file):

    """

    解析idx1文件的通用函数

    :param idx1_ubyte_file: idx1文件路径

    :return: 数据集

    """

    # 读取二进制数据

    bin_data = open(idx1_ubyte_file, 'rb').read()

    # 解析文件头信息,依次为魔数和标签数

    offset = 0

    fmt_header = '>ii'

    magic_number, num_images = struct.unpack_from(fmt_header, bin_data, offset)

    # print('魔数:%d, 图片数量: %d' % (magic_number, num_images))

    # 解析数据集

    offset += struct.calcsize(fmt_header)

    fmt_image = '>B'

    labels = np.empty(num_images)

    for i in range(num_images):

        # if (i + 1) % 10000 == 0:

        #     # print ('已解析 %d' % (i + 1) + '')

        labels[i] = struct.unpack_from(fmt_image, bin_data, offset)[0]

        offset += struct.calcsize(fmt_image)

    return labels

def load_train_images(idx_ubyte_file=train_images_idx3_ubyte_file):

    """

    TRAINING SET IMAGE FILE (train-images-idx3-ubyte):

    [offset] [type]          [value]          [description]

    0000     32 bit integer  0x00000803(2051) magic number

    0004     32 bit integer  60000            number of images

    0008     32 bit integer  28               number of rows

    0012     32 bit integer  28               number of columns

    0016     unsigned byte   ??               pixel

    0017     unsigned byte   ??               pixel

    ........

    xxxx     unsigned byte   ??               pixel

    Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).

    :param idx_ubyte_file: idx文件路径

    :return: n*row*colnp.array对象,n为图片数量

    """

    return decode_idx3_ubyte(idx_ubyte_file)

def load_train_labels(idx_ubyte_file=train_labels_idx1_ubyte_file):

    """

    TRAINING SET LABEL FILE (train-labels-idx1-ubyte):

    [offset] [type]          [value]          [description]

    0000     32 bit integer  0x00000801(2049) magic number (MSB first)

    0004     32 bit integer  60000            number of items

    0008     unsigned byte   ??               label

    0009     unsigned byte   ??               label

    ........

    xxxx     unsigned byte   ??               label

    The labels values are 0 to 9.

    :param idx_ubyte_file: idx文件路径

    :return: n*1np.array对象,n为图片数量

    """

    return decode_idx1_ubyte(idx_ubyte_file)

def load_test_images(idx_ubyte_file=test_images_idx3_ubyte_file):

    """

    TEST SET IMAGE FILE (t10k-images-idx3-ubyte):

    [offset] [type]          [value]          [description]

    0000     32 bit integer  0x00000803(2051) magic number

    0004     32 bit integer  10000            number of images

    0008     32 bit integer  28               number of rows

    0012     32 bit integer  28               number of columns

    0016     unsigned byte   ??               pixel

    0017     unsigned byte   ??               pixel

    ........

    xxxx     unsigned byte   ??               pixel

    Pixels are organized row-wise. Pixel values are 0 to 255. 0 means background (white), 255 means foreground (black).

    :param idx_ubyte_file: idx文件路径

    :return: n*row*colnp.array对象,n为图片数量

    """

    return decode_idx3_ubyte(idx_ubyte_file)

def load_test_labels(idx_ubyte_file=test_labels_idx1_ubyte_file):

    """

    TEST SET LABEL FILE (t10k-labels-idx1-ubyte):

    [offset] [type]          [value]          [description]

    0000     32 bit integer  0x00000801(2049) magic number (MSB first)

    0004     32 bit integer  10000            number of items

    0008     unsigned byte   ??               label

    0009     unsigned byte   ??               label

    ........

    xxxx     unsigned byte   ??               label

    The labels values are 0 to 9.

    :param idx_ubyte_file: idx文件路径

    :return: n*1np.array对象,n为图片数量

    """

    return decode_idx1_ubyte(idx_ubyte_file)

def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):

    m = X.shape[0]                  #数据集的样本数

    mini_batches = []

    np.random.seed(seed)

    #step1:打乱数据

    # np.random.permutation函数传入的是数字时,返回的就是0~m-1的乱序结果,

    #传入数组的话返回就是外汇返佣的乱序

    permutation = list(np.random.permutation(m))

    shuffled_X = X[permutation,:,:,:]

    shuffled_Y = Y[permutation,:]

    # step2:根据batch大小尽心分割数据集

    num_complete_minibatches = math.floor(m/mini_batch_size) # batch的数量

    for k in range(0, num_complete_minibatches):

        mini_batch_X = shuffled_X[k * mini_batch_size : k * mini_batch_size + mini_batch_size,:,:,:]

        mini_batch_Y = shuffled_Y[k * mini_batch_size : k * mini_batch_size + mini_batch_size,:]

        mini_batch = (mini_batch_X, mini_batch_Y)

        mini_batches.append(mini_batch)

    #当数据集的大小不能整除batchsize时,剩下的数据就作为一个batch

    if m % mini_batch_size != 0:

        mini_batch_X = shuffled_X[num_complete_minibatches * mini_batch_size : m,:,:,:]

        mini_batch_Y = shuffled_Y[num_complete_minibatches * mini_batch_size : m,:]

        mini_batch = (mini_batch_X, mini_batch_Y)

        mini_batches.append(mini_batch)

    return mini_batches

    

def convert_to_one_hot(Y, C):

#1.np.eye(C)是生成CxC大小的单位矩阵

#2.Y.reshape(-1)Y转换为一维数组,长度为m

#3.np.eye(C)[Y.reshape(-1)]就是根据Y.reshape(-1)中的数字以此去取np.eye(C)单位矩阵中对应行,输出结果维度是mxC

    Y = np.eye(C)[Y.reshape(-1)]

    return Y

import numpy as np

import matplotlib.pyplot as plt

import tensorflow as tf

import time

import cv2

import minist_project.cnn_utils as cnn_utils

from minist_project.input_data import load_train_images, load_train_labels, load_test_images, load_test_labels

#创建placeholder,这是为了后面向网络中传入我们的训练数据

def create_placeholders(n_H0, n_W0, n_C0, n_y):

    X = tf.placeholder(tf.float32, [None, n_H0, n_W0, n_C0])

    Y = tf.placeholder(tf.float32, [None, n_y])

    return X, Y

#初始化卷积层的权重参数,全连接层的参数并不需要我们初始化,这一部分tensorflow会去做

def initialize_parameters():

    tf.set_random_seed(1)

#tf.get_variable第二个参数是我们创建变量的维度,[f,f,c,filters]: f表示卷积核的大小,c卷积核的通道数(跟上一层的通道数一样),filters是卷积核的数量。

#W1是第一层卷积层的权重,W2是第二层

    W1 = tf.get_variable("W1", [5,5,1,32], initializer=tf.contrib.layers.xavier_initializer(seed=0))

    W2 = tf.get_variable("W2", [5,5,32,64], initializer=tf.contrib.layers.xavier_initializer(seed=0))

    parameters = {"W1":W1, "W2":W2}

    return parameters

#前向传播

#LeNet5网络结构

def forward_propagation(X, parameters):

#根据我们前面的LeNet5网络的参数进行构建即可

    W1 = parameters["W1"]

    W2 = parameters["W2"]

    #conv1

    #tf.nn.conv2d的第一个参数是我们的输入,第二个参数是卷积核,即我们前面初始化的权重参数

    #第三个参数是步长,默认格式是[batch, height, width, channels],因为我们步长是定义在输入图像的宽高上面,只需要在宽高上面移动就好了,所以第一和第四个通常都为1

    Z1 = tf.nn.conv2d(input=X,filter=W1,strides=[1,1,1,1],padding="VALID")

    A1 = tf.nn.relu(Z1) #激活函数

    #pool1

    #参数定义和上面卷积的差不多

    P1 = tf.nn.max_pool(value=A1,ksize=[1,2,2,1],strides=[1,2,2,1],padding="SAME")

    #conv2

    Z2 = tf.nn.conv2d(input=P1,filter=W2,strides=[1,1,1,1],padding="VALID")

    A2 = tf.nn.relu(Z2)

    #pool2

    P2 = tf.nn.max_pool(value=A2,ksize=[1,2,2,1],strides=[1,2,2,1],padding="SAME")

    #flatten

    P = tf.contrib.layers.flatten(P2)

    #fc1

    #全连接层,第一个是我们的输入,第二个参数是我们指定的该层的神经元数量,默认激活函数是relu

    f1 = tf.contrib.layers.fully_connected(P, 120)

    #fc2

    f2 = tf.contrib.layers.fully_connected(f1, 84)

    #fc3——输出层

    #这里是我们的输出层,注意在这里我们不需要激活函数,因为在计算代价的时候,已经包含了激活函数。

    Z = tf.contrib.layers.fully_connected(f2, 10, activation_fn=None)

    return Z

    

def compute_cost(Z3, Y):

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=Z3,labels=Y))

    return cost

def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001,num_epochs=150,minibatch_size=64,print_cost=True,isPlot=True):

    tf.reset_default_graph()

    tf.set_random_seed(1)

    seed = 3

    (m, n_H0, n_W0, n_C0) = X_train.shape #获取数据集的维度

    n_y = Y_train.shape[1]

    costs = []  #用于存放我们每次迭代的代价

    X, Y = create_placeholders(n_H0,n_W0,n_C0,n_y)

    parameters = initialize_parameters()

    Z5 = forward_propagation(X,parameters)

    cost = compute_cost(Z5, Y)

    #这里使用了Adam的优化器,Adam优化了我们minibatch梯度下降算法,使下降更快

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()  #创建saver用于保存训练后的模型

    total_time = 0  #记录每5次迭代的总时间

    with tf.Session() as sess:

        sess.run(init)

        for epoch in range(1,num_epochs+1):

            start_time = time.clock()

            minibatch_cost = 0

            num_minibatches = int(m / minibatch_size)

            seed = seed + 1

            #分割数据集

            minibatches = cnn_utils.random_mini_batches(X_train, Y_train, minibatch_size, seed)

            for minibatch in minibatches:

                (minibatch_X, minibatch_Y) = minibatch

                #将我们的每个batch的数据传入我们的网络中,然后进行梯度下降

                _, temp_cost = sess.run([optimizer, cost],feed_dict={X:minibatch_X, Y:minibatch_Y})

                minibatch_cost += temp_cost / num_minibatches

            end_time = time.clock()

            minium = end_time - start_time

            total_time += minium

            if print_cost:

                if epoch % 5 == 0:

                    print("当前是第 " + str(epoch) + " 代,成本值为:" + str(minibatch_cost) + " ; 每一个epoch花费时间:" + str(minium) + " 秒,10epoch总的时间:" + str(total_time))

                    total_time = 0

            if epoch % 5 == 0:

                costs.append(minibatch_cost)

#保存模型

        saver.save(sess, "model/model_LeNet5/minist-model")

        if isPlot:

            plt.plot(np.squeeze(costs))

            plt.ylabel("cost")

            plt.xlabel("iterations (per tens)")

            plt.title("Learning rate =" + str(learning_rate))

            plt.show()

#tf.argmax的第二个参数等于1表示我们输出Z中每行最大值的索引

        predict_op = tf.argmax(Z5, 1)

        #tf.equal比较两个矩阵或向量对应元素是否相等,相等就为True,不等就为False

        corrent_prediction = tf.equal(predict_op, tf.argmax(Y,1))

#tf.cast是进行数据转换,这里是bool型转为floatTrue就为1.0False就是0.0

#tf.reduce_mean就是求均值

        accuracy = tf.reduce_mean(tf.cast(corrent_prediction, "float"))

        print("corrent_prediction accuracy = "+str(accuracy))

#这里同样将我们的数据传入我们的tensor张量中

        train_accuracy = accuracy.eval({X:X_train,Y:Y_train})

        test_accuracy = accuracy.eval({X:X_test,Y:Y_test})

        print("训练集准确度:" + str(train_accuracy))

        print("测试及准确度:" + str(test_accuracy))

    return (train_accuracy, test_accuracy, parameters)

#加载数据集

train_x = load_train_images()

train_y = load_train_labels()

test_x = load_test_images()

test_y = load_test_labels()

#因为mnist数据集中每个图片是28x28,我们需要转换成28x28x1

train_x = train_x.reshape(train_x.shape[0],train_x.shape[1],train_x.shape[2], 1)

test_x = test_x.reshape(test_x.shape[0], test_x.shape[1], test_x.shape[2], 1)

#我们还需要把y中的元素转为int,原本是9.0这种float

train_y = train_y.reshape(len(train_y),1).astype(int)

test_y = test_y.reshape(len(test_y),1).astype(int)

train_y = cnn_utils.convert_to_one_hot(train_y,10)

test_y = cnn_utils.convert_to_one_hot(test_y, 10)

print("训练集x", train_x.shape)

print("训练集y", train_y.shape)

print("测试机x", test_x.shape)

print("测试机y", test_y.shape)

# 训练模型

tf.reset_default_graph()

np.random.seed(1)

_, _, parameters = model(train_x, train_y, test_x, test_y, num_epochs=30)

训练集x(60000, 28, 28, 1)

训练集y(60000, 10)

测试机x(10000, 28, 28, 1)

测试机y(10000, 10)

当前是第 5 代,成本值为:0.13700463391617798 ; 每一个epoch花费时间:2.6696909999999434 秒,10epoch总的时间:13.63106999999988

当前是第 10 代,成本值为:0.051648345611684646 ; 每一个epoch花费时间:2.728288999999961 秒,10epoch总的时间:13.54936499999991

当前是第 15 代,成本值为:0.02288695655724883 ; 每一个epoch花费时间:2.730446000000029 秒,10epoch总的时间:13.464165000000037

当前是第 20 代,成本值为:0.010089922937483614 ; 每一个epoch花费时间:2.6733050000000276 秒,10epoch总的时间:13.692039999999963

当前是第 25 代,成本值为:0.0053147655032918335 ; 每一个epoch花费时间:2.714560000000006 秒,10epoch总的时间:13.653553000000045

当前是第 30 代,成本值为:0.004936797035458563 ; 每一个epoch花费时间:2.7167780000000334 秒,10epoch总的时间:13.743517000000054

训练集准确度:0.99885

测试及准确度:0.9837

原文链接:https://blog.csdn.net/CarryLvan/article/details/103693489

猜你喜欢

转载自www.cnblogs.com/benming/p/12106954.html