tensorflow实现两层神经网络 (附代码)

不得不说,本人真是属于比较懒的那种,特别是在写文章这块,好多次想写,但是。。一想到要写一堆字就满心郁闷。

下面是正题,这篇文章将会贴出两份用tensorflow实现两层神经网络的代码,一份纯手写,一份是从腾讯云里提供的代码,复制下来,然后改动了一下而成的。贴出代码有两个目的:

(1)自己找代码实在找的好辛苦啊,贴出这两份,希望能减少别人的一些弯路。。

(2)我这两份代码使用的参数可以说是完全一样了,但是出现两个问题

         (1)首先预测的准确率就差很多,腾讯云那份代码能达到0.9的准确率(嗯。。说来惭愧,本来用它代码里的那个数据,能达到0.94的准确率,然后我改了改,将输入改成本地的数据,判断准确率的函数换成了自己的,然后两层迭代也改成了一层迭代,一些迭代次数这种小参数也改了一下,嗯。。准确率就降到0.89左右了,这个下降的原因还得思考一下),而我自己写的代码,准确率最高能达到0.79左右?这个还得看运气。。有时候0.71什么的都不奇怪。。

ps:哈哈,更正更正,我自己的也是能达到0.87左右的,哈哈,嗯。。看来这个果然看运气。。。为什么。。

哈哈,图为证!!

         (2)一开始得到的loss也完全不一样,腾讯云的那份一开始的loss都是以几千计的(比如5000多),然后loss下降的速度也很快,经过100轮之后能降到50~60左右;而我的那份就不一样了,loss是从2.3左右这样的数字开始的,最后只能降到0.8~0.9左右。

所以我就很不明白,数据集一样,参数一样,为什么出来的结果完全不一样。。判断准确率的函数也是一样的,问题先丢在这,等以后理解深一点了,再回来看看。

最后还有一点,有一段时间,我发现我写的代码,loss完全没法更新,就是更新来更新去都在2.3左右,然后发现这个好像跟w的起始权重也有关系

如果w用tf.truncated_normal(stddev=0.1)来初始化,loss就可以更新;用tf.random_normal来初始化就不可以。但是腾讯云里面的w用的就是tf.random_normal,这个还得看一下这两个函数的区别

下面不多说废话,第一份是我自己的,哈哈

import tensorflow as tf
import numpy as np
import pickle
import gzip
# 首先定义输入

def get_input():
    """Load MNIST from ``./mnist.pkl.gz`` and return train/test data.

    The pickle is expected to unpack into ``(training, validation, test)``
    where each split is indexable as ``(images, labels)``. The validation
    split is read but not used.

    Returns:
        tuple: ``(train_x, train_y, test_x, test_y)``. Images are lists of
        arrays reshaped to ``(1, 784)``; labels are lists of ``(10, 1)``
        one-hot columns produced by ``vectorized``.
    """
    # NOTE: unpickling can execute arbitrary code -- only load a trusted file.
    # The context manager guarantees the gzip handle is closed, which the
    # previous bare gzip.open() call never did.
    with gzip.open('./mnist.pkl.gz', 'rb') as f:
        training_data, _, test_data = pickle.load(f, encoding='bytes')

    train_x = [np.reshape(x, (1, 784)) for x in training_data[0]]
    train_y = [vectorized(y) for y in training_data[1]]
    test_x = [np.reshape(x, (1, 784)) for x in test_data[0]]
    test_y = [vectorized(y) for y in test_data[1]]
    return train_x, train_y, test_x, test_y

def get_batch(train_x,train_y,batch_size):
    """Draw a random mini-batch (without replacement) from the training set.

    Args:
        train_x: Indexable sequence of images; each item must hold 784 values.
        train_y: Indexable sequence of one-hot labels; each item must hold
            10 values.
        batch_size: Number of samples to draw.

    Returns:
        tuple: ``(batch_x, batch_y)`` -- two lists of length ``batch_size``
        holding 1-D arrays of length 784 and 10 respectively, with images and
        labels kept aligned.

    Raises:
        IndexError: If ``batch_size`` exceeds ``len(train_x)``.
    """
    order = np.arange(len(train_x))
    np.random.shuffle(order)
    chosen = [order[i] for i in range(batch_size)]
    # Flatten only the sampled rows. Converting the whole dataset to a fresh
    # (N, 784) array on every call was pure overhead.
    batch_x = [np.reshape(train_x[j], 784) for j in chosen]
    batch_y = [np.reshape(train_y[j], 10) for j in chosen]
    return batch_x, batch_y

def vectorized(num):
    """Return a ``(10, 1)`` one-hot column vector with row ``num`` set to 1."""
    one_hot = np.zeros(shape=(10, 1))
    one_hot[num] = 1
    return one_hot

def predict(test_x,test_y,X,Y,batch_size):
    """Evaluate classification accuracy on the test set and print it.

    Relies on the module-level ``sess`` (the active TensorFlow session) to
    run the graph.

    Args:
        test_x: Sequence of test images, each holding 784 values.
        test_y: Sequence of one-hot labels, each holding 10 values.
        X: Input placeholder fed with a ``(batch_size, 784)`` array.
        Y: Output tensor producing one score row per input row.
        batch_size: Number of rows fed per ``sess.run`` call.

    Returns:
        float: Fraction of correctly classified samples (0.0 when the test
        set is empty).
    """
    data_num = len(test_x)
    print("this is test data_num:",data_num)
    count = 0
    print("this is predict")
    for start in range(0, data_num, batch_size):
        stop = min(start + batch_size, data_num)
        valid = stop - start
        # Always feed exactly batch_size rows: a short final batch is
        # zero-padded and the padding rows are ignored when scoring.
        # Previously a trailing partial batch made the reshape raise.
        batch_x = np.zeros((batch_size, 784), dtype=np.float32)
        batch_x[:valid] = np.reshape(test_x[start:stop], (valid, 784))
        labels = np.reshape(test_y[start:stop], (valid, 10))

        outputs = sess.run(Y, feed_dict={X: batch_x})
        # Row-wise arg-max of scores vs. one-hot labels (first maximum wins).
        hits = np.argmax(outputs[:valid], axis=1) == np.argmax(labels, axis=1)
        count += int(np.sum(hits))
    # Guard against an empty test set instead of dividing by zero.
    accuracy = count / data_num if data_num else 0.0
    print("this is count:",count)
    print("this is precise:",accuracy)
    return accuracy

def return_big(arr):
    """Return the index of the largest element of a 1-D sequence.

    Hand-written arg-max: on ties the first occurrence wins. Indexing
    ``arr[0]`` means an empty sequence raises ``IndexError``.
    """
    best_value = arr[0]
    best_index = 0
    # Scan the remaining elements, replacing the leader only on a strict win.
    for index, value in enumerate(arr[1:], start=1):
        if value > best_value:
            best_value, best_index = value, index
    return best_index

if __name__ == "__main__":
    # ---- Hyper-parameters ----
    batch_size = 100
    # Number of units in each hidden layer.
    hidden_layer1 = 500
    hidden_layer2 = 500
    # Step size for plain gradient descent.
    learning_rate = 0.1
    # Number of training steps. Each step consumes ONE random mini-batch,
    # so this is not a count of full passes over the data despite the name.
    epoch_num = 150

    # Both placeholders are fixed to batch_size rows.
    x = tf.placeholder(tf.float32,[batch_size,784])
    # Despite its name, y_pred holds the ground-truth one-hot labels.
    y_pred = tf.placeholder(tf.float32,[batch_size,10])

    # Weights use a truncated normal with stddev 0.1; biases start at 0.01.
    w1 = tf.Variable(tf.truncated_normal([784,hidden_layer1],stddev = 0.1))
    b1 = tf.Variable(tf.zeros([1,hidden_layer1])+0.01)
    w2 = tf.Variable(tf.truncated_normal([hidden_layer1,hidden_layer2],stddev = 0.1))
    b2 = tf.Variable(tf.zeros([1,hidden_layer2])+0.01)
    w3 = tf.Variable(tf.truncated_normal([hidden_layer2,10],stddev = 0.1))
    b3 = tf.Variable(tf.zeros([1,10])+0.01)

    # ---- Forward pass ----
    h1 = tf.nn.relu(tf.matmul(x,w1)+b1)
    h2 = tf.nn.relu(tf.matmul(h1,w2)+b2)
    # Keep the raw scores separate from the softmax probabilities.
    logits = tf.matmul(h2,w3)+b3
    y = tf.nn.softmax(logits)

    # ---- Loss and optimizer ----
    # softmax_cross_entropy_with_logits applies softmax itself, so it must
    # be given the raw logits. Feeding it the already-softmaxed y applied
    # softmax twice: the scores were squeezed into [0, 1], the loss started
    # near ln(10) ~= 2.3, and there was a floor on how low it could go.
    entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_pred, logits=logits)
    loss = tf.reduce_mean(entropy)
    optimize = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    init = tf.global_variables_initializer()

    # `sess` is intentionally a module-level name: predict() reads it.
    with tf.Session() as sess:
        sess.run(init)
        train_x,train_y,test_x,test_y = get_input()
        print(len(train_x))
        print(len(train_y))
        for i in range(epoch_num):
            t_x,t_y = get_batch(train_x,train_y,batch_size)
            _,_loss = sess.run([optimize,loss],feed_dict = {x:t_x,y_pred:t_y})
            print("loss of epoches[{0}]:{1}".format(i,_loss))

        # Report test accuracy; arg-max over y equals arg-max over logits.
        predict(test_x,test_y,x,y,batch_size)

下面是腾讯云的,但是是我改动过了的,嗯。。原版。。被我覆盖掉了,想要的就到腾讯云找吧

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np
import tensorflow as tf
import pickle
import gzip
# from tensorflow.examples.tutorials.mnist import input_data

def add_layer(inputs, in_size, out_size, activation_function=None):
    """Append one fully connected layer to the graph and return its output.

    Args:
        inputs: Tensor multiplied on the left of the weight matrix.
        in_size: Number of input features (rows of the weight matrix).
        out_size: Number of output units (columns of the weight matrix).
        activation_function: Optional callable applied to the affine
            output; when ``None`` the layer is linear.

    Returns:
        The layer output tensor.
    """
    # Weights are drawn from tf.random_normal; biases start at 0.01.
    weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.01)

    pre_activation = tf.matmul(inputs, weights) + biases
    if activation_function is not None:
        return activation_function(pre_activation)
    return pre_activation

def get_batch(train_x,train_y,batch_size):
    """Sample ``batch_size`` distinct training examples at random.

    Args:
        train_x: Indexable sequence of images; each item must hold 784 values.
        train_y: Indexable sequence of one-hot labels; each item must hold
            10 values.
        batch_size: Number of examples to return.

    Returns:
        tuple: ``(batch_x, batch_y)`` -- lists of ``batch_size`` flat arrays
        (length 784 and 10 respectively); entry ``k`` of both lists comes
        from the same training example.

    Raises:
        IndexError: If ``batch_size`` exceeds ``len(train_x)``.
    """
    shuffled = np.arange(len(train_x))
    np.random.shuffle(shuffled)
    picked = [shuffled[k] for k in range(batch_size)]
    # Only the picked examples are flattened. The earlier version copied the
    # complete dataset into a new (N, 784) array on every single call.
    batch_x = [np.reshape(train_x[idx], 784) for idx in picked]
    batch_y = [np.reshape(train_y[idx], 10) for idx in picked]
    return batch_x, batch_y

def get_input():
    """Load MNIST from ``./mnist.pkl.gz`` and return train/test data.

    The pickle is expected to unpack into ``(training, validation, test)``
    where each split is indexable as ``(images, labels)``. The validation
    split is read but not used.

    Returns:
        tuple: ``(train_x, train_y, test_x, test_y)``. Images are lists of
        arrays reshaped to ``(784, 1)``; labels are lists of ``(10, 1)``
        one-hot columns produced by ``vectorized``.
    """
    # NOTE: unpickling can execute arbitrary code -- only load a trusted file.
    # Using a context manager closes the gzip handle, which the previous
    # bare gzip.open() call left open.
    with gzip.open('./mnist.pkl.gz', 'rb') as f:
        training_data, _, test_data = pickle.load(f, encoding='bytes')

    train_x = [np.reshape(x, (784, 1)) for x in training_data[0]]
    train_y = [vectorized(y) for y in training_data[1]]
    test_x = [np.reshape(x, (784, 1)) for x in test_data[0]]
    test_y = [vectorized(y) for y in test_data[1]]
    return train_x, train_y, test_x, test_y

def vectorized(num):
    """Encode class index ``num`` as a one-hot column of shape ``(10, 1)``."""
    column = np.zeros(shape=(10, 1))
    column[num] = 1
    return column

def return_big(arr):
    """Return the position of the maximum of a 1-D sequence.

    Behaves like a hand-rolled arg-max: the earliest maximum wins on ties,
    and an empty sequence raises ``IndexError`` from the ``arr[0]`` lookup.
    """
    leader_value = arr[0]
    leader_pos = 0
    # Only a strictly greater element displaces the current leader.
    for pos, candidate in enumerate(arr[1:], start=1):
        if candidate > leader_value:
            leader_value, leader_pos = candidate, pos
    return leader_pos

def predict(test_x,test_y,X,Y,batch_size):
    """Evaluate classification accuracy on the test set and print it.

    Relies on the module-level ``sess`` (the active TensorFlow session) to
    run the graph.

    Args:
        test_x: Sequence of test images, each holding 784 values.
        test_y: Sequence of one-hot labels, each holding 10 values.
        X: Input placeholder fed with a ``(batch_size, 784)`` array.
        Y: Output tensor producing one score row per input row.
        batch_size: Number of rows fed per ``sess.run`` call.

    Returns:
        float: Fraction of correctly classified samples (0.0 when the test
        set is empty).
    """
    data_num = len(test_x)
    print("this is test data_num:",data_num)
    count = 0
    print("this is predict")
    for start in range(0, data_num, batch_size):
        stop = min(start + batch_size, data_num)
        valid = stop - start
        # Always feed exactly batch_size rows: a short final batch is
        # zero-padded and the padding rows are ignored when scoring.
        # Previously a trailing partial batch made the reshape raise.
        batch_x = np.zeros((batch_size, 784), dtype=np.float32)
        batch_x[:valid] = np.reshape(test_x[start:stop], (valid, 784))
        labels = np.reshape(test_y[start:stop], (valid, 10))

        outputs = sess.run(Y, feed_dict={X: batch_x})
        # Row-wise arg-max of scores vs. one-hot labels (first maximum wins).
        hits = np.argmax(outputs[:valid], axis=1) == np.argmax(labels, axis=1)
        count += int(np.sum(hits))
    # Guard against an empty test set instead of dividing by zero.
    accuracy = count / data_num if data_num else 0.0
    print("this is count:",count)
    print("this is precise:",accuracy)
    return accuracy

if __name__ == "__main__":
    # ---- Hyper-parameters ----
    learning_rate = 0.01
    batch_size = 100
    # Number of training steps; each one uses a single random mini-batch.
    n_epochs = 100

    # Placeholders for a batch of flattened images and their one-hot labels.
    X = tf.placeholder(tf.float32, [batch_size, 784])
    Y = tf.placeholder(tf.float32, [batch_size, 10])

    # Layer widths from input to output: 784 -> 500 -> 500 -> 10.
    layer_dims = [784, 500, 500, 10]

    # Stack the ReLU hidden layers, pairing each width with the next one.
    hidden = X
    for fan_in, fan_out in zip(layer_dims[:-2], layer_dims[1:-1]):
        hidden = add_layer(hidden, fan_in, fan_out, activation_function=tf.nn.relu)
    # The output layer is linear: `prediction` holds raw logits.
    prediction = add_layer(hidden, layer_dims[-2], layer_dims[-1], activation_function=None)

    # Cross-entropy on the logits, averaged over the batch.
    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=prediction)
    loss = tf.reduce_mean(per_example_loss)

    train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)

    init = tf.global_variables_initializer()

    # `sess` must remain a module-level name because predict() reads it.
    with tf.Session() as sess:
        sess.run(init)

        train_x,train_y,test_x,test_y = get_input()
        print("this is train_x len:",len(train_x))
        for step in range(n_epochs):
            print("this is ",step," epochs!!")
            X_batch, Y_batch = get_batch(train_x,train_y,batch_size)
            feed = {X: X_batch, Y: Y_batch}
            _, loss_ = sess.run([train_op, loss], feed_dict=feed)
            print("Loss of epochs[{0}]: {1}".format(step, loss_))

        # Evaluate on the test split; arg-max over logits picks the class.
        predict(test_x,test_y,X,prediction,batch_size)

猜你喜欢

转载自blog.csdn.net/yolan6824/article/details/81637692