Kaggle Handwritten Digit Recognition -- CNN


Project: https://www.kaggle.com/c/digit-recognizer

Method: Convolutional Neural Network (CNN)

Tool: TensorFlow


Code:

Import packages:

import tensorflow as tf
import pandas as pd
import numpy as np
import pylab  
import matplotlib.pyplot as plt
import matplotlib.cm as cm

Read and preprocess the dataset:

# read training data from CSV file 
data = pd.read_csv('C:/Users/FrankFang/Desktop/Kaggle/DigitRecognizer/train.csv')
print('data({0[0]},{0[1]})'.format(data.shape))# (42000,785); the header row is consumed automatically by read_csv
print(data.head())# head() prints the first 5 rows by default


images = data.iloc[:,1:].values# drop the first column (the labels); iloc indexes rows/columns by position
images = images.astype(np.float64)# convert the pixel values to float

# convert from [0:255] => [0.0:1.0]
images = np.multiply(images, 1.0 / 255.0)
print('images({0[0]},{0[1]})'.format(images.shape))# each picture is 28*28 = 784 pixels




image_size = images.shape[1]# number of columns; shape[0] would give the number of rows
print ('image_size => {0}'.format(image_size))



image_width = image_height = np.ceil(np.sqrt(image_size)).astype(np.uint8)
print ('image_width => {0}\nimage_height => {1}'.format(image_width,image_height))
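# The display(...) call used near the end of this post is not defined anywhere in the original
# listing; a minimal helper (a sketch based on the matplotlib/cm imports above) could be:
def display(img):
    # reshape the flat 784-pixel vector back to a 28*28 image and show it in grayscale
    one_image = img.reshape(image_width, image_height)
    plt.axis('off')
    plt.imshow(one_image, cmap=cm.binary)
    plt.show()
# e.g. display(images[10]) shows one training digit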



labels_flat = data.iloc[:,0].values.ravel()# the first column holds the labels; ravel() returns a flattened array

print('labels_flat({0})'.format(len(labels_flat)))
print ('labels_flat[{0}] => {1}'.format(10,labels_flat[10]))
print(labels_flat[1:3])


labels_count = np.unique(labels_flat).shape[0]# number of distinct label classes
print('labels_count => {0}'.format(labels_count))

Convert class labels from scalars to one-hot vectors:

# convert class labels from scalars to one-hot vectors
# 0 => [1 0 0 0 0 0 0 0 0 0]
# 1 => [0 1 0 0 0 0 0 0 0 0]
# ...
# 9 => [0 0 0 0 0 0 0 0 0 1]
def dense_to_one_hot(labels_dense, num_classes):
    num_labels = labels_dense.shape[0]
    index_offset = np.arange(num_labels) * num_classes
    #print(np.arange(num_labels)* num_classes)
    labels_one_hot = np.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    #print(labels_dense.ravel())
    return labels_one_hot

labels = dense_to_one_hot(labels_flat, labels_count)
labels = labels.astype(np.float16)

print('labels({0[0]},{0[1]})'.format(labels.shape))
print ('labels[{0}] => {1}'.format(10,labels[10]))
print (labels[0:3])
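As a cross-check (not used later in the post), indexing an identity matrix with the label values produces the same one-hot encoding; the name labels_check below is only for illustration:

labels_check = np.eye(labels_count)[labels_flat]
print((labels_check == labels).all())# True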

Define the weight and bias initialization functions:

# weight initialization: create a weight Variable filled with small truncated-normal values
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

# bias initialization: create a bias Variable filled with a small positive constant
def bias_variable(shape): 
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

# convolution layer
def conv2d(x,W):
    # x is the input tensor with shape [batch, in_height, in_width, in_channels]
    # W is the filter with shape [filter_height, filter_width, in_channels, out_channels]
    # strides[0] and strides[3] must be 1; strides[1] is the horizontal stride,
    # strides[2] is the vertical stride
    # padding: a string, either "SAME" or "VALID"
    return tf.nn.conv2d(x,W,strides=[1,1,1,1],padding="SAME")

# pooling layer
def max_pool_2X2(x):
    # x: same layout as above
    # ksize=[1,2,2,1]: the first and last entries must be 1, the middle two give a 2*2 pooling window
    # strides: same layout as for conv2d
    return tf.nn.max_pool(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding="SAME")
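# Illustrative check: with padding="SAME" the output spatial size is ceil(in_size / stride),
# independent of the filter size.
print(int(np.ceil(28 / 1)))# 28 -> 28: the stride-1 5*5 convolutions keep the spatial size
print(int(np.ceil(28 / 2)))# 28 -> 14: each 2*2, stride-2 max pool halves it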
# define placeholders
# images
x = tf.placeholder('float', shape=[None, image_size])# the row dimension (number of samples) is left open
# labels
y = tf.placeholder('float', shape=[None, labels_count])# the row dimension (number of samples) is left open

# reshape x into a 4-D tensor [batch, in_height, in_width, in_channels] so the conv op can be applied
x_image=tf.reshape(x,[-1,28,28,1])


# initialize the weights and bias of the first convolutional layer using the helpers defined above

W_conv1 = weight_variable([5,5,1,32])# 5*5 filters, 1 input channel, 32 filters in this layer
b_conv1 = bias_variable([32])# one bias per filter
#print(W_conv1.shape)

# convolve x_image with the weights, add the bias, then apply the ReLU activation
conv2d_1 = conv2d(x_image,W_conv1) + b_conv1
#print(conv2d_1.shape)
h_conv1 = tf.nn.relu(conv2d_1)#print (h_conv1.get_shape()) # => (40000, 28, 28, 32)
#print(h_conv1.shape)
h_pool1 = max_pool_2X2(h_conv1)# max pooling #print (h_pool1.get_shape()) # => (40000, 14, 14, 32)
#print(h_pool1.shape)

# initialize the weights and bias of the second convolutional layer
W_conv2=weight_variable([5,5,32,64])# 5*5 filters, 32 input channels, 64 filters in this layer
b_conv2=bias_variable([64])# one bias per filter

# convolve the previous output h_pool1 with the second-layer weights, add the bias, then apply ReLU
conv2d_2 = conv2d(h_pool1,W_conv2) + b_conv2
h_conv2=tf.nn.relu(conv2d_2)
h_pool2=max_pool_2X2(h_conv2)# max pooling
#print(h_pool2.shape)
# a 28*28 image is still 28*28 after the first convolution and becomes 14*14 after the first pooling
# the second convolution keeps 14*14 and the second pooling reduces it to 7*7
# after these operations we are left with 64 feature maps of size 7*7

########### fully connected layers below #############

# initialize the weights and bias of the first fully connected layer
W_fc1 = weight_variable([7*7*64,1024])# the previous layer has 7*7*64 nodes; this fully connected layer has 1024 nodes
b_fc1 = bias_variable([1024])

# flatten the output of the second pooling layer into one dimension
h_pool2_flat = tf.reshape(h_pool2,[-1,7*7*64])# -1 lets TensorFlow infer the batch dimension
# output of the first fully connected layer
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat,W_fc1) + b_fc1)

# keep_prob is the probability that a neuron's output is kept during dropout
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1,keep_prob)# randomly drop part of the first fully connected layer's outputs

# initialize the weights and bias of the second fully connected layer
W_fc2 = weight_variable([1024,labels_count])
b_fc2 = bias_variable([labels_count])

# the output of the second fully connected layer, passed through softmax, is the final prediction
prediction = tf.nn.softmax(tf.matmul(h_fc1_drop,W_fc2) + b_fc2)# the row dimension of prediction is set by how many samples are fed into the placeholder
#print(prediction.shape) #(?, 10)

# cross-entropy cost function:
#cross_entropy=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y,logits=prediction))# alternative form of the cross-entropy cost
cross_entropy = -tf.reduce_sum(y*tf.log(prediction))# y*tf.log(prediction) is an element-wise product (shapes must match); reduce_sum adds up all elements of the matrix
#print(tf.log(prediction).shape)  #(?, 10)
#print((y*tf.log(prediction)).shape) #(?, 10)
#tf.summary.scalar("cross_entropy",cross_entropy)

# use the Adam optimizer with learning rate 1e-4 to minimize the loss
train_step = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy)

# the comparison yields a boolean tensor: True means the prediction is correct, False means it is wrong
correct_prediction = tf.equal(tf.argmax(y,1),tf.argmax(prediction,1))# argmax returns the index of the largest value along the given axis
# accuracy = sum of the boolean values divided by their total count
accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))# cast converts the booleans to float32
#tf.summary.scalar("accuracy",accuracy)
prediction_labels = tf.argmax(prediction,1)
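# For intuition, the same accuracy computed with plain NumPy on a toy example
# (illustrative only, not part of the TensorFlow graph):
_demo_true = np.array([[0, 1], [1, 0]])
_demo_pred = np.array([[0.2, 0.8], [0.4, 0.6]])
print(np.mean(np.argmax(_demo_true, 1) == np.argmax(_demo_pred, 1)))# 0.5: one of the two rows is correct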


num_examples = images.shape[0]
# randomly permute the whole training set:
perm = np.arange(num_examples)
np.random.shuffle(perm)
images=images[perm]# shuffle
labels=labels[perm]

num_examples_usedfor_training = 40000# the training set has 42000 samples; use 40000 of them for training
images_usedfor_training = images[0:num_examples_usedfor_training]
labels_usedfor_training = labels[0:num_examples_usedfor_training]
index_in_epoch = 0

def next_batch(batch_size):# batch_size: how many images to return; ideally the data should also be reshuffled here
    global images_usedfor_training
    global labels_usedfor_training
    global index_in_epoch
    if index_in_epoch >= (num_examples_usedfor_training-1):
        index_in_epoch = 0
        perm = np.arange(num_examples_usedfor_training)
        np.random.shuffle(perm)
        images_usedfor_training=images_usedfor_training[perm]# shuffle
        labels_usedfor_training=labels_usedfor_training[perm]

    start = index_in_epoch
    index_in_epoch += (batch_size)
    end = index_in_epoch
    return images_usedfor_training[start:end], labels_usedfor_training[start:end] # note: images[0:3] corresponds to images[0], images[1], images[2]
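# Example (illustrative): xs, ys = next_batch(50) returns arrays of shape (50, 784) and (50, 10);
# 40000 / 50 = 800 calls cover one full pass (epoch) over the training split.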
 

Run a TensorFlow session and start training:

# start TensorFlow session
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init)


# visualisation variables
train_accuracies = []
validation_accuracies = []
cost_values=[]
x_range = []

#writer=tf.summary.FileWriter("logs/",sess.graph)

#Ideally, we should use all data for every step of the training, but that's expensive. So, instead, we use small "batches" of  data.
BATCH_SIZE=50
index_in_epoch = 0
for i in range(2500): # of the 42000 samples, the first num_examples_usedfor_training (40000) are used for training;
                      # the remaining 2000 serve as a validation set to monitor training
    # fetch one batch of data and labels
    batch_xs,batch_ys =  next_batch(BATCH_SIZE)
    #print('batch_xs({0[0]},{0[1]})'.format(batch_xs.shape))
    #print('batch_ys({0[0]},{0[1]})'.format(batch_ys.shape))
    #print(np.mean(batch_xs[0,1:700]))
    if (i%20) == 0: # every 20 iterations, print the current model's accuracy on the upcoming training batch and on the first batch of the validation set
        #acc,cost = sess.run([accuracy,cross_entropy],feed_dict={x:batch_xs,y:batch_ys,keep_prob:1.0})
        train_acc = accuracy.eval(feed_dict={x:batch_xs,y:batch_ys,keep_prob:1.0})
        validation_acc = accuracy.eval(feed_dict={x:images[num_examples_usedfor_training:num_examples_usedfor_training+BATCH_SIZE],y:labels[num_examples_usedfor_training:num_examples_usedfor_training+BATCH_SIZE],keep_prob:1.0})
        cost = cross_entropy.eval(feed_dict={x:batch_xs,y:batch_ys,keep_prob:1.0})
        
        train_accuracies.append(train_acc)
        validation_accuracies.append(validation_acc)
        cost_values.append(cost)
        x_range.append(i)
        
        print("Iter= " + str(i) + ",Current training batech's Accuracy= " + str(train_acc) + ",cost= " + str(cost))
        
    # feed the batch into the model and run one training step
    sess.run(train_step,feed_dict={x:batch_xs,y:batch_ys,keep_prob:0.4})    
    
    #summary=sess.run(merged,feed_dict={x:batch_xs,y:batch_ys,keep_prob:1.0})
    #writer.add_summary(summary,i) 
    #print(predic)
plt.plot(x_range, train_accuracies,'-b', label='Training')
plt.plot(x_range, validation_accuracies,'-r', label='Validation')
plt.legend(loc='lower right', frameon=False)
plt.ylim(0.0, 1.1)
plt.ylabel('accuracy')
plt.xlabel('step')
plt.show()

# after training, evaluate on the last 2000 samples of the training data; test one batch at a time, since evaluating all of them at once can exhaust memory
validation_accuracies_2 = []
#index_in_epoch=40000
for i in range(40):#2000/50=40
    #batch_xs,batch_ys =  next_batch(BATCH_SIZE)
    batch_xs=images[i*BATCH_SIZE + num_examples_usedfor_training : (i+1)*BATCH_SIZE + num_examples_usedfor_training]
    batch_ys=labels[i*BATCH_SIZE + num_examples_usedfor_training : (i+1)*BATCH_SIZE + num_examples_usedfor_training]
    validation_acc = accuracy.eval(feed_dict={x:batch_xs ,y:batch_ys,keep_prob:1.0})
    validation_accuracies_2.append(validation_acc)
print( "validation training_set Accuracy= " + str(sum(validation_accuracies_2)/float(len(validation_accuracies_2))) )    

# read the test set and use the trained model to make predictions

# read test data from CSV file; .values is required so the images can be displayed later
image_test = pd.read_csv('C:/Users/FrankFang/Desktop/Kaggle/DigitRecognizer/test.csv').values# (28000,784)
print('image_test({0[0]},{0[1]})'.format(image_test.shape))
#print(image_test[:5])# head() is no longer available because image_test is now a NumPy array
data_test = image_test.astype(np.float64)

# convert from [0:255] => [0.0:1.0]
data_test = np.multiply(data_test, 1.0 / 255.0)

predicted_labels = np.zeros(data_test.shape[0])
for i in range(560):# 28000/50 = 560
    predicted_labels[i*BATCH_SIZE : (i+1)*BATCH_SIZE] = prediction_labels.eval(feed_dict={x: data_test[i*BATCH_SIZE : (i+1)*BATCH_SIZE], 
                                                                              keep_prob: 1.0})
print('predicted_labels({0})'.format(len(predicted_labels)))

# output a test image and its prediction
display(image_test[5])# display(images[10]) would show a training sample instead

print("predicted_labels[5] = " + str(predicted_labels[5]))

# save results
np.savetxt('submission_softmax.csv', 
           np.c_[range(1,len(data_test)+1),predicted_labels], 
           delimiter=',', 
           header = 'ImageId,Label', 
           comments = '', 
           fmt='%d')
sess.close()
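The same submission file can also be written with pandas (a sketch equivalent to the np.savetxt call above; Kaggle expects exactly the two columns ImageId and Label):

submission = pd.DataFrame({'ImageId': np.arange(1, len(predicted_labels) + 1),
                           'Label': predicted_labels.astype(int)})
submission.to_csv('submission_softmax.csv', index=False)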
