基于MXNet进行手写数字识别

#载入库
import numpy as np
import os
import gzip
import struct
import logging
import mxnet as mx
import matplotlib.pyplot as plt

# Load the data and set up the iterators.
# Note: read_data below arranges images as (sample, channel, rows, cols);
# these grayscale images have a single channel.
# Normalizing the input matters: keeping absolute values at or below 1
# helps keep the gradients stable.

logging.getLogger().setLevel(logging.DEBUG)  # show debug-level log messages

def read_data(label_url, image_url):
    """Load a gzip-compressed IDX label file and its matching image file.

    Args:
        label_url: Path to the gzip-compressed label file (8-byte header).
        image_url: Path to the gzip-compressed image file (16-byte header).

    Returns:
        A ``(label, image)`` tuple. ``label`` is a 1-D ``int8`` array;
        ``image`` is a ``float32`` array of shape
        ``(len(label), 1, rows, cols)`` scaled to the range [0, 1].

    Raises:
        struct.error: If either file is shorter than its header.
        ValueError: If the image payload does not match
            ``len(label) * rows * cols`` bytes.
    """
    with gzip.open(label_url, 'rb') as flbl:
        # Big-endian header: magic number and item count (values unused).
        struct.unpack(">II", flbl.read(8))
        # np.frombuffer replaces the deprecated binary mode of np.fromstring;
        # copy() keeps the array writable, as the fromstring result was.
        label = np.frombuffer(flbl.read(), dtype=np.int8).copy()
    with gzip.open(image_url, 'rb') as fimg:
        # Big-endian header: magic number, image count, rows, cols.
        _magic, _num, rows, cols = struct.unpack(">IIII", fimg.read(16))
        image = np.frombuffer(fimg.read(), dtype=np.uint8)
    # One channel per image: (sample, channel, rows, cols).
    image = image.reshape(len(label), 1, rows, cols)
    # astype makes a fresh float32 copy; dividing by 255 maps bytes to [0, 1].
    image = image.astype(np.float32) / 255.0
    return (label, image)

# Read the training and validation sets.
# The directory defaults to the original hard-coded location and can be
# overridden through the MNIST_DATA_DIR environment variable.
data_dir = os.environ.get('MNIST_DATA_DIR', 'F:\\package')
(train_lbl, train_img) = read_data(
    os.path.join(data_dir, 'train-labels-idx1-ubyte.gz'),
    os.path.join(data_dir, 'train-images-idx3-ubyte.gz'),
)
(val_lbl, val_img) = read_data(
    os.path.join(data_dir, 't10k-labels-idx1-ubyte.gz'),
    os.path.join(data_dir, 't10k-images-idx3-ubyte.gz'),
)

# Mini-batch size shared by both iterators
batch_size = 32

# Data iterators; shuffling is requested for the training set only
train_iter = mx.io.NDArrayIter(
    data=train_img, label=train_lbl, batch_size=batch_size, shuffle=True)
val_iter = mx.io.NDArrayIter(
    data=val_img, label=val_lbl, batch_size=batch_size)

# Show a few samples: the first 10 training digits side by side.
for i in range(10):
    plt.subplot(1, 10, i + 1)  # 1 row x 10 columns; panel index is 1-based
    plt.imshow(train_img[i].reshape(28, 28), cmap='Greys_r')
    plt.axis('off')
plt.show()  # display the figure once all ten panels are drawn
print('label: %s' % (train_lbl[0:10],))  # labels of the digits shown above

# Network definition
data = mx.sym.Variable('data')

# Flatten each image (e.g. 1*28*28 becomes 784 values) so that it can feed
# a fully connected layer
flatten = mx.sym.Flatten(name="flatten", data=data)

# Hidden layer 1: 128 units followed by a ReLU non-linearity
fc1 = mx.sym.FullyConnected(name="fc1", data=flatten, num_hidden=128)
act1 = mx.sym.Activation(name="act1", data=fc1, act_type="relu")

# Hidden layer 2: 64 units followed by a ReLU non-linearity
fc2 = mx.sym.FullyConnected(name="fc2", data=act1, num_hidden=64)
act2 = mx.sym.Activation(name="act2", data=fc2, act_type="relu")

# Output layer: 10 units, one per digit class
fc3 = mx.sym.FullyConnected(name="fc3", data=act2, num_hidden=10)
# Softmax output turns the 10 scores into 10 class probabilities
net = mx.sym.SoftmaxOutput(name='softmax', data=fc3)

# Print a per-layer parameter summary.
# The mx.viz helpers need to be told the input data shape first.
shape = dict(data=(batch_size, 1, 28, 28))
mx.viz.print_summary(net, shape)
# The parameter counts can be checked by hand. For example, fc1 has 128
# units, each with 784 input weights plus 1 bias, so fc1 holds
# 128 * (784 + 1) = 100480 parameters.
# To render the network graph instead:
# mx.viz.plot_network(symbol=net, shape=shape).view()

# Train the network.
# Use the first GPU when MXNet reports one; otherwise fall back to the CPU.
ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()
module = mx.mod.Module(symbol=net, context=ctx)

# Batches per epoch as an integer derived from the data actually loaded
# (60000 / batch_size is a float under Python 3). Clamped to at least 1 so
# the scheduler step and the logging interval stay valid for tiny data sets.
batches_per_epoch = max(1, len(train_img) // batch_size)

module.fit(
    train_iter,
    eval_data=val_iter,
    optimizer='sgd',
    optimizer_params={
        'learning_rate': 0.2,
        # Multiply the learning rate by 0.9 every batches_per_epoch updates.
        'lr_scheduler': mx.lr_scheduler.FactorScheduler(
            step=batches_per_epoch, factor=0.9),
    },
    num_epoch=20,
    # Log training speed every batches_per_epoch batches.
    batch_end_callback=mx.callback.Speedometer(batch_size, batches_per_epoch),
)

基于MXNet进行手写数字识别

猜你喜欢