(2) RNN - Recurrent Neural Network - Implementation in TensorFlow

        Since I need to implement an RNN-LSTM network in TensorFlow, I went looking for an introductory tutorial (original article link); the example it gives fits my current needs very well and taught me a lot. There are several explanations of this code online, and the blog post I found most illuminating is 深度学习(07)_RNN-循环神经网络-02-Tensorflow中的实现, whose content I largely follow. Here I only record my own understanding of the code (since I am not very familiar with Python or TensorFlow, there may well be serious mistakes, which I will correct as I learn more):

Data processing flow: gen_data generates the raw 0/1 sequence, gen_batch slices it into (X, Y) chunks of shape batch_size * num_steps, and gen_epochs yields one such batch generator per epoch.

Annotated code:

import numpy as np
import tensorflow as tf
from tensorflow.python import debug as tf_debug
import matplotlib.pyplot as plt


#num_steps is the memory depth: the length of each input sequence, i.e. intuitively the number of unrolled cells
num_steps = 5
#batch_size is the number of sequences processed in parallel in each training step
batch_size = 200
#number of possible output classes
num_classes = 2
#number of neurons (hidden units) inside each cell
state_size = 4
learning_rate = 0.1

#generate a 0/1 sequence of length 1,000,000 according to a hand-made rule
def gen_data(size=1000000):
	X = np.array(np.random.choice(2, size=(size,)))
	Y = []
	for i in range(size):
		threshold = 0.5
		#if X[i-3] is 1, the probability that Y[i] is 1 increases by 0.5
		if X[i-3] == 1:
			threshold += 0.5
		#if X[i-8] is 1, the probability that Y[i] is 1 decreases by 0.25
		#(if both conditions hold, the net change is +0.25)
		if X[i-8] == 1:
			threshold -= 0.25
		if np.random.rand() > threshold:
			Y.append(0)
		else:
			Y.append(1)
	return X, np.array(Y)

#reshape the raw sequence into epoch_size (X, Y) pairs, each of shape batch_size * num_steps
def gen_batch(raw_data, batch_size, num_steps):
	raw_x, raw_y = raw_data
	data_length = len(raw_x)
	batch_partition_length = data_length // batch_size
	data_x = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
	data_y = np.zeros([batch_size, batch_partition_length], dtype=np.int32)
	for i in range(batch_size):
		data_x[i] = raw_x[batch_partition_length*i:batch_partition_length*(i+1)]
		data_y[i] = raw_y[batch_partition_length*i:batch_partition_length*(i+1)]
	epoch_size = batch_partition_length // num_steps
	for i in range(epoch_size):
		x = data_x[:, i * num_steps:(i + 1) * num_steps]
		y = data_y[:, i * num_steps:(i + 1) * num_steps]
		yield (x, y)


def gen_epochs(n, num_steps):
	for i in range(n):
		yield gen_batch(gen_data(), batch_size, num_steps)
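#with the defaults above (gen_data() size 1,000,000, batch_size 200, num_steps 5):
#batch_partition_length = 1,000,000 // 200 = 5,000 and epoch_size = 5,000 // 5 = 1,000,
#so each epoch yields 1,000 (X, Y) batches of shape (200, 5)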

x = tf.placeholder(tf.int32, [batch_size, num_steps], name="x")
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='y')
init_state = tf.zeros([batch_size, state_size])
#tf.one_hot converts x, a batch_size*num_steps 2-D tensor of class indices, into one-hot form,
#so x_one_hot is a batch_size*num_steps*num_classes 3-D tensor
x_one_hot = tf.one_hot(x, num_classes)
#tf.unstack splits x_one_hot along axis 1 into num_steps 2-D tensors of shape batch_size*num_classes
rnn_inputs = tf.unstack(x_one_hot, axis=1)
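#shape check with the constants above:
#  x          : (200, 5)    int32
#  x_one_hot  : (200, 5, 2) float32
#  rnn_inputs : a Python list of 5 tensors, each of shape (200, 2)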
#a variable scope lets every time step share the same variables W and b
with tf.variable_scope('rnn_cell'):
	#the input x and the previous state are concatenated to compute the current state
	W = tf.get_variable('W', [num_classes + state_size, state_size])
	b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
#given the input and the previous state, return the state at the current time step
def rnn_cell(rnn_input, state):
	with tf.variable_scope('rnn_cell', reuse=True):
		W = tf.get_variable('W', [num_classes+state_size, state_size])
		b = tf.get_variable('b', [state_size], initializer=tf.constant_initializer(0.0))
	return tf.tanh(tf.matmul(tf.concat([rnn_input, state],1),W) + b)
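#shapes inside rnn_cell with the constants above: concatenating rnn_input (200, 2)
#with state (200, 4) gives (200, 6); W is (6, 4) and b is (4,), so the new state is (200, 4)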
#initialize the state at time step -1 to zeros
state = init_state
rnn_outputs = []
for rnn_input in rnn_inputs:
	state = rnn_cell(rnn_input, state)
	rnn_outputs.append(state)
#rnn_outputs[-1] is the last element of rnn_outputs, i.e. the state after the final step
final_state = rnn_outputs[-1]

with tf.variable_scope('softmax'):
	W = tf.get_variable('W', [state_size, num_classes])
	b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))
	#compute the per-step logits and softmax predictions
	logits = [tf.matmul(rnn_output, W) + b for rnn_output in rnn_outputs]
	predictions = [tf.nn.softmax(logit) for logit in logits]
#y is a batch_size*num_steps 2-D tensor; tf.unstack splits it along axis 1 into num_steps 1-D tensors of length batch_size
y_as_list = tf.unstack(y, num=num_steps, axis=1)
#use cross-entropy as the per-step loss; zip() pairs the logits list with y_as_list element-wise into n tuples (n is the length of the shorter list)
losses = [tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label,logits=logit) for logit, label in zip(logits, y_as_list)]
total_loss = tf.reduce_mean(losses)
train_step = tf.train.AdagradOptimizer(learning_rate).minimize(total_loss)
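#shape note: logits, predictions and losses are Python lists of num_steps (= 5) tensors;
#each logit/prediction has shape (200, 2) and each per-step loss has shape (200,);
#tf.reduce_mean over the stacked losses gives the scalar total_loss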

def train_rnn(num_epochs, num_steps, state_size=4, verbose=True):
	with tf.Session() as sess:
		sess.run(tf.global_variables_initializer())
		training_losses = []
		#epoch is one batch generator yielded by gen_epochs; each item it yields is an
		#(X, Y) pair of shape batch_size*num_steps; idx is the index of the outer (epoch) loop
		for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps)):
			training_loss = 0
			training_state = np.zeros((batch_size, state_size))
			if verbose:
				print('\nepoch', idx)
			#step is the index of the inner (batch) loop
			for step, (X, Y) in enumerate(epoch):
				tr_losses, training_loss_, training_state, _ = sess.run(
					[losses, total_loss, final_state, train_step],
					feed_dict={x: X, y: Y, init_state: training_state})
				training_loss += training_loss_
				if step % 100 == 0 and step > 0:
					if verbose:
						print('Average loss at step {0}: {1}'.format(step, training_loss/100))
					training_losses.append(training_loss/100)
					training_loss = 0
	return training_losses

training_losses = train_rnn(num_epochs=1, num_steps=num_steps, state_size=state_size)
print(training_losses[0])
plt.plot(training_losses)
plt.show()
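
Before training, a quick way to sanity-check the data pipeline is to run gen_batch directly and inspect the shapes it yields. A minimal sketch (the small size value here is only for illustration and is not from the original code):

#quick sanity check of the data pipeline (illustrative only)
for X, Y in gen_batch(gen_data(size=10000), batch_size, num_steps):
	print(X.shape, Y.shape)   #expected: (200, 5) (200, 5)
	break
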
The network:

      The cell has state_size internal neurons and the batch dimension is batch_size, so the input at each time step is a batch_size * num_classes tensor; num_steps is the length of the data sequence used in one training step, i.e. the network only keeps memory over a window of num_steps steps.
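
For comparison, the manually unrolled rnn_cell loop above can also be written with TensorFlow's built-in cell. A minimal sketch, assuming the TensorFlow 1.x API (tf.nn.rnn_cell.BasicRNNCell and tf.nn.static_rnn), not part of the original post; it produces rnn_outputs and final_state with the same shapes as the hand-written version:

#sketch: built-in RNN cell instead of the hand-written rnn_cell loop (TensorFlow 1.x API assumed)
cell = tf.nn.rnn_cell.BasicRNNCell(state_size)
rnn_outputs, final_state = tf.nn.static_rnn(cell, rnn_inputs, initial_state=init_state)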

Reposted from blog.csdn.net/Micusd/article/details/80986261