版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_32023541/article/details/83653360
seq2seq 函数说明
处理 seq2seq 任务 序列到序列问题
编码 - 解码框架: Encoder - Decoder 先使用编码器将输入编码映射到语义空间,得到一个固定维数的向量,这个向量就表示
输入的语义;然后再使用解码器将这个向量解码,获得所需要的输出。
注意:基本的 seq2seq 是Encoder 生成的最后一个时刻的 c 参与到Decoder 的初始时刻
在 seq2seq 中,标签不仅仅用来计算 loss ,还参与节点运算
TensorFlow 中的 seq2seq:
tf.contrib.legacy_seq2seq.basic_rnn_seq2seq(encoder_inputs,
decoder_inputs,
cell,
dtype = dtypes.float32,
scope = None)
encoder_inputs: 一个形状为 [batch_size,input_size] 的基于时序的 list
decoder_inputs: 同 encoder_inputs
cell : 定义的 cell 网络
返回值: outputs 和 state 。 outputs 为 [batch_size,output_size] 的张量组成的基于时序的 list 。 state 为最终时刻的状态张量,形状为 [batch_size,cell.state_size](不是基于时序的 list)
基本结构图:Encoder 生成的最后一个时刻的 c 参与到Decoder 的初始时刻,并且标签也参与运算作为 Decoder 各个时刻的输入。
Basic seq2seq 代码示例
# -*- coding:utf-8 -*-
import random
import math
import tensorflow as tf
import numpy as np
def do_generate_x_y(isTrain, batch_size, seqlen):
    """Generate one batch of synthetic signals for seq2seq training.

    Each sample is the sum of a random sine wave and a random cosine wave,
    sampled at ``2 * seqlen`` points; the first half is the encoder input
    and the second half is the target the decoder must predict.

    Args:
        isTrain: unused here; kept for interface compatibility with callers.
        batch_size: number of independent signals to generate.
        seqlen: length of the input (and of the target) sequence.

    Returns:
        (batch_x, batch_y), both time-major numpy arrays of shape
        [seqlen, batch_size, 1].
    """
    xs, ys = [], []
    n_points = seqlen * 2
    for _ in range(batch_size):
        # First component: random sine (phase, frequency, amplitude drawn
        # in this exact order so seeded runs stay reproducible).
        phase = random.random() * 2 * math.pi
        freq = (random.random() - 0.5) / 1.5 * 15 + 0.5
        amp = random.random() + 0.1
        lo = seqlen / 15.0 * freq * 0.0 * math.pi + phase
        hi = seqlen / 15.0 * freq * 3.0 * math.pi + phase
        signal = amp * np.sin(np.linspace(lo, hi, n_points))
        # Second component: random cosine added on top of the sine.
        phase = random.random() * 2 * math.pi
        freq = (random.random() - 0.5) / 1.5 * 15 + 0.5
        amp = random.random() * 1.2
        lo = seqlen / 15.0 * freq * 0.0 * math.pi + phase
        hi = seqlen / 15.0 * freq * 3.0 * math.pi + phase
        signal = amp * np.cos(np.linspace(lo, hi, n_points)) + signal
        # Split into past (input) and future (target), each [seqlen, 1].
        xs.append(signal[:seqlen].reshape(-1, 1))
        ys.append(signal[seqlen:].reshape(-1, 1))
    # Stack along axis 1 -> time-major [seqlen, batch_size, output_dim].
    batch_x = np.stack(xs, axis=1)
    batch_y = np.stack(ys, axis=1)
    return batch_x, batch_y
def generate_data(isTrain, batch_size):
    """Build a batch of signals: length 15 for training, 30 for evaluation.

    Args:
        isTrain: True for a training batch, False for a (longer) test batch.
        batch_size: number of signals in the batch.

    Returns:
        (batch_x, batch_y) time-major arrays, see ``do_generate_x_y``.
    """
    base_length = 15
    chosen_length = base_length if isTrain else base_length * 2
    return do_generate_x_y(isTrain, batch_size, seqlen=chosen_length)
if __name__ == "__main__":
sample_now,sample_f = generate_data(True,batch_size = 3)
print ("training examples:")
print sample_now.shape
print "(seq_len,batch_size,output_dim)"
seq_length = sample_now.shape[0]
batch_size = 10
output_dim = input_dim = sample_now.shape[-1]
hidden_dim = 12
layers_stacked_count = 2
# 学习率
learning_rate = 0.1
nb_iters = 100
lambda_l2_reg = 0.003
tf.reset_default_graph()
encoder_input = []
expected_output = []
decode_input = []
for i in range(seq_length):
encoder_input.append(tf.placeholder(tf.float32,shape = (None,input_dim)))
expected_output.append(tf.placeholder(tf.float32,shape = (None,output_dim)))
decode_input.append(tf.placeholder(tf.float32,shape = (None,input_dim)))
tcells = []
for i in range(layers_stacked_count):
tcells.append(tf.contrib.rnn.GRUCell(hidden_dim))
Mcell = tf.contrib.rnn.MultiRNNCell(tcells)
# 基于 Basic seq2seq ,dec_outputs 的shape [batch_size,hidden_dim] 的基于时序的 list
dec_outputs,dec_memory = tf.contrib.legacy_seq2seq.basic_rnn_seq2seq(encoder_input,decode_input,Mcell)
reshaped_outputs = []
for ii in dec_outputs:
reshaped_outputs.append(tf.contrib.layers.fully_connected(ii,output_dim,activation_fn = None))
output_loss = 0
for _y,_Y in zip(reshaped_outputs,expected_output):
output_loss += tf.reduce_mean(tf.pow(_y - _Y,2)) # 均方差
# 求正则化损失
reg_loss = 0
for tf_var in tf.trainable_variables():
if not "fully_connected" in tf_var.name:
reg_loss += tf.reduce_mean(tf.nn.l2_loss(tf_var))
loss = output_loss + lambda_l2_reg * reg_loss
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
# 开始训练
sess = tf.InteractiveSession()
def train_batch(batch_size):
X,Y = generate_data(isTrain = True,batch_size = batch_size)
feed_dict = {encoder_input[t]:X[t] for t in range(len(encoder_input))}
feed_dict.update({expected_output[t]:Y[t] for t in range(len(expected_output))})
# Y[:-1] = Y[0:len(Y)-1]
c = np.concatenate(([np.zeros_like(Y[0])],Y[:-1]),axis = 0)
feed_dict.update({decode_input[t]:c[t] for t in range(len(c))})
_,loss_t = sess.run([train_op,loss],feed_dict)
return loss_t
train_losses = []
sess.run(tf.global_variables_initializer())
for t in range(nb_iters + 1):
train_loss= train_batch(batch_size)
train_losses.append(train_loss)
if t % 50 == 0:
print "Step {}/{},train_loss :{}".format(t,nb_iters,train_loss)