Basic seq2seq tasks with RNNs


The seq2seq function

seq2seq handles sequence-to-sequence problems.
Encoder-Decoder framework: an encoder first maps the input into a semantic space, producing a fixed-dimensional vector that represents the input's meaning; a decoder then decodes this vector to produce the desired output.
Note: in basic seq2seq, the vector c produced at the encoder's last time step is passed into the decoder's initial time step.

In seq2seq, the labels are not only used to compute the loss; they also take part in the graph computation, serving as the decoder's input at each time step (teacher forcing).
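To make this concrete, here is a minimal numpy sketch (not from the original post) of the shift that the training code below also performs: the decoder input at step t is the label from step t-1, with a zero "GO" frame at t = 0.

import numpy as np

# Hypothetical labels Y, time-major: [seq_len, batch_size, output_dim]
Y = np.random.randn(5, 2, 1).astype(np.float32)
# Decoder inputs: labels shifted right by one step, zero frame first
decoder_inputs = np.concatenate([np.zeros_like(Y[:1]), Y[:-1]], axis=0)
assert decoder_inputs.shape == Y.shape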
seq2seq in TensorFlow:
tf.contrib.legacy_seq2seq.basic_rnn_seq2seq(encoder_inputs,
                                            decoder_inputs,
                                            cell,
                                            dtype=dtypes.float32,
                                            scope=None)
                                
encoder_inputs: a time-major list of 2D tensors, each of shape [batch_size, input_size]
decoder_inputs: same format as encoder_inputs
cell: the RNN cell to use
Returns: outputs and state. outputs is a time-major list of [batch_size, output_size] tensors; state is the decoder's state at the final time step, of shape [batch_size, cell.state_size] (not a per-step list).
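A minimal shape-only sketch of calling this API (using a single GRUCell here; the full example below stacks two):

import tensorflow as tf

seq_len, input_dim, hidden_dim = 4, 1, 12
enc_inputs = [tf.placeholder(tf.float32, (None, input_dim)) for _ in range(seq_len)]
dec_inputs = [tf.placeholder(tf.float32, (None, input_dim)) for _ in range(seq_len)]
cell = tf.contrib.rnn.GRUCell(hidden_dim)
outputs, state = tf.contrib.legacy_seq2seq.basic_rnn_seq2seq(enc_inputs, dec_inputs, cell)
# outputs: a list of seq_len tensors, each [None, hidden_dim]
# state:   the decoder state after the last step ([None, hidden_dim] for a GRUCell)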

Basic structure diagram: the c generated at the encoder's last time step feeds into the decoder's initial time step, and the labels also take part in the computation as the decoder's input at each time step.

Basic seq2seq code example

# -*- coding:utf-8 -*-
import random
import math
import tensorflow as tf
import numpy as np

def do_generate_x_y(isTrain, batch_size, seqlen):
    # Generate batch_size random sin+cos mixtures; each x is the first
    # seqlen steps of the signal, and y is the next seqlen steps to predict.
    batch_x = []
    batch_y = []
    for _ in range(batch_size):
        offset_rand = random.random() * 2 * math.pi
        freq_rand = (random.random() - 0.5) / 1.5 * 15 + 0.5
        amp_rand = random.random() + 0.1

        sin_data = amp_rand * np.sin(np.linspace(
            seqlen / 15.0 * freq_rand * 0.0 * math.pi + offset_rand,
            seqlen / 15.0 * freq_rand * 3.0 * math.pi + offset_rand, seqlen * 2))

        offset_rand = random.random() * 2 * math.pi
        freq_rand = (random.random() - 0.5) / 1.5 * 15 + 0.5
        amp_rand = random.random() * 1.2

        sig_data = amp_rand * np.cos(np.linspace(
            seqlen / 15.0 * freq_rand * 0.0 * math.pi + offset_rand,
            seqlen / 15.0 * freq_rand * 3.0 * math.pi + offset_rand, seqlen * 2)) + sin_data
        batch_x.append(np.array([sig_data[:seqlen]]).T)
        batch_y.append(np.array([sig_data[seqlen:]]).T)

    # current shape: [batch_size, seqlen, output_dim]
    batch_x = np.array(batch_x).transpose([1, 0, 2])
    batch_y = np.array(batch_y).transpose([1, 0, 2])
    # after the transpose: [seqlen, batch_size, output_dim] (time-major)
    return batch_x, batch_y

def generate_data(isTrain, batch_size):
    seq_length = 15
    if isTrain:
        return do_generate_x_y(isTrain, batch_size, seqlen=seq_length)
    else:
        return do_generate_x_y(isTrain, batch_size, seqlen=seq_length * 2)

if __name__ == "__main__":
    sample_now, sample_f = generate_data(True, batch_size=3)
    print("training examples:")
    print(sample_now.shape)
    print("(seq_len, batch_size, output_dim)")

    seq_length = sample_now.shape[0]
    batch_size = 10
    output_dim = input_dim = sample_now.shape[-1]
    hidden_dim = 12
    layers_stacked_count = 2

    # learning rate
    learning_rate = 0.1
    nb_iters = 100
    lambda_l2_reg = 0.003
    tf.reset_default_graph()
    encoder_input = []
    expected_output = []
    decode_input = []
    for i in range(seq_length):
        encoder_input.append(tf.placeholder(tf.float32, shape=(None, input_dim)))
        expected_output.append(tf.placeholder(tf.float32, shape=(None, output_dim)))
        decode_input.append(tf.placeholder(tf.float32, shape=(None, input_dim)))
    tcells = []
    for i in range(layers_stacked_count):
        tcells.append(tf.contrib.rnn.GRUCell(hidden_dim))
    Mcell = tf.contrib.rnn.MultiRNNCell(tcells)
    
    # With basic seq2seq, dec_outputs is a time-major list of
    # [batch_size, hidden_dim] tensors; project each one to output_dim
    dec_outputs, dec_memory = tf.contrib.legacy_seq2seq.basic_rnn_seq2seq(encoder_input, decode_input, Mcell)
    reshaped_outputs = []
    for ii in dec_outputs:
        reshaped_outputs.append(tf.contrib.layers.fully_connected(ii, output_dim, activation_fn=None))
    
    output_loss = 0
    for _y, _Y in zip(reshaped_outputs, expected_output):
        output_loss += tf.reduce_mean(tf.pow(_y - _Y, 2))  # mean squared error
    # L2 regularization loss (the output projection's weights are excluded)
    reg_loss = 0
    for tf_var in tf.trainable_variables():
        if "fully_connected" not in tf_var.name:
            reg_loss += tf.reduce_mean(tf.nn.l2_loss(tf_var))
    loss = output_loss + lambda_l2_reg * reg_loss

    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    # start training
    sess = tf.InteractiveSession()
    def train_batch(batch_size):
        X, Y = generate_data(isTrain=True, batch_size=batch_size)
        feed_dict = {encoder_input[t]: X[t] for t in range(len(encoder_input))}
        feed_dict.update({expected_output[t]: Y[t] for t in range(len(expected_output))})
        # Teacher forcing: shift the labels right one step and prepend a
        # zero "GO" frame, so decode_input[t] is fed Y[t-1]
        c = np.concatenate(([np.zeros_like(Y[0])], Y[:-1]), axis=0)
        feed_dict.update({decode_input[t]: c[t] for t in range(len(c))})
        _, loss_t = sess.run([train_op, loss], feed_dict)
        return loss_t
    
    train_losses = []
    sess.run(tf.global_variables_initializer())
    for t in range(nb_iters + 1):
        train_loss = train_batch(batch_size)
        train_losses.append(train_loss)
        if t % 50 == 0:
            print("Step {}/{}, train_loss: {}".format(t, nb_iters, train_loss))
