I. Background
- A previous post walked through the full pipeline for stock-price prediction with a unidirectional LSTM. This post adds several other commonly used models: bidirectional LSTM, Transformer, and GRU.
- Note that in terms of model performance, a bidirectional LSTM is not necessarily better than a unidirectional one, nor is a Transformer always the best choice; matching a suitable algorithm to the data is usually what matters most.
II. Overview
- This small project uses LSTM-style sequence models for stock-price prediction, trained on the daily basic data of 000001.SZ (Ping An Bank) from 2014 onward.
- The data columns are [stock code, trading date, open, high, low, close, previous close, price change, percentage change, volume, turnover].
- The demo code uses the basic data of the previous N days to predict the next day's closing price. In real work you would not predict something as uninformative as the raw closing price; it is used here purely for demonstration. A sketch of the sliding-window preparation is shown below.
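A minimal sketch of the sliding-window preparation assumed throughout this post. The helper name make_windows, the window length n_steps, and the assumption that the closing price is the last column are all illustrative choices, not code from the previous post:

import numpy as np

def make_windows(values, n_steps):
    """Slice a (num_days, num_features) array into (X, y) pairs:
    X holds n_steps consecutive days of features, y is the next day's close."""
    X, y = [], []
    for i in range(len(values) - n_steps):
        X.append(values[i:i + n_steps])        # previous n_steps days, all features
        y.append(values[i + n_steps, -1])      # next day's close (assumed to be the last column)
    return np.array(X, dtype=np.float32), np.array(y, dtype=np.float32)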
III. Code Walkthrough
1. Bidirectional LSTM
Not much to explain here: compared with the earlier unidirectional LSTM, the only change is setting the bidirectional parameter to True. The imports and device definition from the previous post are repeated below for completeness.
import torch
import torch.nn as nn

# device is assumed to be defined as in the previous post, e.g.:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, batch_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.num_directions = 2  # bidirectional, so two directions
        self.batch_size = batch_size
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers,
                            batch_first=True, bidirectional=True)
        # forward and backward outputs are concatenated, hence num_directions * hidden_size
        self.linear = nn.Linear(self.num_directions * self.hidden_size, self.output_size)

    def forward(self, input_seq):
        # random initial states; zeros are the more common default
        h_0 = torch.randn(self.num_directions * self.num_layers, input_seq.size(0), self.hidden_size).to(device)
        c_0 = torch.randn(self.num_directions * self.num_layers, input_seq.size(0), self.hidden_size).to(device)
        # input_seq: (batch_size, seq_len, input_size)
        # output:    (batch_size, seq_len, num_directions * hidden_size)
        output, _ = self.lstm(input_seq, (h_0, c_0))
        pred = self.linear(output)   # (batch_size, seq_len, output_size)
        pred = pred[:, -1, :]        # keep only the last time step
        return pred
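A quick shape check with hypothetical hyperparameters (8 features, a 30-day window, batch size 32):

model = BiLSTM(input_size=8, hidden_size=64, num_layers=2, output_size=1, batch_size=32).to(device)
dummy = torch.randn(32, 30, 8).to(device)   # (batch_size, seq_len, input_size)
print(model(dummy).shape)                   # torch.Size([32, 1])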
2. GRU
The GRU is even simpler than the LSTM. Structurally it reworks the gating: the LSTM's input and forget gates are merged into a single update gate, the separate cell state is dropped, and a reset gate controls how much of the previous hidden state is reused, which reduces the parameter count and the computational cost. The exact formulation used by nn.GRU is given below.
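For reference, the gate equations as documented for PyTorch's nn.GRU, where $r_t$ is the reset gate, $z_t$ the update gate, and $n_t$ the candidate hidden state:

$$
\begin{aligned}
r_t &= \sigma(W_{ir} x_t + b_{ir} + W_{hr} h_{t-1} + b_{hr}) \\
z_t &= \sigma(W_{iz} x_t + b_{iz} + W_{hz} h_{t-1} + b_{hz}) \\
n_t &= \tanh\big(W_{in} x_t + b_{in} + r_t \odot (W_{hn} h_{t-1} + b_{hn})\big) \\
h_t &= (1 - z_t) \odot n_t + z_t \odot h_{t-1}
\end{aligned}
$$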
import torch.nn.functional as F

class GRU(nn.Module):
    def __init__(self, output_size, input_size, hidden_size, num_layers):
        super(GRU, self).__init__()
        self.output_size = output_size
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.gru = nn.GRU(self.input_size, self.hidden_size,
                          self.num_layers, batch_first=True)
        self.fc1 = nn.Linear(hidden_size, 20)
        self.fc2 = nn.Linear(20, output_size)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        # random initial hidden state; zeros are the more common default
        h_0 = torch.randn(self.num_layers, x.size(0), self.hidden_size).to(device)
        # out: hidden outputs of the last layer at every time step,
        #      shape (batch_size, seq_len, hidden_size);
        # the second return value is the final hidden state of each layer
        out, _ = self.gru(x, h_0)
        out = self.dropout(F.relu(self.fc1(out)))  # apply the dropout defined above
        out = self.fc2(out)
        return out[:, -1, :]  # keep only the last time step
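Quick shape check with the same hypothetical hyperparameters as above (note the constructor's argument order differs from BiLSTM):

gru = GRU(output_size=1, input_size=8, hidden_size=64, num_layers=2).to(device)
print(gru(torch.randn(32, 30, 8).to(device)).shape)   # torch.Size([32, 1])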
3. Transformer
Unlike recurrent networks such as the GRU and LSTM, the Transformer is built on the attention mechanism and does not process the sequence step by step. Because self-attention itself is order-agnostic, a positional encoding is added to inject order information; the core operation, scaled dot-product attention, is shown below.
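For reference, scaled dot-product attention, the building block inside each encoder and decoder layer, where $Q$, $K$, $V$ are the query, key, and value projections and $d_k$ is the key dimension:

$$
\mathrm{Attention}(Q, K, V) = \mathrm{softmax}\!\left(\frac{Q K^{\top}}{\sqrt{d_k}}\right) V
$$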
import math

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=30):
        super(PositionalEncoding, self).__init__()
        # precompute the sinusoidal table: even columns use sin, odd columns use cos
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() *
            (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)  # (max_len, 1, d_model)
        self.register_buffer('pe', pe)        # saved with the model, but not a trainable parameter

    def forward(self, x):
        # x: (batch_size, seq_len, d_model); add the first seq_len rows of the table
        x = x + self.pe[:x.size(1), :].squeeze(1)
        return x
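A quick shape check with hypothetical sizes: the registered buffer has shape (max_len, 1, d_model), and forward broadcasts its first seq_len rows over the batch:

pos = PositionalEncoding(d_model=16, max_len=30)
x = torch.zeros(4, 30, 16)       # (batch_size, seq_len, d_model)
print(pos(x).shape)              # torch.Size([4, 30, 16])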
class TransformerModel(nn.Module):
    def __init__(self, input_size, output_size, d_model, seq_len):
        super(TransformerModel, self).__init__()
        self.input_size = input_size    # number of input features
        self.d_model = d_model          # model / positional-encoding dimension; must be divisible by nhead
        self.output_size = output_size  # output dimension
        self.seq_len = seq_len          # time-window size
        self.input_fc = nn.Linear(self.input_size, self.d_model)
        self.output_fc = nn.Linear(self.input_size, self.d_model)
        self.pos_emb = PositionalEncoding(self.d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.d_model,
            nhead=4,
            dim_feedforward=4 * self.input_size,  # tied to input_size here; 4 * d_model is the more common choice
            batch_first=True,
            dropout=0.1,
            device=device
        )
        decoder_layer = nn.TransformerDecoderLayer(
            d_model=self.d_model,
            nhead=4,
            dropout=0.1,
            dim_feedforward=4 * self.input_size,
            batch_first=True,
            device=device
        )
        self.encoder = torch.nn.TransformerEncoder(encoder_layer, num_layers=3)
        self.decoder = torch.nn.TransformerDecoder(decoder_layer, num_layers=3)
        # fc1/fc2 are only used by the encoder-only variant kept (commented out) in forward
        self.fc1 = nn.Linear(self.seq_len * self.d_model, self.d_model)
        self.fc2 = nn.Linear(self.d_model, self.output_size)
        self.fc = nn.Linear(self.output_size * self.d_model, self.output_size)

    def forward(self, x):
        # x: [batch_size, seq_len, input_size]
        y = x[:, -self.output_size:, :]  # [batch_size, output_size, input_size], used as decoder input
        x = self.input_fc(x)             # [batch_size, seq_len, d_model]
        x = self.pos_emb(x)              # [batch_size, seq_len, d_model]
        x = self.encoder(x)              # [batch_size, seq_len, d_model]
        # encoder-only variant (no decoder):
        # x = x.flatten(start_dim=1)     # [batch_size, seq_len * d_model]
        # x = self.fc1(x)                # [batch_size, d_model]
        # out = self.fc2(x)              # [batch_size, output_size]
        # encoder-decoder variant:
        y = self.output_fc(y)            # [batch_size, output_size, d_model]
        out = self.decoder(y, x)         # [batch_size, output_size, d_model]
        out = out.flatten(start_dim=1)   # [batch_size, output_size * d_model]
        out = self.fc(out)               # [batch_size, output_size]
        return torch.abs(out)            # force a non-negative output, since prices cannot be negative
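All three models take input of shape (batch_size, seq_len, input_size) and return (batch_size, output_size), so they can share one training loop. A minimal sketch, assuming a DataLoader named train_loader, a 30-day window, and 8 features carried over from the previous post's preprocessing:

model = TransformerModel(input_size=8, output_size=1, d_model=64, seq_len=30).to(device)
# model = BiLSTM(input_size=8, hidden_size=64, num_layers=2, output_size=1, batch_size=32).to(device)
# model = GRU(output_size=1, input_size=8, hidden_size=64, num_layers=2).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(50):
    model.train()
    for X_batch, y_batch in train_loader:             # train_loader is assumed from the previous post
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        optimizer.zero_grad()
        pred = model(X_batch)                          # (batch_size, 1)
        loss = criterion(pred, y_batch.view(-1, 1))    # assumes y_batch has shape (batch_size,)
        loss.backward()
        optimizer.step()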
Summary
- Building on the earlier post on unidirectional-LSTM stock-price prediction, this post adds three models: bidirectional LSTM, GRU, and Transformer, making it easier to extend the prediction task in later work.