IMDB Sentiment Classification with a BiLSTM

Contents

1. Import the required libraries

2. Load the dataset

3. Build the BiLSTM model

4. Load pretrained word vectors

5. Set the hyperparameters, optimizer, and loss function

6. Design the training loop

7. Use the model for prediction

8. Save and load the model


1. Import the required libraries

import torch
from torch import nn
from d2l import torch as d2l

2. Load the dataset

batch_size = 64
train_iter, test_iter, vocab = d2l.load_data_imdb(batch_size)
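To see what load_data_imdb produces, it helps to peek at one minibatch. The sketch below assumes d2l's default preprocessing, which pads or truncates every review to a fixed length (500 tokens in the d2l implementation), so each feature batch should be an integer tensor of shape (batch size, num steps).

# Inspect one minibatch: X holds token indices, y holds the 0/1 sentiment labels
for X, y in train_iter:
    print('X:', X.shape, X.dtype)   # expected: torch.Size([64, 500]) torch.int64
    print('y:', y.shape, y.dtype)   # expected: torch.Size([64]) torch.int64
    break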

3. Build the BiLSTM model

class BiRNN(nn.Module):
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers, **kwargs):
        super(BiRNN, self).__init__(**kwargs)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.encoder = nn.LSTM(embed_size, num_hiddens, num_layers=num_layers, bidirectional=True)
        self.decoder = nn.Linear(4 * num_hiddens, 2)

    def forward(self, x):
        # x has shape (batch size, num steps); transpose it so that time is the
        # first dimension before looking up the embeddings
        embeddings = self.embedding(x.T)
        self.encoder.flatten_parameters()
        # Hidden states of the last layer at every time step;
        # outputs has shape (num steps, batch size, 2 * num_hiddens)
        outputs, _ = self.encoder(embeddings)
        # Concatenate the hidden states at the first and last time steps as the
        # input of the fully connected layer; encoding has shape
        # (batch size, 4 * num_hiddens)
        encoding = torch.cat((outputs[0], outputs[-1]), dim=1)
        outs = self.decoder(encoding)
        return outs


# Instantiate the model
embed_size, num_hiddens, num_layers = 100, 100, 2
net = BiRNN(len(vocab), embed_size, num_hiddens, num_layers)


# Initialize the model weights: Xavier initialization for the Linear and LSTM weight matrices
def init_weights(m):
    if type(m) == nn.Linear:
        nn.init.xavier_uniform_(m.weight)

    if type(m) == nn.LSTM:
        for param in m._flat_weights_names:
            if 'weight' in param:
                nn.init.xavier_uniform_(m._parameters[param])


net.apply(init_weights)
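As a quick sanity check of the shapes traced in the comments of forward, we can push a made-up batch of token indices through the untrained network; the dummy input below is only for illustration.

# A dummy minibatch of 64 "reviews", each 500 token indices long (index 0 is a valid token)
dummy_X = torch.zeros((64, 500), dtype=torch.long)
print(net(dummy_X).shape)  # expected: torch.Size([64, 2]), one score per class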

4. Load pretrained word vectors

# Load pretrained 100-dimensional GloVe embeddings for the words in the vocabulary and look up their token vectors
glove_embedding = d2l.TokenEmbedding('glove.6b.100d')
embeds = glove_embedding[vocab.idx_to_token]

# Use these pretrained word vectors to represent the tokens in the reviews, and do not update them during training
net.embedding.weight.data.copy_(embeds)
net.embedding.weight.requires_grad = False
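The lookup glove_embedding[vocab.idx_to_token] returns one 100-dimensional vector per vocabulary word; in d2l's TokenEmbedding, words that GloVe does not cover come back as all-zero vectors. A small check (the exact number of uncovered words depends on the vocabulary):

print(embeds.shape)  # expected: torch.Size([len(vocab), 100])
# Count the vocabulary words that have no GloVe vector (all-zero rows)
num_missing = (embeds.abs().sum(dim=1) == 0).sum().item()
print(f'{num_missing} of {len(vocab)} vocabulary words are not covered by GloVe')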

5. Set the hyperparameters, optimizer, and loss function

devices = d2l.try_all_gpus()
lr, num_epochs = 0.01, 5
trainer = torch.optim.Adam(net.parameters(), lr=lr)
loss = nn.CrossEntropyLoss(reduction="none")
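With reduction="none", the criterion returns one loss value per example instead of a single scalar; the training loop below sums these values for the backward pass and divides the accumulated sum by the number of examples when reporting. A minimal illustration with made-up logits and labels:

# Per-example losses: shape (4,) rather than one averaged scalar
dummy_logits = torch.randn(4, 2)
dummy_labels = torch.tensor([0, 1, 1, 0])
per_example_loss = loss(dummy_logits, dummy_labels)
print(per_example_loss.shape, per_example_loss.sum())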

6. Design the training loop

def train_batch_ch13(net, X, y, loss, trainer, devices):
    """Train for a minibatch with mutiple GPUs (defined in Chapter 13).

    Defined in :numref:`sec_image_augmentation`"""
    if isinstance(X, list):
        # Required for BERT fine-tuning (to be covered later)
        X = [x.to(devices[0]) for x in X]
    else:
        X = X.to(devices[0])
    y = y.to(devices[0])
    net.train()
    trainer.zero_grad()
    pred = net(X)
    l = loss(pred, y)
    l.sum().backward()
    trainer.step()
    train_loss_sum = l.sum()
    train_acc_sum = d2l.accuracy(pred, y)
    return train_loss_sum, train_acc_sum


def train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs,
               devices=d2l.try_all_gpus()):
    """Train a model with mutiple GPUs (defined in Chapter 13).

    Defined in :numref:`sec_image_augmentation`"""
    timer, num_batches = d2l.Timer(), len(train_iter)
    # animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0, 1],
    #                         legend=['train loss', 'train acc', 'test acc'])
    net = nn.DataParallel(net, device_ids=devices).to(devices[0])
    for epoch in range(num_epochs):
        # Sum of training loss, sum of training accuracy, no. of examples,
        # no. of predictions
        metric = d2l.Accumulator(4)
        for i, (features, labels) in enumerate(train_iter):
            timer.start()
            l, acc = train_batch_ch13(
                net, features, labels, loss, trainer, devices)
            metric.add(l, acc, labels.shape[0], labels.numel())
            timer.stop()

            print(f'epoch {epoch}, batch {i}, loss {metric[0] / metric[2]:.3f}, '
                  f'train acc {metric[1] / metric[3]:.3f}')
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        # animator.add(epoch + 1, (None, None, test_acc))
    print(f'loss {metric[0] / metric[2]:.3f}, train acc '
          f'{metric[1] / metric[3]:.3f}, test acc {test_acc:.3f}')
    print(f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec on '
          f'{str(devices)}')


train_ch13(net, train_iter, test_iter, loss, trainer, num_epochs, devices)

7. Use the model for prediction

def predict_sentiment(net, vocab, sequence):
    """预测文本序列的情感"""
    sequence = torch.tensor(vocab[sequence.split()], device=d2l.try_gpu())
    label = torch.argmax(net(sequence.reshape(1, -1)), dim=1)
    return 'positive' if label == 1 else 'negative'


predict_sentiment(net, vocab, 'this movie is so great')
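For contrast, a clearly negative review (the actual output depends on the trained weights):

predict_sentiment(net, vocab, 'this movie is so bad')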

8. Save and load the model

# Save a full checkpoint: model weights, optimizer state, number of epochs, and the loss criterion
torch.save({
            'epoch':num_epochs,
            'model_state_dict': net.state_dict(),
            'optimizer_state_dict': trainer.state_dict(),
            'loss': loss,
            }, 'model.pt')


# Load the model from the checkpoint
model = BiRNN(len(vocab), embed_size, num_hiddens, num_layers)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

checkpoint = torch.load('model.pt')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

model.eval()   # evaluation mode for inference
# model.train()  # switch back to training mode if you resume training
# Move the model to the GPU (if one is available)
model = model.to(d2l.try_gpu())

# Check that the loaded model works
predict_sentiment(model, vocab, 'I am a apple.')
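If only the weights are needed for inference rather than a full resumable checkpoint, a lighter alternative (a sketch; the file name below is arbitrary) is to save just the state_dict and rebuild the model around it:

# Save only the parameters (smaller file, sufficient for inference)
torch.save(net.state_dict(), 'birnn_weights.pt')

# Rebuild the model with the same hyperparameters, load the weights, and predict
model2 = BiRNN(len(vocab), embed_size, num_hiddens, num_layers)
model2.load_state_dict(torch.load('birnn_weights.pt'))
model2 = model2.to(d2l.try_gpu())
model2.eval()
predict_sentiment(model2, vocab, 'this movie is so great')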

Reposted from blog.csdn.net/qq_38901850/article/details/125176513