torch.nn.Parameter(), nn.Embedding()

1. The torch.nn.Parameter() function

self.v = torch.nn.Parameter(torch.FloatTensor(hidden_size))

torch.nn.Parameter() converts a fixed, non-trainable tensor into a trainable parameter and binds it to the module. The bound parameter appears in net.parameters(), so it is included when the optimizer updates the model. After the conversion, the tensor becomes part of the model and is changed by training like any other weight.
Parameters created with nn.Parameter() are added to the module's parameter list and are passed to the optimizer, which learns and updates them as training proceeds. They can be inspected with:

print(net.parameters())
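Note that parameters() returns a generator, so printing it directly only shows something like <generator object Module.parameters at 0x...>. To see the actual tensors, wrap it in list() or iterate over named_parameters(), as in the examples below:

print(list(net.parameters()))            # the registered Parameter tensors
for name, p in net.named_parameters():   # names plus shapes
    print(name, p.shape)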

A linear example:

(1) Use self.test = torch.rand(1, 2)

import torch
class LinearModel(torch.nn.Module):
    def __init__(self):
        super(LinearModel, self).__init__()
        self.test = torch.rand(1, 2)
        self.linear = torch.nn.Linear(1, 1)  # build the Linear module; its parameters are w and b; (1, 1) are the input and output feature dimensions

    def forward(self, x):  # override Module.forward
        y_pred = self.linear(x)
        return y_pred


model = LinearModel()  # instantiate the model
print("Model:", model)
print("Parameters:", list(model.named_parameters()))

Output:

Model: LinearModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)
Parameters: [('linear.weight', Parameter containing:
tensor([[-0.3734]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([0.9272], requires_grad=True))]

(2) Use self.test = torch.nn.Parameter(torch.rand(1, 2))  # note the capital P in Parameter

import torch
class LinearModel(torch.nn.Module):
    def __init__(self):
        super(LinearModel, self).__init__()
        self.test = torch.nn.Parameter(torch.rand(1, 2))  # note the capital P in Parameter
        self.linear = torch.nn.Linear(1, 1)  # build the Linear module; its parameters are w and b; (1, 1) are the input and output feature dimensions

    def forward(self, x):  # override Module.forward
        y_pred = self.linear(x)
        return y_pred


model = LinearModel()  # instantiate the model
print("Model:", model)
print("Parameters:", list(model.named_parameters()))

Output:

Model: LinearModel(
  (linear): Linear(in_features=1, out_features=1, bias=True)
)
Parameters: [('test', Parameter containing:
tensor([[0.3915, 0.7876]], requires_grad=True)), ('linear.weight', Parameter containing:
tensor([[-0.3052]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([-0.1999], requires_grad=True))]
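Because self.test is now registered in model.parameters(), it is handed to the optimizer together with the Linear layer's weight and bias and gets updated on every optimizer step (in example (1) it would be ignored). A minimal sketch; the toy loss below is only for illustration and is not part of the original example:

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
before = model.test.detach().clone()
loss = model(torch.ones(1, 1)).sum() + model.test.sum()  # toy loss that involves self.test
loss.backward()
optimizer.step()
print(torch.equal(before, model.test))  # False: the registered Parameter was updated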

2. The nn.Embedding() function

torch.nn.Embedding(num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None)

nn.Embedding is a simple lookup table that stores the embedding vectors of a fixed-size dictionary. Given an index, the embedding layer returns the embedding vector corresponding to that index; after training, the embedding vectors reflect the semantic relations between the symbols the indices stand for.
The input is a tensor of indices, and the output is the corresponding embedding vectors.
The first argument, num_embeddings, is the number of embedding vectors to create. The second argument, embedding_dim, is the dimension of each embedding vector, i.e. each basic unit is represented by embedding_dim values.
For example, embedding = nn.Embedding(6, 2) below creates 6 embedding vectors, each of dimension 2.
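Among the other arguments in the signature above, padding_idx is worth a quick illustration: the embedding vector at padding_idx is initialized to all zeros and does not receive gradient updates, so it can represent a padding token. A short example (not part of the original post):

embedding_pad = torch.nn.Embedding(6, 2, padding_idx=0)  # row 0 is reserved for padding
print(embedding_pad.weight[0])               # all zeros
print(embedding_pad(torch.tensor([0, 3])))   # the first output row is all zeros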

An embedding layer is used to obtain the feature vector of each token in the input sequence. The weight of the embedding layer is a matrix whose number of rows equals the vocabulary size (vocab_size) and whose number of columns equals the feature-vector dimension (embed_size). For any input token index i, the embedding layer returns the i-th row (counting from 0) of the weight matrix as that token's feature vector.

embedding = torch.nn.Embedding(6, 2)
print(embedding.weight)
x = torch.ones(1, 2).to(int)    # two copies of index 1, shape (1, 2)
print("x", x)
print("x.shape", x.shape)
y = embedding(x)
print("y", y)
print("y.shape", y.shape)       # input shape plus a trailing embed_size dimension: (1, 2, 2)

Output:

Parameter containing:
tensor([[-0.5130, -1.2553],
        [-0.5915,  0.7818],
        [ 0.0382,  0.2250],
        [-0.9197, -0.3909],
        [-1.2444,  0.0630],
        [-1.8657, -0.6756]], requires_grad=True)
x tensor([[1, 1]])
x.shape torch.Size([1, 2])
y tensor([[[-0.5915,  0.7818],
         [-0.5915,  0.7818]]], grad_fn=<EmbeddingBackward>)
y.shape torch.Size([1, 2, 2])
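The lookup really is just row selection from embedding.weight: the same values can be obtained by indexing the weight matrix directly, or by multiplying a one-hot encoding of the indices with the weight matrix. A quick check (not part of the original post, reusing the embedding defined above):

idx = torch.tensor([1, 4])
by_lookup = embedding(idx)                               # shape (2, 2)
by_indexing = embedding.weight[idx]                      # same rows of the weight matrix
one_hot = torch.nn.functional.one_hot(idx, 6).float()    # shape (2, 6)
by_matmul = one_hot @ embedding.weight                   # shape (2, 2)
print(torch.allclose(by_lookup, by_indexing))            # True
print(torch.allclose(by_lookup, by_matmul))              # True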

Example: nn.Embedding in a sequence-to-sequence encoder and decoder:

import collections
import math
import torch
from torch import nn
from d2l import torch as d2l
#@save
class Seq2SeqEncoder(d2l.Encoder):
    """RNN encoder for sequence-to-sequence learning."""
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 dropout=0, **kwargs):
        super(Seq2SeqEncoder, self).__init__(**kwargs)
        # Embedding layer: obtains the feature vector of each token
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.GRU(embed_size, num_hiddens, num_layers,
                          dropout=dropout)
        # No dense output layer: the encoder does not need to predict final labels

    def forward(self, X, *args):
        # After the embedding, 'X' has shape (batch_size, num_steps, embed_size)
        X = self.embedding(X)
        # In RNN models the first axis corresponds to the time step
        X = X.permute(1, 0, 2)  # (num_steps, batch_size, embed_size)
        # If no initial state is passed, it defaults to zeros
        output, state = self.rnn(X)
        # output is the top-layer output at every time step;
        # state is the last-time-step output of every layer
        # output shape: (num_steps, batch_size, num_hiddens)
        # state shape: (num_layers, batch_size, num_hiddens)
        return output, state
encoder = Seq2SeqEncoder(vocab_size=10, embed_size=8, num_hiddens=16,
                         num_layers=2)
encoder.eval()  # eval mode: dropout is disabled
X = torch.zeros((4, 7), dtype=torch.long)  # (batch_size, num_steps)
output, state = encoder(X)
output.shape  # (num_steps, batch_size, num_hiddens)
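# With vocab_size=10, embed_size=8, num_hiddens=16, num_layers=2 and X of shape (4, 7),
# the line above evaluates to:
#   output.shape == torch.Size([7, 4, 16])   # (num_steps, batch_size, num_hiddens)
#   state.shape  == torch.Size([2, 4, 16])   # (num_layers, batch_size, num_hiddens)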
class Seq2SeqDecoder(d2l.Decoder):
    """RNN decoder for sequence-to-sequence learning."""
    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,
                 dropout=0, **kwargs):
        super(Seq2SeqDecoder, self).__init__(**kwargs)
        self.embedding = nn.Embedding(vocab_size, embed_size)  # feature vector of each token
        self.rnn = nn.GRU(embed_size + num_hiddens, num_hiddens, num_layers,
                          dropout=dropout)
        self.dense = nn.Linear(num_hiddens, vocab_size)  # classify over vocab_size outputs

    def init_state(self, enc_outputs, *args):  # use the encoder's state
        return enc_outputs[1]

    def forward(self, X, state):
        # self.embedding(X) has shape (batch_size, num_steps, embed_size);
        # after permute it is (num_steps, batch_size, embed_size)
        X = self.embedding(X).permute(1, 0, 2)
        # Broadcast the context so that it has the same num_steps as X.
        # state[-1] is the top layer's hidden state at the last time step, which condenses
        # all the information of the input sequence; its shape is (batch_size, num_hiddens),
        # e.g. (4, 16) when state has shape (2, 4, 16). repeat(num_steps, 1, 1) prepends a
        # dimension and tiles it num_steps times, so with 7 time steps the context becomes
        # (7, 4, 16): (num_steps, batch_size, num_hiddens). This lets every decoding step see
        # the encoder's final hidden state, concatenated with the new input X -- which is why
        # the decoder's self.rnn takes embed_size + num_hiddens inputs.
        context = state[-1].repeat(X.shape[0], 1, 1)
        # X_and_context: (num_steps, batch_size, embed_size + num_hiddens)
        X_and_context = torch.cat((X, context), 2)  # decoder RNN input: embedded X concatenated with the context
        output, state = self.rnn(X_and_context, state)
        output = self.dense(output).permute(1, 0, 2)
        # output shape: (batch_size, num_steps, vocab_size)
        # state shape: (num_layers, batch_size, num_hiddens)
        return output, state
decoder = Seq2SeqDecoder(vocab_size=10, embed_size=8, num_hiddens=16,
                         num_layers=2)
decoder.eval()
state = decoder.init_state(encoder(X))
output, state = decoder(X, state)
output.shape, state.shape
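With the same hyperparameters, the final line evaluates to

(torch.Size([4, 7, 10]), torch.Size([2, 4, 16]))

i.e. output is (batch_size, num_steps, vocab_size) and state is (num_layers, batch_size, num_hiddens), as noted in the comments above.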

Original: blog.csdn.net/weixin_44040169/article/details/126901581