PyTorch in Practice series
[PyTorch in Practice] 1. A two-layer neural network example
[PyTorch in Practice] 2. Playing the Fizz_Buzz game with PyTorch
1. Implementation with NumPy
import numpy as np

N, D_in, H, D_out = 64, 1000, 100, 10  # batch size, input dim, hidden dim, output dim

x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for it in range(500):
    # Forward pass
    h = x.dot(w1)              # N x H
    h_relu = np.maximum(0, h)  # N x H
    y_pred = h_relu.dot(w2)    # N x D_out

    # Compute loss
    loss = np.square(y_pred - y).sum()

    # Backward pass
    grad_y_pred = 2 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    grad_h_relu[h < 0] = 0     # ReLU passes no gradient where h < 0, so zero those entries
    grad_w1 = x.T.dot(grad_h_relu)

    # Update weights
    w2 -= learning_rate * grad_w2
    w1 -= learning_rate * grad_w1

    if loss < 1:
        print(it, loss)
        break
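For reference, the backward-pass lines follow directly from the chain rule. Writing the forward pass as h = x W1, h_relu = max(0, h), y_pred = h_relu W2 and L = ||y_pred - y||^2, the gradients are:

\frac{\partial L}{\partial y_{\text{pred}}} = 2\,(y_{\text{pred}} - y), \qquad
\frac{\partial L}{\partial W_2} = h_{\text{relu}}^{\top}\,\frac{\partial L}{\partial y_{\text{pred}}}, \qquad
\frac{\partial L}{\partial h_{\text{relu}}} = \frac{\partial L}{\partial y_{\text{pred}}}\,W_2^{\top}, \qquad
\frac{\partial L}{\partial h} = \frac{\partial L}{\partial h_{\text{relu}}} \odot \mathbf{1}[h > 0], \qquad
\frac{\partial L}{\partial W_1} = x^{\top}\,\frac{\partial L}{\partial h}

The indicator term 1[h > 0] is exactly what the line grad_h_relu[h < 0] = 0 implements in the code.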
2. Low-level implementation with PyTorch tensors
import torch

N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)

learning_rate = 1e-6
for it in range(500):
    # Forward pass
    h = x.mm(w1)             # N x H
    h_relu = h.clamp(min=0)  # N x H
    y_pred = h_relu.mm(w2)   # N x D_out

    # Compute loss
    loss = (y_pred - y).pow(2).sum()
    print(it, loss.item())

    # Backward pass
    grad_y_pred = 2 * (y_pred - y)
    grad_w2 = h_relu.T.mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.T)
    grad_h_relu[h < 0] = 0   # ReLU passes no gradient where h < 0, so zero those entries
    grad_w1 = x.T.mm(grad_h_relu)

    # Update weights
    w2 -= learning_rate * grad_w2
    w1 -= learning_rate * grad_w1
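Apart from the tensor API (mm and clamp instead of dot and np.maximum), this version is identical to the NumPy one. The practical gain is that the same code can run on a GPU; a minimal sketch, assuming a CUDA device is available (not part of the original post):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
x = torch.randn(N, D_in, device=device)
y = torch.randn(N, D_out, device=device)
w1 = torch.randn(D_in, H, device=device)
w2 = torch.randn(H, D_out, device=device)
# the training loop above then runs unchanged on the chosen device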
3. Automatic gradients and backpropagation with PyTorch autograd
import torch

N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

w1 = torch.randn(D_in, H, requires_grad=True)
w2 = torch.randn(H, D_out, requires_grad=True)

learning_rate = 1e-6
for it in range(500):
    # Forward pass
    y_pred = x.mm(w1).clamp(min=0).mm(w2)

    # Compute loss
    loss = (y_pred - y).pow(2).sum()
    print(it, loss.item())

    # Backward pass: autograd computes the gradient of loss w.r.t. w1 and w2
    loss.backward()

    # Update weights; no_grad() keeps the updates out of the autograd graph
    with torch.no_grad():
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad
        # Zero the gradients, otherwise the next backward() accumulates into .grad
        w1.grad.zero_()
        w2.grad.zero_()
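As a quick sanity check of what loss.backward() does, here is a toy example (added for illustration, not from the original): after the call, every tensor created with requires_grad=True holds d(loss)/d(tensor) in its .grad attribute.

w = torch.tensor([2.0], requires_grad=True)
loss = (3 * w - 1).pow(2).sum()  # loss = (3w - 1)^2
loss.backward()
print(w.grad)                    # tensor([30.]) = 2 * (3*2 - 1) * 3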
4. Building the model and loss function with torch.nn
import torch
import torch.nn as nn

N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# The two Linear layers replace the hand-written w1 and w2
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
# model = model.cuda()

loss_fn = nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for it in range(500):
    # Forward pass
    y_pred = model(x)  # calls model.forward()

    # Compute loss
    loss = loss_fn(y_pred, y)
    print(it, loss.item())

    # Backward pass
    loss.backward()

    # Update the model parameters by hand
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
    model.zero_grad()
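Note that nn.Linear uses its own default weight initialization, which is much smaller in scale than the torch.randn initialization of the earlier versions, so with learning_rate = 1e-6 the loss tends to decrease very slowly here. A possible tweak (an assumption added here, not part of the original) is to re-initialize the Linear weights with a standard normal:

# Assumed tweak: re-initialize both Linear layers' weights ~ N(0, 1)
torch.nn.init.normal_(model[0].weight)
torch.nn.init.normal_(model[2].weight)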
5. Adding an optimizer
import torch
import torch.nn as nn

N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H),
    torch.nn.ReLU(),
    torch.nn.Linear(H, D_out),
)
# model = model.cuda()

loss_fn = nn.MSELoss(reduction='sum')

learning_rate = 1e-4
# torch.optim also offers other optimizers, e.g. SGD
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for it in range(500):
    # Forward pass
    y_pred = model(x)  # calls model.forward()

    # Compute loss
    loss = loss_fn(y_pred, y)
    print(it, loss.item())

    # Zero gradients, backward pass, then update the model parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
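As the comment above notes, torch.optim provides other optimizers besides Adam, and swapping one in only changes a single line. For example (keeping in mind that plain SGD usually needs a differently tuned learning rate than Adam):

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# or torch.optim.RMSprop(model.parameters(), lr=learning_rate), etc.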
6. Wrapping the model in a class
import torch
import torch.nn as nn

N, D_in, H, D_out = 64, 1000, 100, 10

x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

class TwoLayerNet(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        # Define the model architecture
        super(TwoLayerNet, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        temp = self.linear1(x).clamp(min=0)  # clamp(min=0) acts as ReLU
        y_pred = self.linear2(temp)
        return y_pred

model = TwoLayerNet(D_in, H, D_out)
# model = model.cuda()

loss_fn = nn.MSELoss(reduction='sum')
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for it in range(500):
    # Forward pass
    y_pred = model(x)  # calls model.forward()

    # Compute loss
    loss = loss_fn(y_pred, y)
    print(it, loss.item())

    # Zero gradients, backward pass, then update the model parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
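Once trained, the model is used for prediction like any nn.Module; a minimal inference sketch (added for illustration, not part of the original):

with torch.no_grad():
    x_new = torch.randn(1, D_in)  # a hypothetical new input
    y_new = model(x_new)
    print(y_new.shape)            # torch.Size([1, 10])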
References and acknowledgements
[1] The PyTorch hands-on course by 褚则伟 (Zewei Chu) at 七月在线 (July Online)