在caffe中训练的时候如果使用多GPU则直接在运行程序的时候指定GPU的index即可,但是在Pytorch中则需要在声明模型之后,对声明的模型进行初始化,如:
cnn = DataParallel(AlexNet())
之后直接运行Pytorch则默认使用所有的GPU,为了说明上述初始化的作用,我用了一组畸变图像的数据集,写了一个ResNet的模块,过了50个epoch,对比一下实验耗时的差别,代码如下:
# -*- coding: utf-8 -*-
# Implementation of https://arxiv.org/pdf/1512.03385.pdf/
# See section 4.2 for model architecture on CIFAR-10.
# Some part of the code was referenced below.
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
import os
from PIL import Image
import time
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.utils.data as data
from torch.nn import DataParallel
# DataLoader options: a single worker process, with pinned (page-locked)
# host memory so host-to-GPU copies are faster.
kwargs = {'num_workers': 1, 'pin_memory': True}
# Custom dataset definition below: returns each image and its label tuple.
def default_loader(path):
    """Open the image at *path* with PIL and return it in 3-channel RGB mode."""
    img = Image.open(path)
    return img.convert('RGB')
class myImageFloder(data.Dataset):
    """Dataset backed by an index file.

    The index file's first line is a space-separated list of class names;
    every following line is "<filename> <float> <float> ..." giving an image
    path (relative to *root*) and its numeric label values.
    """
    def __init__(self, root, label, transform=None, target_transform=None, loader=default_loader):
        imgs = []
        class_names = []
        # Use a context manager so the label file is always closed; the
        # original opened it and never closed the handle (resource leak).
        with open(label) as fh:
            for c, line in enumerate(fh):
                if c == 0:
                    # Header line: the class names.
                    class_names = [n.strip() for n in line.rstrip().split(' ')]
                else:
                    cls = line.split()  # cls is a list: filename then label values
                    fn = cls.pop(0)
                    # Only keep entries whose image file actually exists on disk.
                    if os.path.isfile(os.path.join(root, fn)):
                        imgs.append((fn, tuple(float(v) for v in cls)))
        self.root = root
        self.imgs = imgs
        self.classes = class_names
        self.transform = transform
        self.target_transform = target_transform
        self.loader = loader

    def __getitem__(self, index):
        """Return (image, label-tensor) for the sample at *index*."""
        fn, label = self.imgs[index]
        img = self.loader(os.path.join(self.root, fn))
        if self.transform is not None:
            img = self.transform(img)
        return img, torch.Tensor(label)

    def __len__(self):
        return len(self.imgs)

    def getName(self):
        """Return the class names read from the index file's header line."""
        return self.classes
# Only convert PIL image -> tensor; no augmentation or normalization.
mytransform = transforms.Compose([transforms.ToTensor()])
# Hard-coded dataset locations: distorted-image folders plus the
# name-list label files (first line = class names, rest = samples).
train_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Training"
test_data_root = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/Testing"
train_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_train.txt"
test_label = "/home/ying/shiyongjie/rjp/generate_distortion_image_2016_03_15/0_Distorted_Image/NameList_test.txt"
# Batched, shuffled loaders over the custom dataset (see kwargs above for
# worker/pin-memory settings).
train_loader = torch.utils.data.DataLoader(
    myImageFloder(root=train_data_root, label=train_label, transform=mytransform),
    batch_size=64, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    myImageFloder(root=test_data_root, label=test_label, transform=mytransform),
    batch_size=64, shuffle=True, **kwargs)
# 3x3 Convolution
def conv3x3(in_channels, out_channels, stride=1):
    """Return a bias-free 3x3 Conv2d; padding=1 keeps the spatial size at stride 1."""
    return nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
# Residual Block
class ResidualBlock(nn.Module):
    """Basic residual block: two 3x3 convs with BN/ReLU and a skip connection.

    *downsample*, when given, is a module (e.g. an nn.Sequential projection)
    applied to the input so the skip connection matches the main path's shape
    after a stride/width change.
    """
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)  # may reduce spatial size
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        # Compare against None explicitly: truthiness of an nn.Sequential
        # goes through __len__, so an empty Sequential would silently skip
        # the projection even though one was supplied.
        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
# ResNet Module
class ResNet(nn.Module):
    """Small ResNet regressor: a stem conv plus five residual stages.

    *block* is the residual-block class, *layers* gives the block counts,
    and *num_classes* (default 1) is the output dimension — a single scalar
    per image for this regression task.
    """
    def __init__(self, block, layers, num_classes=1):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        # NOTE(review): layer2 reuses layers[0] and stages 3-5 reuse
        # layers[1]; this is kept exactly as written — confirm it is intended
        # before relying on layers[2].
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[0], 2)
        self.layer3 = self.make_layer(block, 64, layers[1], 2)
        self.layer4 = self.make_layer(block, 128, layers[1], 2)
        self.layer5 = self.make_layer(block, 256, layers[1], 2)
        # presumably leaves a 2x2 map for 256x256 inputs — TODO confirm
        self.avg_pool = nn.AvgPool2d(kernel_size=8, stride=8)
        self.fc = nn.Linear(256 * 2 * 2, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        """Build one stage of *blocks* residual blocks as an nn.Sequential."""
        needs_projection = (stride != 1) or (self.in_channels != out_channels)
        if needs_projection:
            # Project the skip connection so it matches the new shape.
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        else:
            downsample = None
        modules = [block(self.in_channels, out_channels, stride, downsample)]
        self.in_channels = out_channels  # subsequent blocks keep this width
        modules.extend(block(out_channels, out_channels) for _ in range(blocks - 1))
        return nn.Sequential(*modules)

    def forward(self, x):
        out = self.relu(self.bn(self.conv(x)))
        for stage in (self.layer1, self.layer2, self.layer3,
                      self.layer4, self.layer5):
            out = stage(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)  # flatten to (batch, features)
        return self.fc(out)
# Wrap the model in DataParallel so every visible GPU is used for the batch.
resnet = DataParallel(ResNet(ResidualBlock, [3, 3, 3]))
resnet.cuda()
# Loss and Optimizer
criterion = nn.MSELoss()
lr = 0.001
optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)
# Training
num_epochs = 50
# time.clock() was deprecated since 3.3 and removed in Python 3.8;
# perf_counter() is the documented replacement for interval timing.
start = time.perf_counter()
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = resnet(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        if (i + 1) % 100 == 0:
            # Report the real totals: the original hard-coded 80 epochs and
            # 500 iterations, which is why its logs show "Iter [900/500]".
            # NOTE(review): on torch >= 0.4 use loss.item() instead of
            # loss.data[0].
            print("Epoch [%d/%d], Iter [%d/%d] Loss: %.4f"
                  % (epoch + 1, num_epochs, i + 1, len(train_loader), loss.data[0]))
    # Decaying Learning Rate: divide by 3 every 20 epochs by rebuilding
    # the optimizer with the smaller rate.
    if (epoch + 1) % 20 == 0:
        lr /= 3
        optimizer = torch.optim.Adam(resnet.parameters(), lr=lr)
elapsed = time.perf_counter() - start
print("time used:", elapsed)
# # Test
# correct = 0
# total = 0
# for images, labels in test_loader:
#     images = Variable(images.cuda())
#     outputs = resnet(images)
#     _, predicted = torch.max(outputs.data, 1)
#     total += labels.size(0)
#     correct += (predicted.cpu() == labels).sum()
#
# print('Accuracy of the model on the test images: %d %%' % (100 * correct / total))
# Save the Model
torch.save(resnet.state_dict(), 'resnet.pkl')
作为对比实验,我们同时把ResNet的声明方式修改为
resnet = ResNet(ResidualBlock, [3, 3, 3])
其余不变,再运行程序的时候不指定GPU,直接python resnet.py,在声明DataParallel时,运行耗时结果如下:
('time used:', 17124.861335999998),watch -n 1 nvidia-smi确实显示占用两块GPU
在不声明DataParallel时,实验运行结果耗时如下:
('time used:', 30318.149681000003),watch -n 1 nvidia-smi确实显示占用一块GPU
可以看出,在声明DataParallel时时间压缩了近一半,所以声明DataParallel是使用多GPU运行Pytorch的一种方法。
官方的doc也给出了多GPU使用的例子以及部分数据在GPU与部分数据在CPU上运行的例子
以下是两组实验结果的输出:
使用DataParallel初始化
Epoch [1/80], Iter [100/500] Loss: 916.5578
Epoch [1/80], Iter [200/500] Loss: 172.2591
Epoch [1/80], Iter [300/500] Loss: 179.8360
Epoch [1/80], Iter [400/500] Loss: 259.6867
Epoch [1/80], Iter [500/500] Loss: 244.0616
Epoch [1/80], Iter [600/500] Loss: 74.7015
Epoch [1/80], Iter [700/500] Loss: 63.1657
Epoch [1/80], Iter [800/500] Loss: 90.3517
Epoch [1/80], Iter [900/500] Loss: 70.4562
Epoch [2/80], Iter [100/500] Loss: 52.3249
Epoch [2/80], Iter [200/500] Loss: 129.1855
Epoch [2/80], Iter [300/500] Loss: 110.0157
Epoch [2/80], Iter [400/500] Loss: 64.9313
Epoch [2/80], Iter [500/500] Loss: 87.8385
Epoch [2/80], Iter [600/500] Loss: 118.5828
Epoch [2/80], Iter [700/500] Loss: 123.9575
Epoch [2/80], Iter [800/500] Loss: 79.1908
Epoch [2/80], Iter [900/500] Loss: 61.8099
Epoch [3/80], Iter [100/500] Loss: 50.4294
Epoch [3/80], Iter [200/500] Loss: 106.8135
Epoch [3/80], Iter [300/500] Loss: 83.2198
Epoch [3/80], Iter [400/500] Loss: 60.7116
Epoch [3/80], Iter [500/500] Loss: 101.9553
Epoch [3/80], Iter [600/500] Loss: 64.6967
Epoch [3/80], Iter [700/500] Loss: 66.2446
Epoch [3/80], Iter [800/500] Loss: 81.1825
Epoch [3/80], Iter [900/500] Loss: 53.9905
Epoch [4/80], Iter [100/500] Loss: 76.2977
Epoch [4/80], Iter [200/500] Loss: 18.4255
Epoch [4/80], Iter [300/500] Loss: 57.6188
Epoch [4/80], Iter [400/500] Loss: 45.6235
Epoch [4/80], Iter [500/500] Loss: 82.9265
Epoch [4/80], Iter [600/500] Loss: 119.6085
Epoch [4/80], Iter [700/500] Loss: 53.1355
Epoch [4/80], Iter [800/500] Loss: 29.5248
Epoch [4/80], Iter [900/500] Loss: 57.0401
Epoch [5/80], Iter [100/500] Loss: 47.2671
Epoch [5/80], Iter [200/500] Loss: 31.6928
Epoch [5/80], Iter [300/500] Loss: 38.0040
Epoch [5/80], Iter [400/500] Loss: 24.5184
Epoch [5/80], Iter [500/500] Loss: 33.8515
Epoch [5/80], Iter [600/500] Loss: 43.6560
Epoch [5/80], Iter [700/500] Loss: 68.2500
Epoch [5/80], Iter [800/500] Loss: 30.8259
Epoch [5/80], Iter [900/500] Loss: 43.9696
Epoch [6/80], Iter [100/500] Loss: 22.4120
Epoch [6/80], Iter [200/500] Loss: 45.5722
Epoch [6/80], Iter [300/500] Loss: 26.8331
Epoch [6/80], Iter [400/500] Loss: 58.1139
Epoch [6/80], Iter [500/500] Loss: 12.8767
Epoch [6/80], Iter [600/500] Loss: 26.6725
Epoch [6/80], Iter [700/500] Loss: 31.9800
Epoch [6/80], Iter [800/500] Loss: 91.2332
Epoch [6/80], Iter [900/500] Loss: 44.1361
Epoch [7/80], Iter [100/500] Loss: 13.1401
Epoch [7/80], Iter [200/500] Loss: 20.9435
Epoch [7/80], Iter [300/500] Loss: 28.0944
Epoch [7/80], Iter [400/500] Loss: 24.0240
Epoch [7/80], Iter [500/500] Loss: 43.3279
Epoch [7/80], Iter [600/500] Loss: 23.3077
Epoch [7/80], Iter [700/500] Loss: 32.9658
Epoch [7/80], Iter [800/500] Loss: 27.2044
Epoch [7/80], Iter [900/500] Loss: 25.5850
Epoch [8/80], Iter [100/500] Loss: 39.7642
Epoch [8/80], Iter [200/500] Loss: 17.7421
Epoch [8/80], Iter [300/500] Loss: 29.8965
Epoch [8/80], Iter [400/500] Loss: 20.6153
Epoch [8/80], Iter [500/500] Loss: 43.0224
Epoch [8/80], Iter [600/500] Loss: 58.1552
Epoch [8/80], Iter [700/500] Loss: 19.1967
Epoch [8/80], Iter [800/500] Loss: 34.9122
Epoch [8/80], Iter [900/500] Loss: 15.0651
Epoch [9/80], Iter [100/500] Loss: 18.5950
Epoch [9/80], Iter [200/500] Loss: 36.1891
Epoch [9/80], Iter [300/500] Loss: 22.4936
Epoch [9/80], Iter [400/500] Loss: 14.8044
Epoch [9/80], Iter [500/500] Loss: 16.6958
Epoch [9/80], Iter [600/500] Loss: 24.8461
Epoch [9/80], Iter [700/500] Loss: 13.7112
Epoch [9/80], Iter [800/500] Loss: 21.2906
Epoch [9/80], Iter [900/500] Loss: 31.6950
Epoch [10/80], Iter [100/500] Loss: 20.7707
Epoch [10/80], Iter [200/500] Loss: 15.6260
Epoch [10/80], Iter [300/500] Loss: 28.5737
Epoch [10/80], Iter [400/500] Loss: 36.6791
Epoch [10/80], Iter [500/500] Loss: 38.9839
Epoch [10/80], Iter [600/500] Loss: 14.4459
Epoch [10/80], Iter [700/500] Loss: 10.0907
Epoch [10/80], Iter [800/500] Loss: 17.9035
Epoch [10/80], Iter [900/500] Loss: 24.5759
Epoch [11/80], Iter [100/500] Loss: 19.8531
Epoch [11/80], Iter [200/500] Loss: 15.7126
Epoch [11/80], Iter [300/500] Loss: 18.0198
Epoch [11/80], Iter [400/500] Loss: 19.3038
Epoch [11/80], Iter [500/500] Loss: 27.4435
Epoch [11/80], Iter [600/500] Loss: 18.1086
Epoch [11/80], Iter [700/500] Loss: 10.8124
Epoch [11/80], Iter [800/500] Loss: 31.2389
Epoch [11/80], Iter [900/500] Loss: 14.4881
Epoch [12/80], Iter [100/500] Loss: 10.6320
Epoch [12/80], Iter [200/500] Loss: 26.8394
Epoch [12/80], Iter [300/500] Loss: 16.0246
Epoch [12/80], Iter [400/500] Loss: 16.3263
Epoch [12/80], Iter [500/500] Loss: 24.5880
Epoch [12/80], Iter [600/500] Loss: 15.7498
Epoch [12/80], Iter [700/500] Loss: 11.4933
Epoch [12/80], Iter [800/500] Loss: 9.7252
Epoch [12/80], Iter [900/500] Loss: 31.6774
Epoch [13/80], Iter [100/500] Loss: 21.1929
Epoch [13/80], Iter [200/500] Loss: 17.0953
Epoch [13/80], Iter [300/500] Loss: 21.1883
Epoch [13/80], Iter [400/500] Loss: 15.9005
Epoch [13/80], Iter [500/500] Loss: 14.7924
Epoch [13/80], Iter [600/500] Loss: 12.4324
Epoch [13/80], Iter [700/500] Loss: 12.0840
Epoch [13/80], Iter [800/500] Loss: 30.9664
Epoch [13/80], Iter [900/500] Loss: 14.9601
Epoch [14/80], Iter [100/500] Loss: 6.5126
Epoch [14/80], Iter [200/500] Loss: 11.3227
Epoch [14/80], Iter [300/500] Loss: 12.9980
Epoch [14/80], Iter [400/500] Loss: 13.8523
Epoch [14/80], Iter [500/500] Loss: 10.6771
Epoch [14/80], Iter [600/500] Loss: 7.3953
Epoch [14/80], Iter [700/500] Loss: 14.6829
Epoch [14/80], Iter [800/500] Loss: 15.6956
Epoch [14/80], Iter [900/500] Loss: 21.8876
Epoch [15/80], Iter [100/500] Loss: 5.1943
Epoch [15/80], Iter [200/500] Loss: 13.0731
Epoch [15/80], Iter [300/500] Loss: 6.8931
Epoch [15/80], Iter [400/500] Loss: 15.3212
Epoch [15/80], Iter [500/500] Loss: 8.1775
Epoch [15/80], Iter [600/500] Loss: 11.5664
Epoch [15/80], Iter [700/500] Loss: 5.5951
Epoch [15/80], Iter [800/500] Loss: 10.9075
Epoch [15/80], Iter [900/500] Loss: 14.8503
Epoch [16/80], Iter [100/500] Loss: 19.5184
Epoch [16/80], Iter [200/500] Loss: 10.3570
Epoch [16/80], Iter [300/500] Loss: 10.0997
Epoch [16/80], Iter [400/500] Loss: 9.7350
Epoch [16/80], Iter [500/500] Loss: 11.3000
Epoch [16/80], Iter [600/500] Loss: 21.6213
Epoch [16/80], Iter [700/500] Loss: 9.7907
Epoch [16/80], Iter [800/500] Loss: 10.0128
Epoch [16/80], Iter [900/500] Loss: 10.7869
Epoch [17/80], Iter [100/500] Loss: 9.2015
Epoch [17/80], Iter [200/500] Loss: 7.3021
Epoch [17/80], Iter [300/500] Loss: 5.9662
Epoch [17/80], Iter [400/500] Loss: 17.5215
Epoch [17/80], Iter [500/500] Loss: 7.3349
Epoch [17/80], Iter [600/500] Loss: 8.5626
Epoch [17/80], Iter [700/500] Loss: 12.7575
Epoch [17/80], Iter [800/500] Loss: 10.7792
Epoch [17/80], Iter [900/500] Loss: 7.0889
Epoch [18/80], Iter [100/500] Loss: 10.5613
Epoch [18/80], Iter [200/500] Loss: 3.0777
Epoch [18/80], Iter [300/500] Loss: 6.3598
Epoch [18/80], Iter [400/500] Loss: 7.9515
Epoch [18/80], Iter [500/500] Loss: 10.8023
Epoch [18/80], Iter [600/500] Loss: 7.3443
Epoch [18/80], Iter [700/500] Loss: 8.0862
Epoch [18/80], Iter [800/500] Loss: 15.2795
Epoch [18/80], Iter [900/500] Loss: 10.2788
Epoch [19/80], Iter [100/500] Loss: 5.0786
Epoch [19/80], Iter [200/500] Loss: 8.8248
Epoch [19/80], Iter [300/500] Loss: 4.9262
Epoch [19/80], Iter [400/500] Loss: 7.8992
Epoch [19/80], Iter [500/500] Loss: 13.1279
Epoch [19/80], Iter [600/500] Loss: 8.2703
Epoch [19/80], Iter [700/500] Loss: 4.1547
Epoch [19/80], Iter [800/500] Loss: 9.0542
Epoch [19/80], Iter [900/500] Loss: 6.7904
Epoch [20/80], Iter [100/500] Loss: 8.6150
Epoch [20/80], Iter [200/500] Loss: 3.7212
Epoch [20/80], Iter [300/500] Loss: 6.2832
Epoch [20/80], Iter [400/500] Loss: 10.1591
Epoch [20/80], Iter [500/500] Loss: 9.7668
Epoch [20/80], Iter [600/500] Loss: 4.7498
Epoch [20/80], Iter [700/500] Loss: 4.8831
Epoch [20/80], Iter [800/500] Loss: 7.7877
Epoch [20/80], Iter [900/500] Loss: 8.5114
Epoch [21/80], Iter [100/500] Loss: 2.1853
Epoch [21/80], Iter [200/500] Loss: 5.8741
Epoch [21/80], Iter [300/500] Loss: 5.3676
Epoch [21/80], Iter [400/500] Loss: 3.1155
Epoch [21/80], Iter [500/500] Loss: 4.2433
Epoch [21/80], Iter [600/500] Loss: 1.9783
Epoch [21/80], Iter [700/500] Loss: 2.7622
Epoch [21/80], Iter [800/500] Loss: 2.0112
Epoch [21/80], Iter [900/500] Loss: 2.2692
Epoch [22/80], Iter [100/500] Loss: 2.1882
Epoch [22/80], Iter [200/500] Loss: 4.2540
Epoch [22/80], Iter [300/500] Loss: 4.0126
Epoch [22/80], Iter [400/500] Loss: 2.2220
Epoch [22/80], Iter [500/500] Loss: 2.4755
Epoch [22/80], Iter [600/500] Loss: 3.0793
Epoch [22/80], Iter [700/500] Loss: 1.9128
Epoch [22/80], Iter [800/500] Loss: 4.8721
Epoch [22/80], Iter [900/500] Loss: 2.1349
Epoch [23/80], Iter [100/500] Loss: 1.8705
Epoch [23/80], Iter [200/500] Loss: 2.4326
Epoch [23/80], Iter [300/500] Loss: 1.5636
Epoch [23/80], Iter [400/500] Loss: 2.0465
Epoch [23/80], Iter [500/500] Loss: 1.5183
Epoch [23/80], Iter [600/500] Loss: 2.2711
Epoch [23/80], Iter [700/500] Loss: 2.8997
Epoch [23/80], Iter [800/500] Loss: 2.6150
Epoch [23/80], Iter [900/500] Loss: 2.8083
Epoch [24/80], Iter [100/500] Loss: 2.7177
Epoch [24/80], Iter [200/500] Loss: 3.2044
Epoch [24/80], Iter [300/500] Loss: 3.8137
Epoch [24/80], Iter [400/500] Loss: 1.9400
Epoch [24/80], Iter [500/500] Loss: 2.3550
Epoch [24/80], Iter [600/500] Loss: 1.6304
Epoch [24/80], Iter [700/500] Loss: 1.1287
Epoch [24/80], Iter [800/500] Loss: 2.1436
Epoch [24/80], Iter [900/500] Loss: 1.3761
Epoch [25/80], Iter [100/500] Loss: 1.9115
Epoch [25/80], Iter [200/500] Loss: 0.9423
Epoch [25/80], Iter [300/500] Loss: 1.1732
Epoch [25/80], Iter [400/500] Loss: 1.8946
Epoch [25/80], Iter [500/500] Loss: 1.4359
Epoch [25/80], Iter [600/500] Loss: 2.7499
Epoch [25/80], Iter [700/500] Loss: 3.2734
Epoch [25/80], Iter [800/500] Loss: 1.5863
Epoch [25/80], Iter [900/500] Loss: 2.8276
Epoch [26/80], Iter [100/500] Loss: 3.3783
Epoch [26/80], Iter [200/500] Loss: 1.6336
Epoch [26/80], Iter [300/500] Loss: 1.8298
Epoch [26/80], Iter [400/500] Loss: 1.1775
Epoch [26/80], Iter [500/500] Loss: 2.5811
Epoch [26/80], Iter [600/500] Loss: 1.2587
Epoch [26/80], Iter [700/500] Loss: 2.3547
Epoch [26/80], Iter [800/500] Loss: 3.2238
Epoch [26/80], Iter [900/500] Loss: 1.8571
Epoch [27/80], Iter [100/500] Loss: 1.9582
Epoch [27/80], Iter [200/500] Loss: 0.8752
Epoch [27/80], Iter [300/500] Loss: 1.5140
Epoch [27/80], Iter [400/500] Loss: 1.4624
Epoch [27/80], Iter [500/500] Loss: 3.6735
Epoch [27/80], Iter [600/500] Loss: 2.5618
Epoch [27/80], Iter [700/500] Loss: 1.3707
Epoch [27/80], Iter [800/500] Loss: 1.2286
Epoch [27/80], Iter [900/500] Loss: 2.4623
Epoch [28/80], Iter [100/500] Loss: 0.8966
Epoch [28/80], Iter [200/500] Loss: 1.4363
Epoch [28/80], Iter [300/500] Loss: 1.3229
Epoch [28/80], Iter [400/500] Loss: 1.4402
Epoch [28/80], Iter [500/500] Loss: 1.4920
Epoch [28/80], Iter [600/500] Loss: 1.9604
Epoch [28/80], Iter [700/500] Loss: 3.1165
Epoch [28/80], Iter [800/500] Loss: 1.0391
Epoch [28/80], Iter [900/500] Loss: 2.5201
Epoch [29/80], Iter [100/500] Loss: 1.8787
Epoch [29/80], Iter [200/500] Loss: 0.9840
Epoch [29/80], Iter [300/500] Loss: 1.4460
Epoch [29/80], Iter [400/500] Loss: 2.2886
Epoch [29/80], Iter [500/500] Loss: 1.4231
Epoch [29/80], Iter [600/500] Loss: 1.4980
Epoch [29/80], Iter [700/500] Loss: 2.3995
Epoch [29/80], Iter [800/500] Loss: 1.7662
Epoch [29/80], Iter [900/500] Loss: 2.3659
Epoch [30/80], Iter [100/500] Loss: 1.9505
Epoch [30/80], Iter [200/500] Loss: 1.1663
Epoch [30/80], Iter [300/500] Loss: 0.9471
Epoch [30/80], Iter [400/500] Loss: 0.9364
Epoch [30/80], Iter [500/500] Loss: 1.0124
Epoch [30/80], Iter [600/500] Loss: 1.2437
Epoch [30/80], Iter [700/500] Loss: 0.8796
Epoch [30/80], Iter [800/500] Loss: 1.2183
Epoch [30/80], Iter [900/500] Loss: 2.3959
Epoch [31/80], Iter [100/500] Loss: 1.4337
Epoch [31/80], Iter [200/500] Loss: 1.1861
Epoch [31/80], Iter [300/500] Loss: 1.2915
Epoch [31/80], Iter [400/500] Loss: 1.0188
Epoch [31/80], Iter [500/500] Loss: 2.2067
Epoch [31/80], Iter [600/500] Loss: 2.6476
Epoch [31/80], Iter [700/500] Loss: 1.1402
Epoch [31/80], Iter [800/500] Loss: 1.4248
Epoch [31/80], Iter [900/500] Loss: 1.0669
Epoch [32/80], Iter [100/500] Loss: 1.5955
Epoch [32/80], Iter [200/500] Loss: 1.7216
Epoch [32/80], Iter [300/500] Loss: 1.2304
Epoch [32/80], Iter [400/500] Loss: 1.7058
Epoch [32/80], Iter [500/500] Loss: 1.2115
Epoch [32/80], Iter [600/500] Loss: 1.6176
Epoch [32/80], Iter [700/500] Loss: 1.3043
Epoch [32/80], Iter [800/500] Loss: 1.9501
Epoch [32/80], Iter [900/500] Loss: 1.9035
Epoch [33/80], Iter [100/500] Loss: 1.9505
Epoch [33/80], Iter [200/500] Loss: 1.5603
Epoch [33/80], Iter [300/500] Loss: 1.5528
Epoch [33/80], Iter [400/500] Loss: 1.4192
Epoch [33/80], Iter [500/500] Loss: 1.2211
Epoch [33/80], Iter [600/500] Loss: 1.3927
Epoch [33/80], Iter [700/500] Loss: 2.3885
Epoch [33/80], Iter [800/500] Loss: 1.0948
Epoch [33/80], Iter [900/500] Loss: 1.6951
Epoch [34/80], Iter [100/500] Loss: 0.9534
Epoch [34/80], Iter [200/500] Loss: 0.7364
Epoch [34/80], Iter [300/500] Loss: 1.2372
Epoch [34/80], Iter [400/500] Loss: 1.6718
Epoch [34/80], Iter [500/500] Loss: 0.7804
Epoch [34/80], Iter [600/500] Loss: 2.1848
Epoch [34/80], Iter [700/500] Loss: 0.6333
Epoch [34/80], Iter [800/500] Loss: 1.6399
Epoch [34/80], Iter [900/500] Loss: 0.9555
Epoch [35/80], Iter [100/500] Loss: 1.5851
Epoch [35/80], Iter [200/500] Loss: 3.7824
Epoch [35/80], Iter [300/500] Loss: 2.5642
Epoch [35/80], Iter [400/500] Loss: 0.8965
Epoch [35/80], Iter [500/500] Loss: 1.9092
Epoch [35/80], Iter [600/500] Loss: 1.3729
Epoch [35/80], Iter [700/500] Loss: 2.2079
Epoch [35/80], Iter [800/500] Loss: 0.9051
Epoch [35/80], Iter [900/500] Loss: 1.1845
Epoch [36/80], Iter [100/500] Loss: 0.8240
Epoch [36/80], Iter [200/500] Loss: 1.1929
Epoch [36/80], Iter [300/500] Loss: 1.7051
Epoch [36/80], Iter [400/500] Loss: 0.7341
Epoch [36/80], Iter [500/500] Loss: 0.8078
Epoch [36/80], Iter [600/500] Loss: 0.7525
Epoch [36/80], Iter [700/500] Loss: 1.5739
Epoch [36/80], Iter [800/500] Loss: 1.3938
Epoch [36/80], Iter [900/500] Loss: 0.7145
Epoch [37/80], Iter [100/500] Loss: 0.9577
Epoch [37/80], Iter [200/500] Loss: 0.9464
Epoch [37/80], Iter [300/500] Loss: 1.0931
Epoch [37/80], Iter [400/500] Loss: 1.0390
Epoch [37/80], Iter [500/500] Loss: 1.3472
Epoch [37/80], Iter [600/500] Loss: 0.6312
Epoch [37/80], Iter [700/500] Loss: 0.6754
Epoch [37/80], Iter [800/500] Loss: 0.5888
Epoch [37/80], Iter [900/500] Loss: 3.1377
Epoch [38/80], Iter [100/500] Loss: 0.8339
Epoch [38/80], Iter [200/500] Loss: 0.9345
Epoch [38/80], Iter [300/500] Loss: 0.6615
Epoch [38/80], Iter [400/500] Loss: 1.6327
Epoch [38/80], Iter [500/500] Loss: 0.4701
Epoch [38/80], Iter [600/500] Loss: 1.1513
Epoch [38/80], Iter [700/500] Loss: 0.9013
Epoch [38/80], Iter [800/500] Loss: 2.7680
Epoch [38/80], Iter [900/500] Loss: 1.2733
Epoch [39/80], Iter [100/500] Loss: 3.0368
Epoch [39/80], Iter [200/500] Loss: 1.5569
Epoch [39/80], Iter [300/500] Loss: 0.5049
Epoch [39/80], Iter [400/500] Loss: 0.4075
Epoch [39/80], Iter [500/500] Loss: 0.9771
Epoch [39/80], Iter [600/500] Loss: 0.9003
Epoch [39/80], Iter [700/500] Loss: 1.6323
Epoch [39/80], Iter [800/500] Loss: 0.4881
Epoch [39/80], Iter [900/500] Loss: 2.1344
Epoch [40/80], Iter [100/500] Loss: 1.2439
Epoch [40/80], Iter [200/500] Loss: 1.3419
Epoch [40/80], Iter [300/500] Loss: 0.9575
Epoch [40/80], Iter [400/500] Loss: 1.4438
Epoch [40/80], Iter [500/500] Loss: 0.8559
Epoch [40/80], Iter [600/500] Loss: 1.0400
Epoch [40/80], Iter [700/500] Loss: 0.9063
Epoch [40/80], Iter [800/500] Loss: 1.0714
Epoch [40/80], Iter [900/500] Loss: 0.5098
Epoch [41/80], Iter [100/500] Loss: 0.5906
Epoch [41/80], Iter [200/500] Loss: 0.6610
Epoch [41/80], Iter [300/500] Loss: 0.4230
Epoch [41/80], Iter [400/500] Loss: 0.6014
Epoch [41/80], Iter [500/500] Loss: 0.3004
Epoch [41/80], Iter [600/500] Loss: 0.5606
Epoch [41/80], Iter [700/500] Loss: 0.4994
Epoch [41/80], Iter [800/500] Loss: 0.8664
Epoch [41/80], Iter [900/500] Loss: 0.5302
Epoch [42/80], Iter [100/500] Loss: 0.2961
Epoch [42/80], Iter [200/500] Loss: 0.2826
Epoch [42/80], Iter [300/500] Loss: 0.3575
Epoch [42/80], Iter [400/500] Loss: 0.3224
Epoch [42/80], Iter [500/500] Loss: 0.6851
Epoch [42/80], Iter [600/500] Loss: 0.2997
Epoch [42/80], Iter [700/500] Loss: 0.3907
Epoch [42/80], Iter [800/500] Loss: 0.4437
Epoch [42/80], Iter [900/500] Loss: 0.4847
Epoch [43/80], Iter [100/500] Loss: 0.5418
Epoch [43/80], Iter [200/500] Loss: 0.4099
Epoch [43/80], Iter [300/500] Loss: 0.3339
Epoch [43/80], Iter [400/500] Loss: 0.5546
Epoch [43/80], Iter [500/500] Loss: 0.5867
Epoch [43/80], Iter [600/500] Loss: 0.3540
Epoch [43/80], Iter [700/500] Loss: 0.4656
Epoch [43/80], Iter [800/500] Loss: 0.2922
Epoch [43/80], Iter [900/500] Loss: 0.3042
Epoch [44/80], Iter [100/500] Loss: 0.6309
Epoch [44/80], Iter [200/500] Loss: 0.2412
Epoch [44/80], Iter [300/500] Loss: 0.5505
Epoch [44/80], Iter [400/500] Loss: 0.4133
Epoch [44/80], Iter [500/500] Loss: 0.4317
Epoch [44/80], Iter [600/500] Loss: 0.4152
Epoch [44/80], Iter [700/500] Loss: 0.6375
Epoch [44/80], Iter [800/500] Loss: 0.3283
Epoch [44/80], Iter [900/500] Loss: 0.4399
Epoch [45/80], Iter [100/500] Loss: 0.2777
Epoch [45/80], Iter [200/500] Loss: 0.3131
Epoch [45/80], Iter [300/500] Loss: 0.2451
Epoch [45/80], Iter [400/500] Loss: 0.5350
Epoch [45/80], Iter [500/500] Loss: 0.2501
Epoch [45/80], Iter [600/500] Loss: 0.2076
Epoch [45/80], Iter [700/500] Loss: 0.2317
Epoch [45/80], Iter [800/500] Loss: 0.8772
Epoch [45/80], Iter [900/500] Loss: 0.4162
Epoch [46/80], Iter [100/500] Loss: 0.3190
Epoch [46/80], Iter [200/500] Loss: 0.2458
Epoch [46/80], Iter [300/500] Loss: 0.2976
Epoch [46/80], Iter [400/500] Loss: 0.3712
Epoch [46/80], Iter [500/500] Loss: 0.4305
Epoch [46/80], Iter [600/500] Loss: 0.5143
Epoch [46/80], Iter [700/500] Loss: 0.2622
Epoch [46/80], Iter [800/500] Loss: 0.5331
Epoch [46/80], Iter [900/500] Loss: 0.3598
Epoch [47/80], Iter [100/500] Loss: 0.2180
Epoch [47/80], Iter [200/500] Loss: 0.2275
Epoch [47/80], Iter [300/500] Loss: 0.5302
Epoch [47/80], Iter [400/500] Loss: 0.3535
Epoch [47/80], Iter [500/500] Loss: 0.5790
Epoch [47/80], Iter [600/500] Loss: 0.3741
Epoch [47/80], Iter [700/500] Loss: 0.5120
Epoch [47/80], Iter [800/500] Loss: 0.6204
Epoch [47/80], Iter [900/500] Loss: 0.4902
Epoch [48/80], Iter [100/500] Loss: 0.2668
Epoch [48/80], Iter [200/500] Loss: 0.5693
Epoch [48/80], Iter [300/500] Loss: 0.3328
Epoch [48/80], Iter [400/500] Loss: 0.2399
Epoch [48/80], Iter [500/500] Loss: 0.3160
Epoch [48/80], Iter [600/500] Loss: 0.2944
Epoch [48/80], Iter [700/500] Loss: 0.2742
Epoch [48/80], Iter [800/500] Loss: 0.5297
Epoch [48/80], Iter [900/500] Loss: 0.3755
Epoch [49/80], Iter [100/500] Loss: 0.2658
Epoch [49/80], Iter [200/500] Loss: 0.2223
Epoch [49/80], Iter [300/500] Loss: 0.4348
Epoch [49/80], Iter [400/500] Loss: 0.2313
Epoch [49/80], Iter [500/500] Loss: 0.2838
Epoch [49/80], Iter [600/500] Loss: 0.3415
Epoch [49/80], Iter [700/500] Loss: 0.3633
Epoch [49/80], Iter [800/500] Loss: 0.3768
Epoch [49/80], Iter [900/500] Loss: 0.5177
Epoch [50/80], Iter [100/500] Loss: 0.3538
Epoch [50/80], Iter [200/500] Loss: 0.2759
Epoch [50/80], Iter [300/500] Loss: 0.2255
Epoch [50/80], Iter [400/500] Loss: 0.3148
Epoch [50/80], Iter [500/500] Loss: 0.4502
Epoch [50/80], Iter [600/500] Loss: 0.3382
Epoch [50/80], Iter [700/500] Loss: 0.8207
Epoch [50/80], Iter [800/500] Loss: 0.3541
Epoch [50/80], Iter [900/500] Loss: 0.4090
('time used:', 17124.861335999998)
未被DataParallel初始化
Epoch [1/80], Iter [100/500] Loss: 635.6779
Epoch [1/80], Iter [200/500] Loss: 247.5514
Epoch [1/80], Iter [300/500] Loss: 231.7609
Epoch [1/80], Iter [400/500] Loss: 198.7304
Epoch [1/80], Iter [500/500] Loss: 207.1028
Epoch [1/80], Iter [600/500] Loss: 114.7708
Epoch [1/80], Iter [700/500] Loss: 126.9886
Epoch [1/80], Iter [800/500] Loss: 160.8622
Epoch [1/80], Iter [900/500] Loss: 153.8121
Epoch [2/80], Iter [100/500] Loss: 106.6578
Epoch [2/80], Iter [200/500] Loss: 91.5044
Epoch [2/80], Iter [300/500] Loss: 111.4231
Epoch [2/80], Iter [400/500] Loss: 50.7004
Epoch [2/80], Iter [500/500] Loss: 58.9242
Epoch [2/80], Iter [600/500] Loss: 55.2035
Epoch [2/80], Iter [700/500] Loss: 26.7637
Epoch [2/80], Iter [800/500] Loss: 52.5472
Epoch [2/80], Iter [900/500] Loss: 51.7907
Epoch [3/80], Iter [100/500] Loss: 35.7970
Epoch [3/80], Iter [200/500] Loss: 59.1204
Epoch [3/80], Iter [300/500] Loss: 70.5727
Epoch [3/80], Iter [400/500] Loss: 50.1149
Epoch [3/80], Iter [500/500] Loss: 26.3628
Epoch [3/80], Iter [600/500] Loss: 67.3355
Epoch [3/80], Iter [700/500] Loss: 56.8271
Epoch [3/80], Iter [800/500] Loss: 46.5803
Epoch [3/80], Iter [900/500] Loss: 34.9568
Epoch [4/80], Iter [100/500] Loss: 67.0837
Epoch [4/80], Iter [200/500] Loss: 36.8596
Epoch [4/80], Iter [300/500] Loss: 37.6830
Epoch [4/80], Iter [400/500] Loss: 52.1378
Epoch [4/80], Iter [500/500] Loss: 104.5909
Epoch [4/80], Iter [600/500] Loss: 71.3509
Epoch [4/80], Iter [700/500] Loss: 28.4496
Epoch [4/80], Iter [800/500] Loss: 56.1399
Epoch [4/80], Iter [900/500] Loss: 58.7510
Epoch [5/80], Iter [100/500] Loss: 42.5710
Epoch [5/80], Iter [200/500] Loss: 25.5430
Epoch [5/80], Iter [300/500] Loss: 25.9271
Epoch [5/80], Iter [400/500] Loss: 75.8942
Epoch [5/80], Iter [500/500] Loss: 70.6782
Epoch [5/80], Iter [600/500] Loss: 10.7801
Epoch [5/80], Iter [700/500] Loss: 29.9416
Epoch [5/80], Iter [800/500] Loss: 47.0781
Epoch [5/80], Iter [900/500] Loss: 45.4692
Epoch [6/80], Iter [100/500] Loss: 51.3811
Epoch [6/80], Iter [200/500] Loss: 30.6207
Epoch [6/80], Iter [300/500] Loss: 35.4928
Epoch [6/80], Iter [400/500] Loss: 37.9467
Epoch [6/80], Iter [500/500] Loss: 36.7505
Epoch [6/80], Iter [600/500] Loss: 64.3528
Epoch [6/80], Iter [700/500] Loss: 73.6308
Epoch [6/80], Iter [800/500] Loss: 33.1290
Epoch [6/80], Iter [900/500] Loss: 34.2442
Epoch [7/80], Iter [100/500] Loss: 34.9157
Epoch [7/80], Iter [200/500] Loss: 26.8041
Epoch [7/80], Iter [300/500] Loss: 43.5796
Epoch [7/80], Iter [400/500] Loss: 31.5104
Epoch [7/80], Iter [500/500] Loss: 41.2132
Epoch [7/80], Iter [600/500] Loss: 23.1634
Epoch [7/80], Iter [700/500] Loss: 26.7399
Epoch [7/80], Iter [800/500] Loss: 60.4979
Epoch [7/80], Iter [900/500] Loss: 32.8528
Epoch [8/80], Iter [100/500] Loss: 36.6079
Epoch [8/80], Iter [200/500] Loss: 49.1552
Epoch [8/80], Iter [300/500] Loss: 21.2926
Epoch [8/80], Iter [400/500] Loss: 33.5335
Epoch [8/80], Iter [500/500] Loss: 50.1770
Epoch [8/80], Iter [600/500] Loss: 21.9908
Epoch [8/80], Iter [700/500] Loss: 40.2040
Epoch [8/80], Iter [800/500] Loss: 22.5460
Epoch [8/80], Iter [900/500] Loss: 43.9564
Epoch [9/80], Iter [100/500] Loss: 19.8116
Epoch [9/80], Iter [200/500] Loss: 8.5169
Epoch [9/80], Iter [300/500] Loss: 37.0475
Epoch [9/80], Iter [400/500] Loss: 74.2606
Epoch [9/80], Iter [500/500] Loss: 16.3256
Epoch [9/80], Iter [600/500] Loss: 26.0609
Epoch [9/80], Iter [700/500] Loss: 24.3721
Epoch [9/80], Iter [800/500] Loss: 37.5132
Epoch [9/80], Iter [900/500] Loss: 27.4818
Epoch [10/80], Iter [100/500] Loss: 11.7654
Epoch [10/80], Iter [200/500] Loss: 9.3536
Epoch [10/80], Iter [300/500] Loss: 11.6718
Epoch [10/80], Iter [400/500] Loss: 24.4423
Epoch [10/80], Iter [500/500] Loss: 25.6966
Epoch [10/80], Iter [600/500] Loss: 35.2358
Epoch [10/80], Iter [700/500] Loss: 17.2685
Epoch [10/80], Iter [800/500] Loss: 22.3965
Epoch [10/80], Iter [900/500] Loss: 42.6901
Epoch [11/80], Iter [100/500] Loss: 17.9832
Epoch [11/80], Iter [200/500] Loss: 18.8705
Epoch [11/80], Iter [300/500] Loss: 25.3700
Epoch [11/80], Iter [400/500] Loss: 10.8511
Epoch [11/80], Iter [500/500] Loss: 18.3028
Epoch [11/80], Iter [600/500] Loss: 23.2316
Epoch [11/80], Iter [700/500] Loss: 10.2498
Epoch [11/80], Iter [800/500] Loss: 14.7609
Epoch [11/80], Iter [900/500] Loss: 20.1801
Epoch [12/80], Iter [100/500] Loss: 23.8675
Epoch [12/80], Iter [200/500] Loss: 15.7924
Epoch [12/80], Iter [300/500] Loss: 13.7092
Epoch [12/80], Iter [400/500] Loss: 12.0196
Epoch [12/80], Iter [500/500] Loss: 7.2408
Epoch [12/80], Iter [600/500] Loss: 10.7912
Epoch [12/80], Iter [700/500] Loss: 11.9665
Epoch [12/80], Iter [800/500] Loss: 13.7599
Epoch [12/80], Iter [900/500] Loss: 18.3869
Epoch [13/80], Iter [100/500] Loss: 11.1715
Epoch [13/80], Iter [200/500] Loss: 17.6397
Epoch [13/80], Iter [300/500] Loss: 9.3256
Epoch [13/80], Iter [400/500] Loss: 12.7995
Epoch [13/80], Iter [500/500] Loss: 7.8598
Epoch [13/80], Iter [600/500] Loss: 10.7001
Epoch [13/80], Iter [700/500] Loss: 26.3672
Epoch [13/80], Iter [800/500] Loss: 15.4815
Epoch [13/80], Iter [900/500] Loss: 14.0478
Epoch [14/80], Iter [100/500] Loss: 16.0473
Epoch [14/80], Iter [200/500] Loss: 4.7192
Epoch [14/80], Iter [300/500] Loss: 10.7586
Epoch [14/80], Iter [400/500] Loss: 13.6734
Epoch [14/80], Iter [500/500] Loss: 9.3228
Epoch [14/80], Iter [600/500] Loss: 5.5830
Epoch [14/80], Iter [700/500] Loss: 7.5252
Epoch [14/80], Iter [800/500] Loss: 7.6239
Epoch [14/80], Iter [900/500] Loss: 7.1024
Epoch [15/80], Iter [100/500] Loss: 17.5188
Epoch [15/80], Iter [200/500] Loss: 11.8842
Epoch [15/80], Iter [300/500] Loss: 9.0330
Epoch [15/80], Iter [400/500] Loss: 11.7120
Epoch [15/80], Iter [500/500] Loss: 17.0862
Epoch [15/80], Iter [600/500] Loss: 11.4103
Epoch [15/80], Iter [700/500] Loss: 12.2746
Epoch [15/80], Iter [800/500] Loss: 13.6224
Epoch [15/80], Iter [900/500] Loss: 12.7686
Epoch [16/80], Iter [100/500] Loss: 5.5978
Epoch [16/80], Iter [200/500] Loss: 12.2122
Epoch [16/80], Iter [300/500] Loss: 5.1189
Epoch [16/80], Iter [400/500] Loss: 14.1793
Epoch [16/80], Iter [500/500] Loss: 10.3744
Epoch [16/80], Iter [600/500] Loss: 5.2099
Epoch [16/80], Iter [700/500] Loss: 6.7522
Epoch [16/80], Iter [800/500] Loss: 13.2532
Epoch [16/80], Iter [900/500] Loss: 6.7040
Epoch [17/80], Iter [100/500] Loss: 10.7390
Epoch [17/80], Iter [200/500] Loss: 8.1525
Epoch [17/80], Iter [300/500] Loss: 14.2229
Epoch [17/80], Iter [400/500] Loss: 7.6302
Epoch [17/80], Iter [500/500] Loss: 6.4554
Epoch [17/80], Iter [600/500] Loss: 8.2380
Epoch [17/80], Iter [700/500] Loss: 6.4445
Epoch [17/80], Iter [800/500] Loss: 8.4644
Epoch [17/80], Iter [900/500] Loss: 9.0200
Epoch [18/80], Iter [100/500] Loss: 9.5088
Epoch [18/80], Iter [200/500] Loss: 3.8648
Epoch [18/80], Iter [300/500] Loss: 8.8408
Epoch [18/80], Iter [400/500] Loss: 7.4195
Epoch [18/80], Iter [500/500] Loss: 15.0480
Epoch [18/80], Iter [600/500] Loss: 5.6232
Epoch [18/80], Iter [700/500] Loss: 5.2233
Epoch [18/80], Iter [800/500] Loss: 6.5702
Epoch [18/80], Iter [900/500] Loss: 13.7427
Epoch [19/80], Iter [100/500] Loss: 3.5658
Epoch [19/80], Iter [200/500] Loss: 4.7062
Epoch [19/80], Iter [300/500] Loss: 10.7831
Epoch [19/80], Iter [400/500] Loss: 13.1375
Epoch [19/80], Iter [500/500] Loss: 22.2764
Epoch [19/80], Iter [600/500] Loss: 10.3463
Epoch [19/80], Iter [700/500] Loss: 7.2373
Epoch [19/80], Iter [800/500] Loss: 5.5266
Epoch [19/80], Iter [900/500] Loss: 9.2434
Epoch [20/80], Iter [100/500] Loss: 7.8164
Epoch [20/80], Iter [200/500] Loss: 9.6628
Epoch [20/80], Iter [300/500] Loss: 4.1032
Epoch [20/80], Iter [400/500] Loss: 16.5922
Epoch [20/80], Iter [500/500] Loss: 6.9907
Epoch [20/80], Iter [600/500] Loss: 10.9906
Epoch [20/80], Iter [700/500] Loss: 8.5092
Epoch [20/80], Iter [800/500] Loss: 7.1332
Epoch [20/80], Iter [900/500] Loss: 6.1639
Epoch [21/80], Iter [100/500] Loss: 6.3100
Epoch [21/80], Iter [200/500] Loss: 4.5190
Epoch [21/80], Iter [300/500] Loss: 4.3493
Epoch [21/80], Iter [400/500] Loss: 7.9860
Epoch [21/80], Iter [500/500] Loss: 8.8312
Epoch [21/80], Iter [600/500] Loss: 10.7502
Epoch [21/80], Iter [700/500] Loss: 3.2116
Epoch [21/80], Iter [800/500] Loss: 4.0126
Epoch [21/80], Iter [900/500] Loss: 5.3675
Epoch [22/80], Iter [100/500] Loss: 1.4893
Epoch [22/80], Iter [200/500] Loss: 1.6984
Epoch [22/80], Iter [300/500] Loss: 2.6195
Epoch [22/80], Iter [400/500] Loss: 2.1465
Epoch [22/80], Iter [500/500] Loss: 2.9847
Epoch [22/80], Iter [600/500] Loss: 4.9699
Epoch [22/80], Iter [700/500] Loss: 1.6728
Epoch [22/80], Iter [800/500] Loss: 1.3381
Epoch [22/80], Iter [900/500] Loss: 2.0680
Epoch [23/80], Iter [100/500] Loss: 1.9145
Epoch [23/80], Iter [200/500] Loss: 0.9280
Epoch [23/80], Iter [300/500] Loss: 2.9585
Epoch [23/80], Iter [400/500] Loss: 1.0787
Epoch [23/80], Iter [500/500] Loss: 3.1779
Epoch [23/80], Iter [600/500] Loss: 2.4411
Epoch [23/80], Iter [700/500] Loss: 2.0049
Epoch [23/80], Iter [800/500] Loss: 2.2844
Epoch [23/80], Iter [900/500] Loss: 2.2328
Epoch [24/80], Iter [100/500] Loss: 1.5221
Epoch [24/80], Iter [200/500] Loss: 2.0100
Epoch [24/80], Iter [300/500] Loss: 1.8868
Epoch [24/80], Iter [400/500] Loss: 1.4898
Epoch [24/80], Iter [500/500] Loss: 1.1626
Epoch [24/80], Iter [600/500] Loss: 1.2527
Epoch [24/80], Iter [700/500] Loss: 1.3430
Epoch [24/80], Iter [800/500] Loss: 1.3355
Epoch [24/80], Iter [900/500] Loss: 1.8292
Epoch [25/80], Iter [100/500] Loss: 2.2471
Epoch [25/80], Iter [200/500] Loss: 2.8727
Epoch [25/80], Iter [300/500] Loss: 1.3531
Epoch [25/80], Iter [400/500] Loss: 1.1110
Epoch [25/80], Iter [500/500] Loss: 2.7648
Epoch [25/80], Iter [600/500] Loss: 1.8364
Epoch [25/80], Iter [700/500] Loss: 1.4299
Epoch [25/80], Iter [800/500] Loss: 1.5985
Epoch [25/80], Iter [900/500] Loss: 2.5364
Epoch [26/80], Iter [100/500] Loss: 2.6469
Epoch [26/80], Iter [200/500] Loss: 3.1215
Epoch [26/80], Iter [300/500] Loss: 1.4029
Epoch [26/80], Iter [400/500] Loss: 1.2688
Epoch [26/80], Iter [500/500] Loss: 2.4794
Epoch [26/80], Iter [600/500] Loss: 1.1937
Epoch [26/80], Iter [700/500] Loss: 1.0709
Epoch [26/80], Iter [800/500] Loss: 1.4961
Epoch [26/80], Iter [900/500] Loss: 1.4560
Epoch [27/80], Iter [100/500] Loss: 2.0633
Epoch [27/80], Iter [200/500] Loss: 2.6687
Epoch [27/80], Iter [300/500] Loss: 5.2073
Epoch [27/80], Iter [400/500] Loss: 2.2762
Epoch [27/80], Iter [500/500] Loss: 1.6105
Epoch [27/80], Iter [600/500] Loss: 1.6631
Epoch [27/80], Iter [700/500] Loss: 1.0523
Epoch [27/80], Iter [800/500] Loss: 2.8945
Epoch [27/80], Iter [900/500] Loss: 1.5388
Epoch [28/80], Iter [100/500] Loss: 1.6230
Epoch [28/80], Iter [200/500] Loss: 1.8003
Epoch [28/80], Iter [300/500] Loss: 1.4840
Epoch [28/80], Iter [400/500] Loss: 0.9465
Epoch [28/80], Iter [500/500] Loss: 1.6054
Epoch [28/80], Iter [600/500] Loss: 3.3669
Epoch [28/80], Iter [700/500] Loss: 1.4555
Epoch [28/80], Iter [800/500] Loss: 2.2903
Epoch [28/80], Iter [900/500] Loss: 1.2850
Epoch [29/80], Iter [100/500] Loss: 1.7152
Epoch [29/80], Iter [200/500] Loss: 1.2824
Epoch [29/80], Iter [300/500] Loss: 1.5778
Epoch [29/80], Iter [400/500] Loss: 3.1152
Epoch [29/80], Iter [500/500] Loss: 1.2492
Epoch [29/80], Iter [600/500] Loss: 0.9721
Epoch [29/80], Iter [700/500] Loss: 1.4465
Epoch [29/80], Iter [800/500] Loss: 0.9678
Epoch [29/80], Iter [900/500] Loss: 1.5000
Epoch [30/80], Iter [100/500] Loss: 1.5524
Epoch [30/80], Iter [200/500] Loss: 1.5233
Epoch [30/80], Iter [300/500] Loss: 1.4226
Epoch [30/80], Iter [400/500] Loss: 0.9432
Epoch [30/80], Iter [500/500] Loss: 1.4623
Epoch [30/80], Iter [600/500] Loss: 1.3845
Epoch [30/80], Iter [700/500] Loss: 1.3301
Epoch [30/80], Iter [800/500] Loss: 1.0105
Epoch [30/80], Iter [900/500] Loss: 1.8372
Epoch [31/80], Iter [100/500] Loss: 1.3019
Epoch [31/80], Iter [200/500] Loss: 1.1216
Epoch [31/80], Iter [300/500] Loss: 0.8553
Epoch [31/80], Iter [400/500] Loss: 1.6882
Epoch [31/80], Iter [500/500] Loss: 1.7691
Epoch [31/80], Iter [600/500] Loss: 1.7412
Epoch [31/80], Iter [700/500] Loss: 2.2204
Epoch [31/80], Iter [800/500] Loss: 0.6559
Epoch [31/80], Iter [900/500] Loss: 1.4613
Epoch [32/80], Iter [100/500] Loss: 1.1408
Epoch [32/80], Iter [200/500] Loss: 3.6378
Epoch [32/80], Iter [300/500] Loss: 1.5543
Epoch [32/80], Iter [400/500] Loss: 2.1538
Epoch [32/80], Iter [500/500] Loss: 1.1102
Epoch [32/80], Iter [600/500] Loss: 1.3187
Epoch [32/80], Iter [700/500] Loss: 0.7230
Epoch [32/80], Iter [800/500] Loss: 1.6149
Epoch [32/80], Iter [900/500] Loss: 1.0926
Epoch [33/80], Iter [100/500] Loss: 1.9460
Epoch [33/80], Iter [200/500] Loss: 0.9948
Epoch [33/80], Iter [300/500] Loss: 1.4460
Epoch [33/80], Iter [400/500] Loss: 1.5855
Epoch [33/80], Iter [500/500] Loss: 1.5834
Epoch [33/80], Iter [600/500] Loss: 0.8896
Epoch [33/80], Iter [700/500] Loss: 1.1927
Epoch [33/80], Iter [800/500] Loss: 1.5707
Epoch [33/80], Iter [900/500] Loss: 0.7817
Epoch [34/80], Iter [100/500] Loss: 0.9155
Epoch [34/80], Iter [200/500] Loss: 0.7930
Epoch [34/80], Iter [300/500] Loss: 1.2760
Epoch [34/80], Iter [400/500] Loss: 0.7170
Epoch [34/80], Iter [500/500] Loss: 1.9962
Epoch [34/80], Iter [600/500] Loss: 1.2418
Epoch [34/80], Iter [700/500] Loss: 1.4847
Epoch [34/80], Iter [800/500] Loss: 0.8495
Epoch [34/80], Iter [900/500] Loss: 1.3709
Epoch [35/80], Iter [100/500] Loss: 1.8495
Epoch [35/80], Iter [200/500] Loss: 0.9494
Epoch [35/80], Iter [300/500] Loss: 0.6224
Epoch [35/80], Iter [400/500] Loss: 0.5101
Epoch [35/80], Iter [500/500] Loss: 0.9373
Epoch [35/80], Iter [600/500] Loss: 1.5811
Epoch [35/80], Iter [700/500] Loss: 1.5295
Epoch [35/80], Iter [800/500] Loss: 0.7787
Epoch [35/80], Iter [900/500] Loss: 1.0337
Epoch [36/80], Iter [100/500] Loss: 0.6236
Epoch [36/80], Iter [200/500] Loss: 1.8516
Epoch [36/80], Iter [300/500] Loss: 1.5021
Epoch [36/80], Iter [400/500] Loss: 1.0459
Epoch [36/80], Iter [500/500] Loss: 1.4737
Epoch [36/80], Iter [600/500] Loss: 0.7842
Epoch [36/80], Iter [700/500] Loss: 1.6798
Epoch [36/80], Iter [800/500] Loss: 1.7413
Epoch [36/80], Iter [900/500] Loss: 0.6222
Epoch [37/80], Iter [100/500] Loss: 0.5713
Epoch [37/80], Iter [200/500] Loss: 1.3030
Epoch [37/80], Iter [300/500] Loss: 1.6937
Epoch [37/80], Iter [400/500] Loss: 0.8656
Epoch [37/80], Iter [500/500] Loss: 1.3340
Epoch [37/80], Iter [600/500] Loss: 0.6310
Epoch [37/80], Iter [700/500] Loss: 1.1445
Epoch [37/80], Iter [800/500] Loss: 0.6099
Epoch [37/80], Iter [900/500] Loss: 1.3679
Epoch [38/80], Iter [100/500] Loss: 0.9127
Epoch [38/80], Iter [200/500] Loss: 1.9450
Epoch [38/80], Iter [300/500] Loss: 1.2240
Epoch [38/80], Iter [400/500] Loss: 1.4049
Epoch [38/80], Iter [500/500] Loss: 0.9247
Epoch [38/80], Iter [600/500] Loss: 1.5308
Epoch [38/80], Iter [700/500] Loss: 1.9777
Epoch [38/80], Iter [800/500] Loss: 1.2109
Epoch [38/80], Iter [900/500] Loss: 0.8337
Epoch [39/80], Iter [100/500] Loss: 0.7904
Epoch [39/80], Iter [200/500] Loss: 0.8451
Epoch [39/80], Iter [300/500] Loss: 1.6993
Epoch [39/80], Iter [400/500] Loss: 1.2196
Epoch [39/80], Iter [500/500] Loss: 1.0665
Epoch [39/80], Iter [600/500] Loss: 0.7412
Epoch [39/80], Iter [700/500] Loss: 0.6486
Epoch [39/80], Iter [800/500] Loss: 1.5608
Epoch [39/80], Iter [900/500] Loss: 1.9978
Epoch [40/80], Iter [100/500] Loss: 1.7101
Epoch [40/80], Iter [200/500] Loss: 1.4484
Epoch [40/80], Iter [300/500] Loss: 1.5894
Epoch [40/80], Iter [400/500] Loss: 1.3371
Epoch [40/80], Iter [500/500] Loss: 0.9766
Epoch [40/80], Iter [600/500] Loss: 1.9935
Epoch [40/80], Iter [700/500] Loss: 2.0719
Epoch [40/80], Iter [800/500] Loss: 0.9455
Epoch [40/80], Iter [900/500] Loss: 0.8072
Epoch [41/80], Iter [100/500] Loss: 1.3899
Epoch [41/80], Iter [200/500] Loss: 0.9863
Epoch [41/80], Iter [300/500] Loss: 1.3738
Epoch [41/80], Iter [400/500] Loss: 0.6883
Epoch [41/80], Iter [500/500] Loss: 0.8442
Epoch [41/80], Iter [600/500] Loss: 2.0286
Epoch [41/80], Iter [700/500] Loss: 1.1960
Epoch [41/80], Iter [800/500] Loss: 1.2499
Epoch [41/80], Iter [900/500] Loss: 0.6043
Epoch [42/80], Iter [100/500] Loss: 0.3437
Epoch [42/80], Iter [200/500] Loss: 0.6596
Epoch [42/80], Iter [300/500] Loss: 0.4450
Epoch [42/80], Iter [400/500] Loss: 0.7189
Epoch [42/80], Iter [500/500] Loss: 0.5022
Epoch [42/80], Iter [600/500] Loss: 0.4597
Epoch [42/80], Iter [700/500] Loss: 0.7743
Epoch [42/80], Iter [800/500] Loss: 0.3344
Epoch [42/80], Iter [900/500] Loss: 0.7295
Epoch [43/80], Iter [100/500] Loss: 0.5074
Epoch [43/80], Iter [200/500] Loss: 0.3128
Epoch [43/80], Iter [300/500] Loss: 0.2800
Epoch [43/80], Iter [400/500] Loss: 0.3059
Epoch [43/80], Iter [500/500] Loss: 0.3486
Epoch [43/80], Iter [600/500] Loss: 0.7222
Epoch [43/80], Iter [700/500] Loss: 0.7349
Epoch [43/80], Iter [800/500] Loss: 0.8455
Epoch [43/80], Iter [900/500] Loss: 0.7261
Epoch [44/80], Iter [100/500] Loss: 0.5404
Epoch [44/80], Iter [200/500] Loss: 0.5428
Epoch [44/80], Iter [300/500] Loss: 0.5385
Epoch [44/80], Iter [400/500] Loss: 0.4106
Epoch [44/80], Iter [500/500] Loss: 0.5296
Epoch [44/80], Iter [600/500] Loss: 0.6045
Epoch [44/80], Iter [700/500] Loss: 0.3837
Epoch [44/80], Iter [800/500] Loss: 0.7552
Epoch [44/80], Iter [900/500] Loss: 0.4996
Epoch [45/80], Iter [100/500] Loss: 0.3381
Epoch [45/80], Iter [200/500] Loss: 0.3910
Epoch [45/80], Iter [300/500] Loss: 0.3790
Epoch [45/80], Iter [400/500] Loss: 0.2718
Epoch [45/80], Iter [500/500] Loss: 0.3572
Epoch [45/80], Iter [600/500] Loss: 0.2913
Epoch [45/80], Iter [700/500] Loss: 0.5244
Epoch [45/80], Iter [800/500] Loss: 0.3647
Epoch [45/80], Iter [900/500] Loss: 0.3161
Epoch [46/80], Iter [100/500] Loss: 0.4728
Epoch [46/80], Iter [200/500] Loss: 0.4386
Epoch [46/80], Iter [300/500] Loss: 0.2861
Epoch [46/80], Iter [400/500] Loss: 0.2460
Epoch [46/80], Iter [500/500] Loss: 0.3490
Epoch [46/80], Iter [600/500] Loss: 0.5804
Epoch [46/80], Iter [700/500] Loss: 0.4951
Epoch [46/80], Iter [800/500] Loss: 0.4600
Epoch [46/80], Iter [900/500] Loss: 0.5658
Epoch [47/80], Iter [100/500] Loss: 0.2479
Epoch [47/80], Iter [200/500] Loss: 0.2688
Epoch [47/80], Iter [300/500] Loss: 0.3082
Epoch [47/80], Iter [400/500] Loss: 0.3929
Epoch [47/80], Iter [500/500] Loss: 0.3126
Epoch [47/80], Iter [600/500] Loss: 0.5041
Epoch [47/80], Iter [700/500] Loss: 0.5848
Epoch [47/80], Iter [800/500] Loss: 0.4968
Epoch [47/80], Iter [900/500] Loss: 0.3496
Epoch [48/80], Iter [100/500] Loss: 0.2753
Epoch [48/80], Iter [200/500] Loss: 0.3885
Epoch [48/80], Iter [300/500] Loss: 0.3743
Epoch [48/80], Iter [400/500] Loss: 0.2425
Epoch [48/80], Iter [500/500] Loss: 0.2472
Epoch [48/80], Iter [600/500] Loss: 0.3003
Epoch [48/80], Iter [700/500] Loss: 0.4936
Epoch [48/80], Iter [800/500] Loss: 0.3169
Epoch [48/80], Iter [900/500] Loss: 0.2543
Epoch [49/80], Iter [100/500] Loss: 0.4262
Epoch [49/80], Iter [200/500] Loss: 0.3396
Epoch [49/80], Iter [300/500] Loss: 0.4670
Epoch [49/80], Iter [400/500] Loss: 0.2543
Epoch [49/80], Iter [500/500] Loss: 0.3146
Epoch [49/80], Iter [600/500] Loss: 1.3187
Epoch [49/80], Iter [700/500] Loss: 0.2993
Epoch [49/80], Iter [800/500] Loss: 0.3053
Epoch [49/80], Iter [900/500] Loss: 0.3343
Epoch [50/80], Iter [100/500] Loss: 0.2081
Epoch [50/80], Iter [200/500] Loss: 0.5631
Epoch [50/80], Iter [300/500] Loss: 0.4358
Epoch [50/80], Iter [400/500] Loss: 0.4028
Epoch [50/80], Iter [500/500] Loss: 0.2510
Epoch [50/80], Iter [600/500] Loss: 0.5876
Epoch [50/80], Iter [700/500] Loss: 0.3692
Epoch [50/80], Iter [800/500] Loss: 0.4500
Epoch [50/80], Iter [900/500] Loss: 0.1850
('time used:', 30318.149681000003)