When training deep learning models, those that need only a small amount of computation can be trained on the CPU. When the amount of computation is relatively large, however, we want to use the parallel computing power of the GPU to speed up training.
Check the PyTorch and CUDA versions
import torch
print(torch.__version__) # show the currently installed torch version
print(torch.version.cuda) # CUDA version that this torch build was compiled with
print(torch.cuda.is_available()) # whether CUDA is usable with this torch build; True means it is available
View the number of GPUs
def try_gpu(i=0):
    """Return the ``cuda:i`` device if it exists, otherwise the CPU device.

    Args:
        i: Zero-based index of the requested GPU.

    Returns:
        ``torch.device(f'cuda:{i}')`` when ``i`` is a valid index among the
        GPUs reported by ``torch.cuda.device_count()``; otherwise
        ``torch.device('cpu')``.
    """
    # Bound the index on both sides so a negative ``i`` falls back to the
    # CPU as well, instead of producing a 'cuda:-1' device string.
    if 0 <= i < torch.cuda.device_count():
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')
def try_all_gpus():
    """Return every available GPU device, or ``[cpu]`` when there is none.

    Returns:
        A list with one ``torch.device(f'cuda:{i}')`` per GPU reported by
        ``torch.cuda.device_count()``. If that list would be empty, a
        single-element list holding ``torch.device('cpu')`` is returned.
    """
    devices = [torch.device(f'cuda:{i}')
               for i in range(torch.cuda.device_count())]
    # An empty list is falsy, so fall back to the CPU when no GPU was found.
    return devices if devices else [torch.device('cpu')]
# Check whether GPU 0 exists, whether GPU 10 exists, and list all available GPUs
try_gpu(), try_gpu(10), try_all_gpus()
Specify GPU
import torch
from torch import nn
# Device handles: the CPU, the current CUDA device, and CUDA device 1.
# 'gpu' is not a valid device type string; GPUs are addressed as 'cuda'.
torch.device('cpu'), torch.device('cuda'), torch.device('cuda:1')
Creating and copying tensors on the GPU
# Create a tensor X of ones on GPU 0 (X was used below without being defined)
X = torch.ones(2, 3, device=try_gpu())
# Create a tensor Y on GPU 1
Y = torch.rand(2, 3, device=try_gpu(1))
Z = X.cuda(1) # copy the contents of X to Z on GPU 1
print(X)
print(Z)
tensor([[1., 1., 1.],
[1., 1., 1.]], device='cuda:0')
tensor([[1., 1., 1.],
[1., 1., 1.]], device='cuda:1')
Place a neural network model on a specific GPU
from torch import nn
# A freshly built layer keeps its parameters on the CPU.
net = nn.Linear(3, 1)
print(next(net.parameters()).device)  # cpu

# Move the layer to GPU 0.
net.cuda(0)
print(next(net.parameters()).device)  # cuda:0

# Use CUDA when it is available, otherwise stay on the CPU.
target_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Option 1: create the parameters directly on the chosen device.
net2 = nn.Linear(3, 1, device=target_device)
print(next(net2.parameters()).device)  # cuda:0

# Option 2: build the layer first, then transfer it with .to().
net3 = nn.Linear(3, 1).to(target_device)
print(next(net3.parameters()).device)  # cuda:0

# Bring the model back to the CPU.
net3 = net3.cpu()
print(next(net3.parameters()).device)  # cpu