以lenet5为例(2个卷积 + 3个全连接层)。
1.Python api (pytorch)
def __init__(self):
    """Build LeNet-5: a two-conv feature extractor followed by a three-layer FC head."""
    super(Lenet5, self).__init__()
    # Feature extractor: 1x32x32 input -> conv -> 6x28x28 -> pool -> 6x14x14
    #                    -> conv -> 16x10x10 -> pool -> 16x5x5 (pool1 is reused for both stages)
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=0)
    self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
    # Classifier head: flattened 16*5*5 = 400 features -> 120 -> 84 -> 10 classes
    self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)
def forward(self, x):
    """Run LeNet-5 inference on x of shape (N, 1, 32, 32); return (N, 10) class probabilities."""
    # Stage 1: conv + relu (N,1,32,32) -> (N,6,28,28), then avg-pool -> (N,6,14,14)
    out = self.pool1(F.relu(self.conv1(x)))
    # Stage 2: conv + relu (N,6,14,14) -> (N,16,10,10), then avg-pool -> (N,16,5,5)
    out = self.pool1(F.relu(self.conv2(out)))
    # Flatten (N,16,5,5) -> (N,400) for the fully connected head
    out = out.view(out.size(0), -1)
    out = F.relu(self.fc1(out))
    out = F.relu(self.fc2(out))
    # Softmax over the 10 logits so each row sums to 1
    return F.softmax(self.fc3(out), dim=1)
2. Python api (pytorch) 和 TensorRT C++ api 对比
x = torch.ones(1, 1, 32, 32).to('cuda:0') shape: (1, 1, 32, 32)
""" c++ creates input tensor
注意：这里是 implicit batch 模式，网络维度不包含 batch 维，
所以 Python 端的 (1, 1, 32, 32) 对应 C++ 端的 Dims3{1, INPUT_H, INPUT_W}（即 CHW）。
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{1, INPUT_H, INPUT_W});
assert(data);
"""
x = F.relu(self.conv1(x)) # nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0) -> [1, 6, 28, 28]
""" c++ conv1 + relu
IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 6, DimsHW{5, 5}, weightMap["conv1.weight"], weightMap["conv1.bias"]);
assert(conv1);
conv1->setStrideNd(DimsHW{1, 1});
IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
assert(relu1);
"""
x = self.pool1(x) # nn.AvgPool2d(kernel_size=2, stride=2, padding=0) -> [1, 6, 14, 14]
""" c++ pool1
IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kAVERAGE, DimsHW{2, 2});
assert(pool1);
pool1->setStrideNd(DimsHW{2, 2});
"""
x = F.relu(self.conv2(x)) # nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0) -> [1, 16, 10, 10]
""" c++ conv2 + relu
IConvolutionLayer* conv2 = network->addConvolutionNd(*pool1->getOutput(0), 16, DimsHW{5, 5}, weightMap["conv2.weight"], weightMap["conv2.bias"]);
assert(conv2);
conv2->setStrideNd(DimsHW{1, 1});
IActivationLayer* relu2 = network->addActivation(*conv2->getOutput(0), ActivationType::kRELU);
assert(relu2);
"""
x = self.pool1(x) # nn.AvgPool2d(kernel_size=2, stride=2, padding=0) -> [1, 16, 5, 5]
""" c++ pool2
IPoolingLayer* pool2 = network->addPoolingNd(*relu2->getOutput(0), PoolingType::kAVERAGE, DimsHW{2, 2});
assert(pool2);
pool2->setStrideNd(DimsHW{2, 2});
"""
x = x.view(x.size(0), -1) # view [1, 16, 5, 5] -> [1, 400]
# c++不需要view：TensorRT 的 addFullyConnected 会自动把输入的 (C, H, W) 展平成一维，
# 因此无需显式 reshape/view。
x = F.relu(self.fc1(x)) # nn.Linear(16 * 5 * 5, 120) -> [1, 120]
""" c++ fc1 + relu
IFullyConnectedLayer* fc1 = network->addFullyConnected(*pool2->getOutput(0), 120, weightMap["fc1.weight"], weightMap["fc1.bias"]);
assert(fc1);
IActivationLayer* relu3 = network->addActivation(*fc1->getOutput(0), ActivationType::kRELU);
assert(relu3);
"""
x = F.relu(self.fc2(x)) # nn.Linear(120, 84) -> [1, 84]
""" c++ fc2 + relu
IFullyConnectedLayer* fc2 = network->addFullyConnected(*relu3->getOutput(0), 84, weightMap["fc2.weight"], weightMap["fc2.bias"]);
assert(fc2);
IActivationLayer* relu4 = network->addActivation(*fc2->getOutput(0), ActivationType::kRELU);
assert(relu4);
"""
x = F.softmax(self.fc3(x), dim=1) # nn.Linear(84, 10) -> [1, 10]
""" c++ fc3 + softmax
IFullyConnectedLayer* fc3 = network->addFullyConnected(*relu4->getOutput(0), OUTPUT_SIZE, weightMap["fc3.weight"], weightMap["fc3.bias"]);
assert(fc3);
ISoftMaxLayer* prob = network->addSoftMax(*fc3->getOutput(0));
assert(prob);
prob->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*prob->getOutput(0));
"""