以lenet5为例(2个卷积 + 3个全连接层)。
1.Python api (pytorch)
def __init__(self):
    """Build LeNet-5: a two-conv feature extractor followed by a three-layer FC head."""
    super(Lenet5, self).__init__()
    # Feature extractor: 1x32x32 input -> conv -> 6x28x28 -> pool -> 6x14x14
    #                    -> conv -> 16x10x10 -> pool -> 16x5x5 (pool1 is reused for both stages)
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=0)
    self.pool1 = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
    # Classifier head: flattened 16*5*5 = 400 features -> 120 -> 84 -> 10 classes
    self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)
def forward(self, x):
    """Run LeNet-5 inference on x of shape (N, 1, 32, 32); return (N, 10) class probabilities."""
    # Stage 1: conv + relu (N,1,32,32) -> (N,6,28,28), then avg-pool -> (N,6,14,14)
    out = self.pool1(F.relu(self.conv1(x)))
    # Stage 2: conv + relu (N,6,14,14) -> (N,16,10,10), then avg-pool -> (N,16,5,5)
    out = self.pool1(F.relu(self.conv2(out)))
    # Flatten (N,16,5,5) -> (N,400) for the fully connected head
    out = out.view(out.size(0), -1)
    out = F.relu(self.fc1(out))
    out = F.relu(self.fc2(out))
    # Softmax over the 10 logits so each row sums to 1
    return F.softmax(self.fc3(out), dim=1)
2. Python api (pytorch) 和 TensorRT C++ api 对比
x = torch.ones(1, 1, 32, 32).to('cuda:0') shape: (1, 1, 32, 32)
""" c++ creates input tensor
注意：这里是 implicit batch 模式，网络维度不包含 batch 维，
所以 Python 端的 (1, 1, 32, 32) 对应 C++ 端的 Dims3{1, INPUT_H, INPUT_W}（即 CHW）。
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{1, INPUT_H, INPUT_W});
assert(data);
"""
x = F.relu(self.conv1(x)) # nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0) -> [1, 6, 28, 28]
""" c++ conv1 + relu
IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 6, DimsHW{5, 5}, weightMap["conv1.weight"], weightMap["conv1.bias"]);
assert(conv1);
conv1->setStrideNd(DimsHW{1, 1});
IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
assert(relu1);
"""
x = self.pool1(x) # nn.AvgPool2d(kernel_size=2, stride=2, padding=0) -> [1, 6, 14, 14]
""" c++ pool1
IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kAVERAGE, DimsHW{2, 2});
assert(pool1);
pool1->setStrideNd(DimsHW{2, 2});
"""
x = F.relu(self.conv2(x)) # nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0) -> [1, 16, 10, 10]
""" c++ conv2 + relu
IConvolutionLayer* conv2 = network->addConvolutionNd(*pool1->getOutput(0), 16, DimsHW{5, 5}, weightMap["conv2.weight"], weightMap["conv2.bias"]);
assert(conv2);
conv2->setStrideNd(DimsHW{1, 1});
IActivationLayer* relu2 = network->addActivation(*conv2->getOutput(0), ActivationType::kRELU);
assert(relu2);
"""
x = self.pool1(x) # nn.AvgPool2d(kernel_size=2, stride=2, padding=0) -> [1, 16, 5, 5]
""" c++ pool2
IPoolingLayer* pool2 = network->addPoolingNd(*relu2->getOutput(0), PoolingType::kAVERAGE, DimsHW{2, 2});
assert(pool2);
pool2->setStrideNd(DimsHW{2, 2});
"""
x = x.view(x.size(0), -1) # view [1, 16, 5, 5] -> [1, 400]
# c++不需要view：TensorRT 的 addFullyConnected 会自动把输入的 (C, H, W) 展平成一维，
# 因此无需显式 reshape/view。
x = F.relu(self.fc1(x)) # nn.Linear(16 * 5 * 5, 120) -> [1, 120]
""" c++ fc1 + relu
IFullyConnectedLayer* fc1 = network->addFullyConnected(*pool2->getOutput(0), 120, weightMap["fc1.weight"], weightMap["fc1.bias"]);
assert(fc1);
IActivationLayer* relu3 = network->addActivation(*fc1->getOutput(0), ActivationType::kRELU);
assert(relu3);
"""
x = F.relu(self.fc2(x)) # nn.Linear(120, 84) -> [1, 84]
""" c++ fc2 + relu
IFullyConnectedLayer* fc2 = network->addFullyConnected(*relu3->getOutput(0), 84, weightMap["fc2.weight"], weightMap["fc2.bias"]);
assert(fc2);
IActivationLayer* relu4 = network->addActivation(*fc2->getOutput(0), ActivationType::kRELU);
assert(relu4);
"""
x = F.softmax(self.fc3(x), dim=1) # nn.Linear(84, 10) -> [1, 10]
""" c++ fc3 + softmax
IFullyConnectedLayer* fc3 = network->addFullyConnected(*relu4->getOutput(0), OUTPUT_SIZE, weightMap["fc3.weight"], weightMap["fc3.bias"]);
assert(fc3);
ISoftMaxLayer* prob = network->addSoftMax(*fc3->getOutput(0));
assert(prob);
prob->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*prob->getOutput(0));
"""