Inception V1-2014年

之前的网络结构，比如 VGG、AlexNet，都是通过增加网络的深度来提高网络的性能。但是这样也会导致很多问题，比如过拟合、梯度消失或梯度爆炸等。Inception 则从另一个角度来提升训练效果：即在相同的计算资源下提取到更多的特征。这里也可以认为，Inception 结构相当于一个卷积层，是一个可以提取不同尺度特征的模块。

Inception 结构的核心思想是：使用 1×1 卷积，并在多个尺度上分别进行卷积，然后再进行特征融合。

1×1 卷积的作用：（1）在相同尺寸的感受野上叠加更多的卷积，可以增加网络的非线性，提取更加丰富的特征；（2）可以进行特征降维，在基本不损失模型性能的情况下减少模型的计算量。

多尺度卷积再聚合：可以提取更加丰富的特征信息。

深入理解GoogLeNet结构（原创） - 知乎 (zhihu.com)

inception 代码

from typing import Tuple
from torch import nn
import torch
from torch.nn import Conv2d, MaxPool2d, AvgPool2d


class BasicConv2d(nn.Module):
    """Convolution -> (optional) batch normalization -> ReLU.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the convolution.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        padding: zero padding applied by the convolution.
        dilation: convolution dilation.
        bias: whether the convolution has a learnable bias.
        normalization: when False the batch-norm layer is skipped in
            ``forward`` (the layer is still created so the module layout
            does not depend on this flag).
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride,
        padding=0,
        dilation=1,
        bias=True,
        normalization=True,
    ):
        super().__init__()
        conv_options = {
            "in_channels": in_channels,
            "out_channels": out_channels,
            "kernel_size": kernel_size,
            "stride": stride,
            "padding": padding,
            "dilation": dilation,
            "bias": bias,
        }
        self.conv2d = Conv2d(**conv_options)
        self.normalization = normalization
        self.norm = nn.BatchNorm2d(out_channels, eps=1e-5, momentum=0.1, affine=True)
        self.activation = nn.ReLU(inplace=False)

    def forward(self, x):
        """Apply the convolution, the optional batch norm, then ReLU."""
        features = self.conv2d(x)
        if not self.normalization:
            return self.activation(features)
        return self.activation(self.norm(features))


class Inception_block(nn.Module):
    """Inception module: four parallel branches concatenated on the channel axis.

    Every branch uses stride 1 and "same" padding, so all four outputs keep
    the spatial size of the input and can be concatenated along dim 1.

    Args:
        in_channel: number of channels of the input feature map.
        b1_out_channel: output channels of the 1x1 convolution branch.
        b2_channels: (1x1 reduction channels, 3x3 output channels).
        b3_channels: (1x1 reduction channels, 5x5 output channels).
        b4_channel: output channels of the 1x1 convolution that follows
            the 3x3 max-pool branch.
    """

    def __init__(
        self,
        in_channel: int,
        b1_out_channel: int,
        b2_channels: Tuple[int, int],
        b3_channels: Tuple[int, int],
        b4_channel: int,
    ) -> None:
        super(Inception_block, self).__init__()
        # Branch 1: plain 1x1 convolution.
        self.branch_1 = BasicConv2d(
            in_channels=in_channel, out_channels=b1_out_channel, kernel_size=1, stride=1
        )
        # Branch 2: 1x1 reduction followed by a 3x3 convolution. The padding
        # belongs to the 3x3 convolution; padding the 1x1 convolution instead
        # would make the 3x3 see a ring of bias/BN-derived values rather than
        # zeros at the border.
        self.branch_2 = nn.Sequential(
            BasicConv2d(
                in_channels=in_channel,
                out_channels=b2_channels[0],
                kernel_size=1,
                stride=1,
            ),
            BasicConv2d(
                in_channels=b2_channels[0],
                out_channels=b2_channels[1],
                kernel_size=3,
                stride=1,
                padding=1,
            ),
        )
        # Branch 3: 1x1 reduction followed by a 5x5 convolution (padding 2).
        self.branch_3 = nn.Sequential(
            BasicConv2d(
                in_channels=in_channel,
                out_channels=b3_channels[0],
                kernel_size=1,
                stride=1,
            ),
            BasicConv2d(
                in_channels=b3_channels[0],
                out_channels=b3_channels[1],
                kernel_size=5,
                stride=1,
                padding=2,
            ),
        )
        # Branch 4: 3x3 max pooling (padded so the size is kept) followed by a
        # 1x1 projection. Padding the pool, not the convolution, keeps every
        # output pixel aligned with the input pixel at the same location.
        self.branch_4 = nn.Sequential(
            MaxPool2d(kernel_size=3, stride=1, padding=1),
            BasicConv2d(
                in_channels=in_channel,
                out_channels=b4_channel,
                kernel_size=1,
                stride=1,
            ),
        )

    def _forward(self, x):
        """Run the four branches on ``x`` and return their outputs as a list."""
        out_b1 = self.branch_1(x)
        out_b2 = self.branch_2(x)
        out_b3 = self.branch_3(x)
        out_b4 = self.branch_4(x)

        return [out_b1, out_b2, out_b3, out_b4]

    def forward(self, x):
        """Return the branch outputs concatenated along the channel dimension."""
        outs = self._forward(x)
        # torch.cat is the canonical name; torch.concat is only an alias that
        # is missing from older PyTorch releases.
        return torch.cat(outs, 1)


class AuxClassifier(nn.Module):
    """Auxiliary classification head attached to an intermediate feature map.

    Layer layout:
      * Average pooling 5x5, stride 3.
      * A 1x1 convolution with 128 filters for dimension reduction, with
        rectified linear activation.
      * A fully connected layer with 1024 units and rectified linear
        activation.
      * A dropout layer with 70% ratio of dropped outputs.
      * A linear layer producing the class logits (softmax is left to the
        loss function).

    Args:
        in_channel: number of channels of the incoming feature map.
        classes: number of output classes.
        fc_in: flattened size of the feature map after pooling and the 1x1
            convolution, i.e. 128 * H_out * W_out.
    """

    def __init__(self, in_channel: int, classes: int, fc_in: int) -> None:
        super(AuxClassifier, self).__init__()
        self.avgPool = AvgPool2d(kernel_size=5, stride=3)
        self.conv = BasicConv2d(in_channels=in_channel, out_channels=128, kernel_size=1, stride=1)
        self.fc = nn.Linear(fc_in, 1024)
        self.activation = nn.ReLU()
        self.classifier = nn.Sequential(
            # Element-wise dropout: the input here is a flattened (N, 1024)
            # tensor, so channel-wise Dropout2d is not applicable.
            nn.Dropout(p=0.7),
            nn.Linear(1024, classes)
        )

    def forward(self, x):
        """Return class logits of shape (N, classes) for feature map ``x``."""
        N = x.shape[0]
        x = self.avgPool(x)
        x = self.conv(x)

        x = x.reshape(N, -1)
        x = self.fc(x)
        # Non-linearity between the two linear layers; without it they
        # collapse into a single affine map.
        x = self.activation(x)
        x = self.classifier(x)
        return x


class InceptionV1(nn.Module):
    """Inception V1 (GoogLeNet) classifier with two auxiliary heads.

    The auxiliary heads are built with ``fc_in=2048`` (= 128 * 4 * 4), which
    requires 14x14 feature maps at blocks 4a and 4d; with the strides used
    here that corresponds to a 224x224 input image.

    Args:
        classes: number of output classes.

    ``forward`` returns ``[logits, aux_1_logits, aux_2_logits]`` while the
    module is in training mode and only ``logits`` in eval mode.
    """

    def __init__(self, classes: int) -> None:
        super(InceptionV1, self).__init__()
        # Parameter-free, so the same pooling module is safely reused at
        # every down-sampling point.
        self.max_pool = MaxPool2d(3, stride=2, padding=1)
        self.first_convolution = nn.Sequential(
            BasicConv2d(3, 64, 7, stride=2, padding=3),
            self.max_pool,
            # 1x1 reduction needs no padding; the following 3x3 convolution
            # carries the padding that keeps the spatial size.
            BasicConv2d(64, 64, 1, stride=1),
            BasicConv2d(64, 192, 3, stride=1, padding=1),
            self.max_pool,
        )
        self.inception_block_3a = Inception_block(192, 64, (96, 128), (16, 32), 32)
        self.inception_block_3b = Inception_block(256, 128, (128, 192), (32, 96), 64)

        self.inception_block_4a = Inception_block(480, 192, (96, 208), (16, 48), 64)
        # Average pooling 5x5 s=3 -> 4x4x512, then 1x1 conv -> 4x4x128 = 2048
        self.aux_1 = AuxClassifier(in_channel=512, classes=classes, fc_in=2048)
        self.inception_block_4b = Inception_block(512, 160, (112, 224), (24, 64), 64)
        self.inception_block_4c = Inception_block(512, 128, (128, 256), (24, 64), 64)
        self.inception_block_4d = Inception_block(512, 112, (144, 288), (32, 64), 64)
        # Average pooling 5x5 s=3 -> 4x4x528, then 1x1 conv -> 4x4x128 = 2048
        self.aux_2 = AuxClassifier(in_channel=528, classes=classes, fc_in=2048)
        self.inception_block_4e = Inception_block(528, 256, (160, 320), (32, 128), 128)

        self.inception_block_5a = Inception_block(832, 256, (160, 320), (32, 128), 128)
        self.inception_block_5b = Inception_block(832, 384, (192, 384), (48, 128), 128)

        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.4),
            # Global average pooling leaves 1024 x 1 x 1, so the flattened
            # feature vector has 1024 entries (not 1024 * 7 * 7).
            nn.Linear(1024, classes)
        )

    def forward(self, x):
        """Classify a batch of images.

        Returns ``[logits, aux_1, aux_2]`` in training mode, ``logits``
        otherwise.
        """
        x = self.first_convolution(x)
        x = self.inception_block_3a(x)
        x = self.inception_block_3b(x)
        x = self.max_pool(x)
        x = self.inception_block_4a(x)
        # The auxiliary heads only contribute to the training loss, so they
        # are skipped entirely at inference time.
        aux_1 = self.aux_1(x) if self.training else None
        x = self.inception_block_4b(x)
        x = self.inception_block_4c(x)
        x = self.inception_block_4d(x)
        aux_2 = self.aux_2(x) if self.training else None
        x = self.inception_block_4e(x)
        x = self.max_pool(x)
        x = self.inception_block_5a(x)
        x = self.inception_block_5b(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        if self.training:
            return [x, aux_1, aux_2]
        return x

if __name__ == "__main__":
    # Build a 10-class network and print its layer structure.
    print(InceptionV1(10))

inception_v1.pytorch/inception_v1.py at master · antspy/inception_v1.pytorch (github.com)

asasapd/Vision-backbones: Pytorch implementation of InceptionV1 architecture https://arxiv.org/abs/1409.4842 (github.com)

猜你喜欢