easy-fpn 源码解读(五):RPN

easy-fpn 源码解读(五):RPN

region_proposal_network.py代码解析

from typing import Tuple, List

import numpy as np
import torch
from torch import nn, Tensor
from torch.nn import functional as F

from bbox import BBox
from nms.nms import NMS


class RegionProposalNetwork(nn.Module):
    """Region proposal network (RPN) head.

    A shared 3x3 convolution feeds two 1x1 convolution heads. For every spatial
    location of the input feature map and every anchor, the heads emit
    2 objectness logits (index 1 is used as the foreground score) and
    4 box-transformer values (the offsets applied to the anchor).

    Besides the forward pass the class also provides anchor generation,
    training-sample selection, the RPN loss and proposal generation.
    """

    def __init__(self, num_features_out: int, anchor_ratios: List[Tuple[int, int]], anchor_scales: List[int], pre_nms_top_n: int, post_nms_top_n: int):
        """Build the shared convolution and the two per-anchor heads.

        Args:
            num_features_out: Channel count of the feature maps given to `forward`.
            anchor_ratios: Aspect ratios as pairs; `generate_anchors` divides the
                first element by the second and uses the result as height / width.
            anchor_scales: Multipliers applied to `anchor_size` in `generate_anchors`.
            pre_nms_top_n: Number of top-scoring proposals kept before NMS.
            post_nms_top_n: Number of proposals kept after NMS.
        """
        super().__init__()

        # Shared 3x3 convolution: num_features_out -> 512 channels, spatial size
        # preserved by padding=1. (The original note describes this as going
        # from 256 to 512 channels, i.e. it assumes num_features_out == 256.)
        self._features = nn.Sequential(
            nn.Conv2d(in_channels=num_features_out, out_channels=512, kernel_size=3, padding=1),
            nn.ReLU()
        )

        self._anchor_ratios = anchor_ratios
        self._anchor_scales = anchor_scales

        num_anchor_ratios = len(self._anchor_ratios)
        num_anchor_scales = len(self._anchor_scales)
        num_anchors = num_anchor_ratios * num_anchor_scales

        self._pre_nms_top_n = pre_nms_top_n
        self._post_nms_top_n = post_nms_top_n

        # 1x1 kernels keep the feature-map size unchanged. Every spatial location
        # owns `num_anchors` anchors; each anchor gets 2 channels from
        # `_objectness` (background / foreground) and 4 channels from
        # `_transformer` (offsets for the anchor's box). This is why `forward`
        # moves the channel axis last before flattening: every resulting row
        # then corresponds to exactly one anchor.
        self._objectness = nn.Conv2d(in_channels=512, out_channels=num_anchors * 2, kernel_size=1)
        self._transformer = nn.Conv2d(in_channels=512, out_channels=num_anchors * 4, kernel_size=1)

    def forward(self, features: Tensor, image_width: int, image_height: int) -> Tuple[Tensor, Tensor]:
        """Compute per-anchor objectness logits and box transformers.

        Args:
            features: 4-D feature map batch in (N, C, H, W) layout.
            image_width: Not used by this method; kept for interface compatibility.
            image_height: Not used by this method; kept for interface compatibility.

        Returns:
            A tuple `(objectnesses, transformers)` of 2-D tensors with 2 and 4
            columns respectively, one row per anchor.
        """
        features = self._features(features)
        objectnesses = self._objectness(features)  # background / foreground logits per anchor
        transformers = self._transformer(features)  # box offsets per anchor

        # permute: (N, C, H, W) -> (N, H, W, C).
        # permute only changes strides, so the result is generally not laid out
        # contiguously in memory; view() requires contiguous memory, hence the
        # contiguous() call in between.
        # view(-1, k): flatten to a 2-D matrix whose second dimension is k.
        objectnesses = objectnesses.permute(0, 2, 3, 1).contiguous().view(-1, 2)
        transformers = transformers.permute(0, 2, 3, 1).contiguous().view(-1, 4)

        return objectnesses, transformers

    def sample(self, anchor_bboxes: Tensor, gt_bboxes: Tensor, image_width: int, image_height: int) -> Tuple[Tensor, Tensor, Tensor, Tensor]:
        """Label anchors against the ground truth and draw up to 256 training samples.

        Anchors crossing the image boundary are discarded. Of the remaining ones,
        an anchor is labelled background (0) if its best IoU is below 0.3 and
        foreground (1) if its best IoU is at least 0.7 or if it is a best match
        of some ground-truth box (with non-zero overlap). All other anchors keep
        the label -1 and are never sampled. At most 128 foreground anchors are
        drawn and the batch is filled up to 256 with background anchors.

        Args:
            anchor_bboxes: Anchor boxes, one row of 4 values per anchor.
            gt_bboxes: Ground-truth boxes, one row of 4 values per box.
            image_width: Image width used as the right boundary.
            image_height: Image height used as the bottom boundary.

        Returns:
            A tuple of
            - sample_fg_indices: indices (into the given `anchor_bboxes`) of the
              sampled foreground anchors,
            - sample_selected_indices: indices of all sampled anchors, shuffled,
            - gt_anchor_objectnesses: 0/1 labels aligned with `sample_selected_indices`,
            - gt_anchor_transformers: regression targets aligned with `sample_fg_indices`.
            The two target tensors are moved to CUDA when CUDA is available;
            the index tensors stay on the CPU.
        """
        sample_fg_indices = torch.arange(end=len(anchor_bboxes), dtype=torch.long)
        sample_selected_indices = torch.arange(end=len(anchor_bboxes), dtype=torch.long)

        # The labelling below runs on the CPU, so bring the inputs over first.
        anchor_bboxes = anchor_bboxes.cpu()
        gt_bboxes = gt_bboxes.cpu()

        # Remove cross-boundary anchors.
        # boundary == tensor([0, 0, w, h]); unsqueeze(dim=0) turns it into a
        # (1, 4) tensor so it can serve as the single container box.
        boundary = torch.tensor(BBox(0, 0, image_width, image_height).tolist(), dtype=torch.float)
        # Flatten the mask with reshape(-1) rather than a dimensionless squeeze():
        # squeeze() would collapse the mask to 0-d when there is exactly one
        # anchor. Presumably BBox.inside returns an (N, 1) mask here - verify
        # against BBox; for any mask with a single non-unit dimension the two
        # calls are equivalent.
        inside_indices = BBox.inside(anchor_bboxes, boundary.unsqueeze(dim=0)).reshape(-1).nonzero().view(-1)

        anchor_bboxes = anchor_bboxes[inside_indices]
        sample_fg_indices = sample_fg_indices[inside_indices]
        sample_selected_indices = sample_selected_indices[inside_indices]

        # Find a label for each remaining anchor: -1 = ignore, 0 = background, 1 = foreground.
        labels = torch.full((len(anchor_bboxes),), -1, dtype=torch.long)
        ious = BBox.iou(anchor_bboxes, gt_bboxes)  # 2-D: one row per anchor, one column per gt box
        # max(dim=1): best gt box for every anchor (value and column index).
        anchor_max_ious, anchor_assignments = ious.max(dim=1)
        # max(dim=0): best IoU reached for every gt box.
        gt_max_ious, _ = ious.max(dim=0)
        # Anchors that attain the best IoU of some gt box become positives too.
        # A gt box whose best IoU is 0 overlaps no anchor at all; without the
        # `> 0` guard every anchor would "tie" for it and be marked foreground.
        anchor_additions = ((ious == gt_max_ious) & (gt_max_ious > 0)).nonzero()[:, 0]
        labels[anchor_max_ious < 0.3] = 0
        labels[anchor_additions] = 1
        labels[anchor_max_ious >= 0.7] = 1

        # Select 256 samples: shuffle each group with randperm, take at most 128
        # foreground anchors and fill the remainder with background anchors.
        fg_indices = (labels == 1).nonzero().view(-1)
        bg_indices = (labels == 0).nonzero().view(-1)
        fg_indices = fg_indices[torch.randperm(len(fg_indices))[:128]]
        bg_indices = bg_indices[torch.randperm(len(bg_indices))[:256 - len(fg_indices)]]
        selected_indices = torch.cat([fg_indices, bg_indices])
        selected_indices = selected_indices[torch.randperm(len(selected_indices))]

        gt_anchor_objectnesses = labels[selected_indices]
        gt_bboxes = gt_bboxes[anchor_assignments[fg_indices]]
        anchor_bboxes = anchor_bboxes[fg_indices]
        # Regression targets: offsets from each foreground anchor to its assigned gt box.
        gt_anchor_transformers = BBox.calc_transformer(anchor_bboxes, gt_bboxes)

        # Keep the historical behaviour of returning the targets on CUDA, but do
        # not crash on hosts without CUDA.
        if torch.cuda.is_available():
            gt_anchor_objectnesses = gt_anchor_objectnesses.cuda()
            gt_anchor_transformers = gt_anchor_transformers.cuda()

        # Map the sampled positions back to indices into the original anchor list.
        sample_fg_indices = sample_fg_indices[fg_indices]
        sample_selected_indices = sample_selected_indices[selected_indices]

        return sample_fg_indices, sample_selected_indices, gt_anchor_objectnesses, gt_anchor_transformers

    def loss(self, anchor_objectnesses: Tensor, anchor_transformers: Tensor, gt_anchor_objectnesses: Tensor, gt_anchor_transformers: Tensor) -> Tuple[Tensor, Tensor]:
        """Compute the RPN classification and box-regression losses.

        Args:
            anchor_objectnesses: Predicted objectness logits of the sampled anchors.
            anchor_transformers: Predicted transformers of the sampled foreground anchors.
            gt_anchor_objectnesses: Target class indices for `anchor_objectnesses`.
            gt_anchor_transformers: Target transformers for `anchor_transformers`.

        Returns:
            A tuple `(cross_entropy, smooth_l1_loss)`. The smooth L1 loss is the
            element-wise sum divided by the number of foreground rows; it is 0
            (not NaN) when there are no foreground rows.
        """
        cross_entropy = F.cross_entropy(input=anchor_objectnesses, target=gt_anchor_objectnesses)

        # smooth_l1_loss: https://blog.csdn.net/wfei101/article/details/79252021
        # NOTE: The default of `reduction` is `elementwise_mean`, which is divided by N x 4 (number of all elements), here we replaced by N for better performance
        smooth_l1_loss = F.smooth_l1_loss(input=anchor_transformers, target=gt_anchor_transformers, reduction='sum')
        # Clamp the divisor: with no foreground samples the sum is 0 and a
        # division by 0 would turn the loss (and all gradients) into NaN.
        smooth_l1_loss = smooth_l1_loss / max(len(gt_anchor_transformers), 1)

        return cross_entropy, smooth_l1_loss

    def generate_anchors(self, image_width: int, image_height: int, num_x_anchors: int, num_y_anchors: int, anchor_size: int) -> Tensor:
        """Generate the anchor boxes for one feature level.

        Anchor centres are spaced evenly strictly inside the image:
        `num_x_anchors` along the width and `num_y_anchors` along the height.
        At every centre one anchor is created per (ratio, scale) combination,
        with width `anchor_size * scale * sqrt(1 / ratio)` and height
        `anchor_size * scale * sqrt(ratio)`.

        Args:
            image_width: Width of the image the anchors are laid over.
            image_height: Height of the image the anchors are laid over.
            num_x_anchors: Number of anchor centres along the x axis.
            num_y_anchors: Number of anchor centres along the y axis.
            anchor_size: Base anchor size for this level.

        Returns:
            A float tensor with one row per anchor, converted from
            (center_x, center_y, width, height) by `BBox.from_center_base`.
            Rows are ordered by y, then x, then ratio, then scale.
        """
        # linspace over num + 2 points with both end points dropped keeps every
        # centre strictly inside the image.
        center_ys = np.linspace(start=0, stop=image_height, num=num_y_anchors + 2)[1:-1]
        center_xs = np.linspace(start=0, stop=image_width, num=num_x_anchors + 2)[1:-1]
        ratios = np.array(self._anchor_ratios)  # list of pairs -> 2-D numpy array
        ratios = ratios[:, 0] / ratios[:, 1]
        scales = np.array(self._anchor_scales)

        # NOTE: it's important to let `center_ys` be the major index (i.e., move horizontally and then vertically) for consistency with 2D convolution

        # giving the string 'ij' returns a meshgrid with matrix indexing, i.e., with shape (#center_ys, #center_xs, #ratios, #scales)
        # meshgrid expands the 1-D inputs into coordinate arrays holding every
        # combination of the input values.
        center_ys, center_xs, ratios, scales = np.meshgrid(center_ys, center_xs, ratios, scales, indexing='ij')

        center_ys = center_ys.reshape(-1)
        center_xs = center_xs.reshape(-1)
        ratios = ratios.reshape(-1)
        scales = scales.reshape(-1)

        widths = anchor_size * scales * np.sqrt(1 / ratios)
        heights = anchor_size * scales * np.sqrt(ratios)

        # np.stack(): https://blog.csdn.net/wgx571859177/article/details/80987459
        center_based_anchor_bboxes = np.stack((center_xs, center_ys, widths, heights), axis=1)
        center_based_anchor_bboxes = torch.from_numpy(center_based_anchor_bboxes).float()
        # Convert from centre point plus width/height to the box coordinates.
        anchor_bboxes = BBox.from_center_base(center_based_anchor_bboxes)
        # These anchors are computed analytically; the transformers, in
        # contrast, are produced by the convolution heads.
        return anchor_bboxes

    def generate_proposals(self, anchor_bboxes: Tensor, objectnesses: Tensor, transformers: Tensor, image_width: int, image_height: int) -> Tensor:
        """Turn anchors and predicted transformers into region proposals.

        Args:
            anchor_bboxes: Anchor boxes, one row per anchor.
            objectnesses: Objectness output of `forward`; column 1 is the score.
            transformers: Transformer output of `forward`, aligned with `anchor_bboxes`.
            image_width: Right bound used when clipping proposals.
            image_height: Bottom bound used when clipping proposals.

        Returns:
            At most `post_nms_top_n` proposal boxes that survived NMS, taken
            from the `pre_nms_top_n` highest-scoring candidates.
        """
        proposal_score = objectnesses[:, 1]
        _, sorted_indices = torch.sort(proposal_score, dim=0, descending=True)

        sorted_transformers = transformers[sorted_indices]
        sorted_anchor_bboxes = anchor_bboxes[sorted_indices]

        # Apply the predicted offsets to the anchors to obtain proposals; the
        # transformers are detached so no gradient flows through the proposals.
        proposal_bboxes = BBox.apply_transformer(sorted_anchor_bboxes, sorted_transformers.detach())
        # Clip the proposals to the given bounds so none crosses the boundary.
        proposal_bboxes = BBox.clip(proposal_bboxes, 0, 0, image_width, image_height)

        # Keep the top `pre_nms_top_n` proposals before NMS; they are already
        # sorted by descending foreground score.
        proposal_bboxes = proposal_bboxes[:self._pre_nms_top_n]
        # Non-maximum suppression (per the original note, implemented as a C extension with CUDA).
        # NMS principle: https://blog.csdn.net/qidailiming1994/article/details/100162305
        # NMS CUDA implementation: https://blog.csdn.net/qq_35218039/article/details/99968493
        # PyTorch C extensions: https://blog.csdn.net/manong_wxd/article/details/78720182
        kept_indices = NMS.suppress(proposal_bboxes, threshold=0.7)
        proposal_bboxes = proposal_bboxes[kept_indices]
        proposal_bboxes = proposal_bboxes[:self._post_nms_top_n]

        return proposal_bboxes

猜你喜欢

转载自blog.csdn.net/ThunderF/article/details/104777247
RPN
FPN
今日推荐