[实例分割]IOU,NMS笔记

1. IOU

1.1 IOU

参考:知乎
自己没法直接理解多维度多个box之间的iou,索性一步步写下来。
PyTorch 源代码

=

# IOU计算
    # 假设box1维度为[N,4]   box2维度为[M,4]
    def iou(self, box1, box2):
        """Compute the pairwise IoU matrix between two sets of boxes.

        Columns 0-1 of each row are used as the top-left corner and columns
        2-3 as the bottom-right corner.

        Args:
            box1: tensor indexed as [N, 4].
            box2: tensor indexed as [M, 4].

        Returns:
            Tensor of shape [N, M]; entry (i, j) is the IoU of box1[i] and
            box2[j].
        """
        # Broadcasting [N, 1, 2] against [1, M, 2] pairs every row of box1
        # with every row of box2.  The intersection's top-left corner is the
        # element-wise max, its bottom-right corner the element-wise min.
        top_left = torch.max(box1[:, None, :2], box2[None, :, :2])
        bottom_right = torch.min(box1[:, None, 2:], box2[None, :, 2:])

        # A negative extent means the two boxes do not overlap on that axis,
        # so it is clamped to zero before multiplying.
        extent = (bottom_right - top_left).clamp(min=0)  # [N, M, 2]
        overlap = extent[..., 0] * extent[..., 1]  # [N, M]

        size1 = ((box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1]))[:, None]  # [N, 1]
        size2 = ((box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1]))[None, :]  # [1, M]

        # IoU = intersection / union, with union = area1 + area2 - intersection.
        return overlap / (size1 + size2 - overlap)

自己实现下

import torch
a = torch.randn((2,4))
b = torch.randn((3,4))
tensor([[-1.3839, -2.1049,  0.0442,  0.1294],
        [ 3.6880,  1.6080, -0.0313,  0.2779]])
tensor([[ 0.3556,  0.4686,  1.0932, -2.3597],
        [ 1.2610,  0.2251, -0.3971,  1.7352],
        [-0.5320, -2.6367,  0.6560,  0.2212]])
print(b)
N = a.size(0)
M = b.size(0)
print(N, M)

tensor([[ 0.3556,  0.4686,  1.0932, -2.3597],
        [ 1.2610,  0.2251, -0.3971,  1.7352],
        [-0.5320, -2.6367,  0.6560,  0.2212]])
2 3

# a1 sigmoid
a1 = torch.sigmoid(a[:, :2].unsqueeze(1).expand(N, M, 2))
a1

tensor([[[0.2004, 0.1086],
         [0.2004, 0.1086],
         [0.2004, 0.1086]],

        [[0.9756, 0.8331],
         [0.9756, 0.8331],
         [0.9756, 0.8331]]])
# b1 sigmoid
b1 = torch.sigmoid(b[:, :2].unsqueeze(0).expand(N, M, 2))
b1

tensor([[[0.5880, 0.6150],
         [0.7792, 0.5560],
         [0.3701, 0.0668]],

        [[0.5880, 0.6150],
         [0.7792, 0.5560],
         [0.3701, 0.0668]]])
lt  = torch.max(a1, b1)

因此rb部分也是类似的:

torch.max(a1, b1).size()
a2 = torch.sigmoid(a[:, 2:].unsqueeze(1).expand(N, M, 2))
b2 = torch.sigmoid(b[:, 2:].unsqueeze(0).expand(N, M, 2))
rb = torch.min(a2, b2)

print(lt)
print('-'*50)
print(rb)
wh = lt -rb 
print('-'*50)

--------------------------------------------------
lt:
tensor([[[0.5880, 0.6150],
         [0.7792, 0.5560],
         [0.3701, 0.1086]],

        [[0.9756, 0.8331],
         [0.9756, 0.8331],
         [0.9756, 0.8331]]])
--------------------------------------------------
rb:
tensor([[[0.5110, 0.0863],
         [0.4020, 0.5323],
         [0.5110, 0.5323]],

        [[0.4922, 0.0863],
         [0.4020, 0.5690],
         [0.4922, 0.5551]]])
--------------------------------------------------
wh:
tensor([[[ 0.0769,  0.5287],
         [ 0.3772,  0.0237],
         [-0.1410, -0.4237]],

        [[ 0.4834,  0.7468],
         [ 0.5736,  0.2641],
         [ 0.4834,  0.2781]]])
print(wh<0)
wh[wh<0] = 0
# 把 True 对应位置的元素改成 0
tensor([[[False, False],
         [False, False],
         [ True,  True]],

        [[False, False],
         [False, False],
         [False, False]]])
tensor([[[0.0769, 0.5287],
         [0.3772, 0.0237],
         [0.0000, 0.0000]],

        [[0.4834, 0.7468],
         [0.5736, 0.2641],
         [0.4834, 0.2781]]])
inter = wh[:,:,0] * wh[:,:,1]   # [N,M]
inter

tensor([[0.0407, 0.0090, 0.0000],
        [0.3610, 0.1515, 0.1344]])

 area1 = (a[:,2]-a[:,0]) * (a[:,3]-a[:,1])
 area2 = (b[:,2]-b[:,0]) * (b[:,3]-b[:,1])
 print(area1, area2)
 area1 = area1.unsqueeze(1).expand(N,M)  # (N,M)
 area2 = area2.unsqueeze(0).expand(N,M)  # (N,M)
 print(area1)
 print(area2)
 iou = inter / (area1+area2-inter)
 iou
tensor([3.1906, 4.9471]) tensor([-2.0859, -2.5036,  3.3952])
tensor([[3.1906, 3.1906, 3.1906],
        [4.9471, 4.9471, 4.9471]])
tensor([[-2.0859, -2.5036,  3.3952],
        [-2.0859, -2.5036,  3.3952]])
tensor([[0.0382, 0.0132, 0.0000],
        [0.1444, 0.0661, 0.0164]])

1.2 GIOU

在这里插入图片描述

def generalized_box_iou(boxes1, boxes2):
    """
    Pairwise Generalized IoU, as described at https://giou.stanford.edu/

    Both inputs hold one box per row in [x0, y0, x1, y1] format.

    Returns an [N, M] matrix, where N = len(boxes1) and M = len(boxes2).
    """
    # Degenerate boxes (x1 < x0 or y1 < y0) give inf / nan results,
    # so reject them up front.
    for boxes in (boxes1, boxes2):
        assert (boxes[:, 2:] >= boxes[:, :2]).all()
    iou, union = box_iou(boxes1, boxes2)

    # Corners of the smallest box enclosing each (boxes1[i], boxes2[j]) pair:
    # element-wise min of the top-left corners, max of the bottom-right ones.
    enclose_min = torch.min(boxes1[:, None, :2], boxes2[:, :2])
    enclose_max = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])

    enclose_wh = (enclose_max - enclose_min).clamp(min=0)  # [N,M,2]
    enclose_area = enclose_wh[..., 0] * enclose_wh[..., 1]

    # GIoU = IoU - (enclosing area not covered by the union) / enclosing area.
    return iou - (enclose_area - union) / enclose_area

2. nms

2.1 vanilla nms

参考:知乎,很棒有源码实现,适合理解。
基本步骤
(1)将所有检出的output bbox按cls score划分(如文本检测仅包含文本1类,即将output bbox按照其对应的cls score划分为2个集合,其中1个为bg类,bg类不需要做NMS)
(2)在每个集合内根据各个bbox的cls score做降序排列,得到一个降序的list_k
(3)从list_k中top1 cls score开始,计算该bbox_x与list中其他bbox_y的IoU,若IoU大于阈值T,则剔除该bbox_y,最终保留bbox_x,从list_k中取出
(4)对剩余的bbox_x,重复step-3中的迭代操作,直至list_k中所有bbox都完成筛选;
(5)对每个集合的list_k,重复step-3、4中的迭代操作,直至所有list_k都完成筛选;

#coding=utf-8
import numpy as np

def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression in NumPy, with verbose tracing.

    Args:
        dets: 2-D array; columns 0-3 are read as x1, y1, x2, y2 and
            column 4 as the score.
        thresh: a lower-scored box survives a round only if its IoU with
            the current top-scoring box is <= thresh.

    Returns:
        List of row indices into ``dets`` for the kept boxes, in descending
        score order.
    """
    left, top, right, bottom, scores = (dets[:, col] for col in range(5))
    # NOTE: the label below says 'areas:' but the value printed is the score column.
    print('areas:', scores)

    # Box areas use the inclusive convention (+1 on each side).
    areas = (right - left + 1) * (bottom - top + 1)
    print(areas)
    # Indices of the boxes sorted by descending score.
    order = scores.argsort()[::-1]
    print('order:', order)
    keep = []
    print('-'*50)

    while order.size > 0:
        # The best remaining box is always kept; the others are compared to it.
        i = order[0]
        rest = order[1:]
        print('i:', i)
        keep.append(i)

        # Intersection rectangle of box i with every remaining box.
        xx1 = np.maximum(left[i], left[rest])
        yy1 = np.maximum(top[i], top[rest])
        xx2 = np.minimum(right[i], right[rest])
        yy2 = np.minimum(bottom[i], bottom[rest])
        print(xx1,yy1,xx2,yy2)

        # Width/height are floored at 0 so disjoint boxes get zero overlap.
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        print('w, h: ', w, h)
        inter = w * h
        print('inter: ', inter)

        # IoU = intersection / (area_i + area_other - intersection).
        rest_areas = areas[rest]
        ovr = inter / (areas[i] + rest_areas - inter)
        print('areas[order[1:]]:', rest_areas)
        print('ovr:', ovr)

        # Positions (within ``rest``) of the boxes that survive this round.
        inds = np.flatnonzero(ovr <= thresh)
        print('inds:', inds)
        # ``rest`` is ``order`` shifted by one, so rest[inds] == order[inds + 1].
        order = rest[inds]
        print('inds + 1:', inds + 1)
        print('order:', order)
        print('keep:', keep)
        print('-'*50)

    return keep
if __name__ == '__main__':
    # Five sample detections, one per row: four box coordinates then a score.
    dets = np.array([
        [100, 120, 170, 200, 0.98],
        [20, 40, 80, 90, 0.99],
        [20, 38, 82, 88, 0.96],
        [200, 380, 282, 488, 0.9],
        [19, 38, 75, 91, 0.8],
    ])

    # Run NMS with an IoU threshold of 0.5 and show the kept row indices.
    kept = py_cpu_nms(dets, 0.5)
    print(kept)


OUTPUTs:


#coding=utf-8
import numpy as np

def py_cpu_nms(dets, thresh):
    """Pure Python NMS baseline."""
    # tl_x,tl_y,br_x,br_y及score
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    # ...(中间部分省略,与上文完整代码相同)...
    return keep
if __name__ == '__main__':
    dets = np.array([[100,120,170,200,0.98],
                     [20,40,80,90,0.99],
                     [20,38,82,88,0.96],
                     [200,380,282,488,0.9],
                     [19,38,75,91, 0.8]])

    print(py_cpu_nms(dets, 0.5))
areas: [0.98 0.99 0.96 0.9  0.8 ]
[5751. 3111. 3213. 9047. 3078.]
order: [1 0 2 3 4]
--------------------------------------------------
i: 1
[100.  20. 200.  20.] [120.  40. 380.  40.] [80. 80. 80. 75.] [90. 88. 90. 90.]
w, h:  [ 0. 61.  0. 56.] [ 0. 49.  0. 51.]
inter:  [   0. 2989.    0. 2856.]
areas[order[1:]]: [5751. 3213. 9047. 3078.]
ovr: [0.         0.89625187 0.         0.85688569]
inds: [0 2]
inds + 1: [1 3]
order: [0 3]
keep: [1]
--------------------------------------------------
i: 0
[200.] [380.] [170.] [200.]
w, h:  [0.] [0.]
inter:  [0.]
areas[order[1:]]: [9047.]
ovr: [0.]
inds: [0]
inds + 1: [1]
order: [3]
keep: [1, 0]
--------------------------------------------------
i: 3
[] [] [] []
w, h:  [] []
inter:  []
areas[order[1:]]: []
ovr: []
inds: []
inds + 1: []
order: []
keep: [1, 0, 3]
--------------------------------------------------
[1, 0, 3]

2.2. soft NMS

论文:http://cn.arxiv.org/pdf/1704.04503v2
转自:知乎
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

def py_cpu_softnms(dets, sc, Nt=0.3, sigma=0.5, thresh=0.05, method=2):
    """Soft-NMS in NumPy, with verbose tracing.

    Instead of discarding boxes that overlap a higher-scored box, their
    scores are decayed; boxes whose final score is <= ``thresh`` are dropped.

    :param dets:   2-D array with four coordinate columns per box. The
                   original notes give the order as [y1, x1, y2, x2]; columns
                   0/2 are treated as one axis and columns 1/3 as the other.
    :param sc:     per-box scores (array-like). It is copied, so the caller's
                   data is never modified.
    :param Nt:     IoU threshold used by the linear and hard-NMS weightings
    :param sigma:  divisor in the gaussian weighting exp(-iou**2 / sigma)
    :param thresh: final score threshold; only boxes scoring above it are kept
    :param method: 1 = linear, 2 = gaussian, any other value = original NMS
    :return:       integer array with the original row indices of kept boxes
    """
    N = dets.shape[0]
    # Append each box's original row index as a 5th column so that it
    # survives the row swaps performed below.
    indexes = np.array([np.arange(N)])
    dets = np.concatenate((dets, indexes.T), axis=1)

    print(N, indexes, dets)
    y1 = dets[:, 0]
    x1 = dets[:, 1]
    y2 = dets[:, 2]
    x2 = dets[:, 3]
    # Work on a private float copy: the scores are swapped and decayed in
    # place below, which must not leak into the caller's array.
    scores = np.array(sc, dtype=np.float64)
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    print('scores:', scores)
    print('areas:', areas)
    print('-'*50)

    for i in range(N):
        pos = i + 1
        print('pos:', pos)
        if pos < N:
            # Selection-sort step: find the best-scored box among the ones
            # after position i.
            maxpos = np.argmax(scores[pos:], axis=0)
            maxscore = scores[pos + maxpos]
            print('maxscore, maxpos:', maxscore, maxpos)

            if scores[i] < maxscore:
                # Bring that box (with its score and area) to position i.
                best = pos + maxpos
                dets[[i, best]] = dets[[best, i]]
                scores[[i, best]] = scores[[best, i]]
                areas[[i, best]] = areas[[best, i]]
                print('tBD', dets[i, :])
                print('tarea', areas[i])

        # IoU of box i with every box after it.
        xx1 = np.maximum(dets[i, 0], dets[pos:, 0])
        yy1 = np.maximum(dets[i, 1], dets[pos:, 1])
        xx2 = np.minimum(dets[i, 2], dets[pos:, 2])
        yy2 = np.minimum(dets[i, 3], dets[pos:, 3])
        print('xx1, yy1, xx2, yy2:',xx1,yy1,xx2,yy2)
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        print('inter',inter)
        ovr = inter / (areas[i] + areas[pos:] - inter)
        print('ovr:', ovr)

        # Per-box decay weight for the three supported schemes.
        if method == 1:  # linear: scale by (1 - IoU) once IoU exceeds Nt
            weight = np.where(ovr > Nt, 1.0 - ovr, 1.0)
        elif method == 2:  # gaussian: smooth decay for every overlapping box
            print("gaussian:")
            weight = np.exp(-(ovr * ovr) / sigma)
            print('weight:', weight)
        else:  # original NMS: zero the score once IoU exceeds Nt
            weight = np.where(ovr > Nt, 0.0, 1.0)
        scores[pos:] = weight * scores[pos:]
        print('scrores', scores[pos:])
        print('-'*50)

    # Keep the original indices of the boxes whose decayed score is still
    # above ``thresh``.  Note that ``thresh`` (score cut-off) and ``Nt``
    # (IoU cut-off) are different thresholds.
    inds = dets[:, 4][scores > thresh]
    print(scores)
    keep = inds.astype(int)
    return keep

if __name__ == '__main__':
    # Five sample boxes (four coordinates per row) and their scores.
    dets = np.array([
        [100, 120, 170, 200],
        [20, 40, 80, 90],
        [20, 38, 82, 88],
        [200, 380, 282, 488],
        [19, 38, 75, 91],
    ])
    scores = np.array([0.98, 0.99, 0.96, 0.9, 0.8])

    # Run soft-NMS with its default settings and show the kept indices.
    kept = py_cpu_softnms(dets, scores)
    print(kept)

OUTPUTS:

5 [[0 1 2 3 4]] [[100 120 170 200   0]
 [ 20  40  80  90   1]
 [ 20  38  82  88   2]
 [200 380 282 488   3]
 [ 19  38  75  91   4]]
scores: [0.98 0.99 0.96 0.9  0.8 ]
areas: [5751 3111 3213 9047 3078]
--------------------------------------------------
pos: 1
maxscore, maxpos: 0.99 0
tBD [20 40 80 90  1]
tarea 3111
xx1, yy1, xx2, yy2: [100  20 200  20] [120  40 380  40] [80 80 80 75] [90 88 90 90]
inter [   0. 2989.    0. 2856.]
ovr: [0.         0.89625187 0.         0.85688569]
gaussian:
weight: [1.         0.20058146 1.         0.23026921]
scrores [0.98       0.1925582  0.9        0.18421537]
--------------------------------------------------
pos: 2
maxscore, maxpos: 0.9 1
xx1, yy1, xx2, yy2: [100 200 100] [120 380 120] [ 82 170  75] [ 88 200  91]
inter [0. 0. 0.]
ovr: [0. 0. 0.]
gaussian:
weight: [1. 1. 1.]
scrores [0.1925582  0.9        0.18421537]
--------------------------------------------------
pos: 3
maxscore, maxpos: 0.9 0
tBD [200 380 282 488   3]
tarea 9047
xx1, yy1, xx2, yy2: [200 200] [380 380] [82 75] [88 91]
inter [0. 0.]
ovr: [0. 0.]
gaussian:
weight: [1. 1.]
scrores [0.1925582  0.18421537]
--------------------------------------------------
pos: 4
maxscore, maxpos: 0.1842153697915767 0
xx1, yy1, xx2, yy2: [20] [38] [75] [88]
inter [2856.]
ovr: [0.83144105]
gaussian:
weight: [0.2509282]
scrores [0.04622483]
--------------------------------------------------
pos: 5
xx1, yy1, xx2, yy2: [] [] [] []
inter []
ovr: []
gaussian:
weight: []
scrores []
--------------------------------------------------
[0.99       0.98       0.9        0.1925582  0.04622483]
[1 0 3 2]

2.3 fast nms

copy 一波师姐的~ 博客地址,其中通过上三角来过滤不大于最大scores的其他box的讲解很形象。
fast nms 来自于 YOLACT。github链接:https://github.com/dbolya/yolact

def fast_nms(self, boxes, masks, scores, iou_threshold:float=0.5, top_k:int=200, second_threshold:bool=False):
    '''
    Fast NMS: suppress boxes per class with one IoU matrix instead of a loop.

    Shapes as given in the original notes:
    boxes:  torch.Size([num_dets, 4])
    masks:  torch.Size([num_dets, 32])
    scores: torch.Size([num_classes, num_dets])

    iou_threshold:    a box is dropped when its IoU with any higher-scored
                      box of the same class exceeds this value
    top_k:            number of top-scored boxes considered per class
    second_threshold: when True, additionally require
                      score > self.conf_thresh

    Returns (boxes, masks, classes, scores) for the kept detections, limited
    to the cfg.max_num_detections highest scores across all classes.
    '''
    # Step 1: sort each class's scores in descending order and keep the
    # first top_k; idx holds the positions in the original ordering.
    scores, idx = scores.sort(1, descending=True)
    idx = idx[:, :top_k].contiguous()
    scores = scores[:, :top_k]
    num_classes, num_dets = idx.size()

    # Gather boxes / mask coefficients per class in sorted order.
    boxes = boxes[idx.view(-1), :].view(num_classes, num_dets, 4)   # [num_classes, num_dets, 4]
    masks = masks[idx.view(-1), :].view(num_classes, num_dets, -1)  # [num_classes, num_dets, n_coeffs]

    # Step 2: pairwise IoU inside each class.
    # NOTE(review): jaccard is defined outside this snippet; presumably it
    # returns [num_classes, num_dets, num_dets] -- confirm.
    iou = jaccard(boxes, boxes)
    # Zero the diagonal and lower triangle so that entry (i, j) only remains
    # for i < j, i.e. box j compared against higher-scored boxes.
    iou.triu_(diagonal=1)
    # Column-wise max: the largest IoU of each box with any higher-scored box.
    iou_max, _ = iou.max(dim=1)  # [num_classes, num_dets]

    # Keep the boxes whose largest such IoU does not exceed the threshold.
    keep = (iou_max <= iou_threshold)  # [num_classes, num_dets]

    if second_threshold:  # also require a minimum confidence
        keep *= (scores > self.conf_thresh)

    # Assign each kept detection to its corresponding class: row c of
    # `classes` is filled with the value c, e.g.
    #   [[0, 0, ..., 0],
    #    [1, 1, ..., 1],
    #    ...]
    classes = torch.arange(num_classes, device=boxes.device)[:, None].expand_as(keep)
    classes = classes[keep]
    boxes = boxes[keep]
    masks = masks[keep]
    scores = scores[keep]

    # Only keep the top cfg.max_num_detections highest scores across all classes
    scores, idx = scores.sort(0, descending=True)
    idx = idx[:cfg.max_num_detections]
    scores = scores[:cfg.max_num_detections]
    classes = classes[idx]
    boxes = boxes[idx]
    masks = masks[idx]
    return boxes, masks, classes, scores

2.4 matrix nms

matrix nms来自于SOLOV2,github链接:https://github.com/WXinlong/SOLO/
matrix nms可以看成 fast nms 与 soft nms的结合版

def matrix_nms(seg_masks, cate_labels, cate_scores, kernel='gaussian', sigma=2.0, sum_masks=None):
    """Matrix NMS for multi-class masks.

    Every mask's score is decayed according to its IoU with higher-ranked
    masks of the same label, computed for all pairs at once.

    Args:
        seg_masks (Tensor): shape (n, h, w), binary masks
        cate_labels (Tensor): shape (n), mask labels in descending score order
        cate_scores (Tensor): shape (n), mask scores in descending order
        kernel (str): 'gaussian' or 'linear'
        sigma (float): multiplier in the gaussian decay exp(-sigma * iou**2).
            It multiplies rather than divides, so it is the reciprocal of
            the sigma used by py-style soft-NMS (2.0 here vs 0.5 there).
        sum_masks (Tensor): shape (n), per-mask sum of seg_masks; computed
            from seg_masks when None

    Returns:
        Tensor: cate_scores_update, tensor of shape (n). An empty list is
        returned when there are no masks.

    Raises:
        NotImplementedError: if ``kernel`` is neither 'gaussian' nor 'linear'.
    """
    n_samples = len(cate_labels)
    if n_samples == 0:
        return []
    if sum_masks is None:
        sum_masks = seg_masks.sum((1, 2)).float()
    # Flatten every mask to one row: (n, h * w).
    seg_masks = seg_masks.reshape(n_samples, -1).float()

    # Intersection: the dot product of two binary masks counts the pixels
    # set in both.  Masks at different locations give 0 whatever their label.
    inter_matrix = torch.mm(seg_masks, seg_masks.transpose(1, 0))  # (n, n)
    # Union = area_i + area_j - intersection.
    sum_masks_x = sum_masks.expand(n_samples, n_samples)
    # Keep only the strict upper triangle: entry (i, j) with i < j pairs
    # mask j with a higher-ranked mask i.
    iou_matrix = (inter_matrix / (sum_masks_x + sum_masks_x.transpose(1, 0) - inter_matrix)).triu(diagonal=1)

    # 1 where masks i < j share a label, 0 elsewhere.
    cate_labels_x = cate_labels.expand(n_samples, n_samples)
    label_matrix = (cate_labels_x == cate_labels_x.transpose(1, 0)).float().triu(diagonal=1)

    # IoU restricted to same-label pairs; different labels never suppress
    # each other.
    decay_iou = iou_matrix * label_matrix

    # IoU compensation: for each mask, its largest IoU with any higher-ranked
    # mask of the same label (column-wise max), broadcast so that row i holds
    # the value for mask i.
    compensate_iou, _ = decay_iou.max(0)
    compensate_iou = compensate_iou.expand(n_samples, n_samples).transpose(1, 0)

    # Decay coefficient of mask j: the smallest ratio over all suppressors i.
    if kernel == 'gaussian':
        decay_matrix = torch.exp(-1 * sigma * (decay_iou ** 2))
        compensate_matrix = torch.exp(-1 * sigma * (compensate_iou ** 2))
        decay_coefficient, _ = (decay_matrix / compensate_matrix).min(0)
    elif kernel == 'linear':
        decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
        decay_coefficient, _ = decay_matrix.min(0)
    else:
        raise NotImplementedError

    # Soft-NMS style update: scores are scaled down instead of removed.
    cate_scores_update = cate_scores * decay_coefficient
    return cate_scores_update

猜你喜欢

转载自blog.csdn.net/weixin_43823854/article/details/109602236