文章目录
1. IOU
1.1 IOU
参考:知乎
自己没法直接理解多维度多个box之间的iou,索性一步步写下来。
PyTorch 源代码:
# IOU计算
# 假设box1维度为[N,4] box2维度为[M,4]
def iou(self, box1, box2):
    """Compute the pairwise IoU matrix between two sets of boxes.

    Each row is read as corner coordinates: columns 0-1 are the top-left
    point of the box and columns 2-3 are its bottom-right point.

    Args:
        box1: tensor of shape [N, 4].
        box2: tensor of shape [M, 4].

    Returns:
        Tensor of shape [N, M]; entry (i, j) is the IoU of box1[i] and box2[j].
    """
    # Broadcasting [N, 1, 2] against [1, M, 2] pairs every box1 with every box2.
    # The intersection's top-left corner is the larger of the two top-left
    # corners; its bottom-right corner is the smaller of the two bottom-right ones.
    top_left = torch.max(box1[:, None, :2], box2[None, :, :2])      # [N, M, 2]
    bottom_right = torch.min(box1[:, None, 2:], box2[None, :, 2:])  # [N, M, 2]

    # Boxes that do not overlap produce a negative extent; clip it to zero
    # so that such pairs contribute no intersection area.
    extent = (bottom_right - top_left).clamp(min=0)  # [N, M, 2]
    overlap = extent[..., 0] * extent[..., 1]        # [N, M]

    first_area = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])   # [N]
    second_area = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])  # [M]

    # [N, 1] + [1, M] broadcasts to the full [N, M] grid of area sums.
    return overlap / (first_area[:, None] + second_area[None, :] - overlap)
自己实现下
import torch
# Two random tensors standing in for a set of N=2 boxes and a set of M=3 boxes.
# The values are arbitrary draws, so a row is not guaranteed to be a valid box.
a = torch.randn((2,4))
b = torch.randn((3,4))
tensor([[-1.3839, -2.1049, 0.0442, 0.1294],
[ 3.6880, 1.6080, -0.0313, 0.2779]])
tensor([[ 0.3556, 0.4686, 1.0932, -2.3597],
[ 1.2610, 0.2251, -0.3971, 1.7352],
[-0.5320, -2.6367, 0.6560, 0.2212]])
print(b)
# Number of rows (boxes) in each set.
N = a.size(0)
M = b.size(0)
print(N, M)
tensor([[ 0.3556, 0.4686, 1.0932, -2.3597],
[ 1.2610, 0.2251, -0.3971, 1.7352],
[-0.5320, -2.6367, 0.6560, 0.2212]])
2 3
# a1: first two columns of a, [N,2] -> [N,1,2] -> [N,M,2], then passed through sigmoid
a1 = torch.sigmoid(a[:, :2].unsqueeze(1).expand(N, M, 2))
a1
tensor([[[0.2004, 0.1086],
[0.2004, 0.1086],
[0.2004, 0.1086]],
[[0.9756, 0.8331],
[0.9756, 0.8331],
[0.9756, 0.8331]]])
# b1: first two columns of b, [M,2] -> [1,M,2] -> [N,M,2], then passed through sigmoid
b1 = torch.sigmoid(b[:, :2].unsqueeze(0).expand(N, M, 2))
b1
tensor([[[0.5880, 0.6150],
[0.7792, 0.5560],
[0.3701, 0.0668]],
[[0.5880, 0.6150],
[0.7792, 0.5560],
[0.3701, 0.0668]]])
# Element-wise max of the two [N,M,2] tensors: the "lt" (top-left) point of each pairwise intersection.
lt = torch.max(a1, b1)
因此rb部分也是类似的:
# Display the shape of the element-wise max.
torch.max(a1, b1).size()
# Last two columns of a and b, broadcast to [N,M,2] and passed through sigmoid.
a2 = torch.sigmoid(a[:, 2:].unsqueeze(1).expand(N, M, 2))
b2 = torch.sigmoid(b[:, 2:].unsqueeze(0).expand(N, M, 2))
# Element-wise min: the "rb" (bottom-right) point of each pairwise intersection.
rb = torch.min(a2, b2)
print(lt)
print('-'*50)
print(rb)
# NOTE(review): iou() above computes `rb - lt`; this line subtracts in the
# opposite order, so the sign of wh is flipped relative to that function.
# Confirm which one is intended before reusing the numbers printed below.
wh = lt -rb
print('-'*50)
--------------------------------------------------
lt:
tensor([[[0.5880, 0.6150],
[0.7792, 0.5560],
[0.3701, 0.1086]],
[[0.9756, 0.8331],
[0.9756, 0.8331],
[0.9756, 0.8331]]])
--------------------------------------------------
rb:
tensor([[[0.5110, 0.0863],
[0.4020, 0.5323],
[0.5110, 0.5323]],
[[0.4922, 0.0863],
[0.4020, 0.5690],
[0.4922, 0.5551]]])
--------------------------------------------------
wh:
tensor([[[ 0.0769, 0.5287],
[ 0.3772, 0.0237],
[-0.1410, -0.4237]],
[[ 0.4834, 0.7468],
[ 0.5736, 0.2641],
[ 0.4834, 0.2781]]])
# Boolean mask of the negative entries of wh.
print(wh<0)
wh[wh<0] = 0
# Elements at the positions where the mask is True are set to 0
tensor([[[False, False],
[False, False],
[ True, True]],
[[False, False],
[False, False],
[False, False]]])
tensor([[[0.0769, 0.5287],
[0.3772, 0.0237],
[0.0000, 0.0000]],
[[0.4834, 0.7468],
[0.5736, 0.2641],
[0.4834, 0.2781]]])
# Product of the two components of wh for every (i, j) pair.
inter = wh[:,:,0] * wh[:,:,1] # [N,M]
inter
tensor([[0.0407, 0.0090, 0.0000],
[0.3610, 0.1515, 0.1344]])
# NOTE(review): the areas are computed from the raw a / b values, whereas
# lt / rb above were built from sigmoid-transformed values. The two are on
# different scales, and the raw random boxes can give negative "areas"
# (see the printed area2). Confirm before relying on the resulting numbers.
area1 = (a[:,2]-a[:,0]) * (a[:,3]-a[:,1])
area2 = (b[:,2]-b[:,0]) * (b[:,3]-b[:,1])
print(area1, area2)
# Broadcast both area vectors to the [N,M] grid so they line up with inter.
area1 = area1.unsqueeze(1).expand(N,M) # (N,M)
area2 = area2.unsqueeze(0).expand(N,M) # (N,M)
print(area1)
print(area2)
# IoU = intersection / (area1 + area2 - intersection)
iou = inter / (area1+area2-inter)
iou
tensor([3.1906, 4.9471]) tensor([-2.0859, -2.5036, 3.3952])
tensor([[3.1906, 3.1906, 3.1906],
[4.9471, 4.9471, 4.9471]])
tensor([[-2.0859, -2.5036, 3.3952],
[-2.0859, -2.5036, 3.3952]])
tensor([[0.0382, 0.0132, 0.0000],
[0.1444, 0.0661, 0.0164]])
1.2 GIOU
def generalized_box_iou(boxes1, boxes2):
    """
    Generalized IoU from https://giou.stanford.edu/

    The boxes should be in [x0, y0, x1, y1] format.

    Returns a [N, M] pairwise matrix, where N = len(boxes1)
    and M = len(boxes2).
    """
    # Degenerate boxes (x1 < x0 or y1 < y0) give inf / nan results,
    # so reject them before doing any arithmetic.
    assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
    assert (boxes2[:, 2:] >= boxes2[:, :2]).all()

    # box_iou is defined outside this block; it is unpacked here as a
    # (pairwise IoU, pairwise union area) pair.
    iou, union = box_iou(boxes1, boxes2)

    # Corners of the smallest box that encloses each (boxes1[i], boxes2[j])
    # pair: min of the top-left corners, max of the bottom-right corners.
    enclose_tl = torch.min(boxes1[:, None, :2], boxes2[:, :2])
    enclose_br = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])

    enclose_wh = (enclose_br - enclose_tl).clamp(min=0)  # [N,M,2]
    enclose_area = enclose_wh[:, :, 0] * enclose_wh[:, :, 1]

    # Penalty: fraction of the enclosing box not covered by the union.
    penalty = (enclose_area - union) / enclose_area
    return iou - penalty
2. nms
2.1 vanilla nms
参考:知乎,很棒有源码实现,适合理解。
基本步骤
(1)将所有检出的output bbox按cls score划分(如文本检测仅包含文本1类,即将output bbox按照其对应的cls score划分为2个集合,1个为bg类,bg类不需要做NMS而已)
(2)在每个集合内根据各个bbox的cls score做降序排列,得到一个降序的list_k
(3)从list_k中top1 cls score开始,计算该bbox_x与list中其他bbox_y的IoU,若IoU大于阈值T,则剔除该bbox_y,最终保留bbox_x,从list_k中取出
(4)对剩余的bbox_x,重复step-3中的迭代操作,直至list_k中所有bbox都完成筛选;
(5)对每个集合的list_k,重复step-3、4中的迭代操作,直至所有list_k都完成筛选;
#coding=utf-8
import numpy as np
def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression in NumPy (verbose teaching version).

    Args:
        dets: 2-D array, one detection per row, with columns
            top-left x, top-left y, bottom-right x, bottom-right y, score.
        thresh: IoU threshold; a box is dropped when its IoU with an
            already kept box is greater than this value.

    Returns:
        List of row indexes of the kept detections, highest score first.

    Every intermediate value is printed to trace the algorithm. Note that
    the first print is labelled 'areas:' but shows the score column.
    """
    left, top, right, bottom, scores = (dets[:, col] for col in range(5))
    print('areas:', scores)

    # Box areas (the +1 treats coordinates as inclusive pixel indexes) and
    # the detection order from highest to lowest score.
    areas = (right - left + 1) * (bottom - top + 1)
    print(areas)
    order = scores.argsort()[::-1]
    print('order:', order)

    keep = []  # indexes of the boxes that survive
    print('-'*50)
    while order.size > 0:
        best, rest = order[0], order[1:]
        print('i:', best)
        keep.append(best)  # the highest-scoring remaining box is always kept

        # Intersection rectangle of the best box with every remaining box.
        xx1 = np.maximum(left[best], left[rest])
        yy1 = np.maximum(top[best], top[rest])
        xx2 = np.minimum(right[best], right[rest])
        yy2 = np.minimum(bottom[best], bottom[rest])
        print(xx1,yy1,xx2,yy2)

        # Width/height are clipped at 0 so disjoint boxes have zero overlap.
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        print('w, h: ', w, h)
        inter = w * h
        print('inter: ', inter)

        # IoU = intersection / (area_best + area_other - intersection),
        # with the areas taken in the same order as `rest`.
        ovr = inter / (areas[best] + areas[rest] - inter)
        print('areas[order[1:]]:', areas[rest])
        print('ovr:', ovr)

        # Keep only the boxes whose IoU with the best box is within the threshold.
        inds = np.where(ovr <= thresh)[0]
        print('inds:', inds)
        # `inds` indexes `rest`, which is `order` shifted by one position.
        order = rest[inds]
        print('inds + 1:', inds + 1)
        print('order:', order)
        print('keep:', keep)
        print('-'*50)
    return keep
if __name__ == '__main__':
    # Demo input: five detections, one per row (x1, y1, x2, y2, score).
    dets = np.array([
        [100, 120, 170, 200, 0.98],
        [20, 40, 80, 90, 0.99],
        [20, 38, 82, 88, 0.96],
        [200, 380, 282, 488, 0.9],
        [19, 38, 75, 91, 0.8],
    ])
    kept = py_cpu_nms(dets, 0.5)
    print(kept)
OUTPUTs:
#coding=utf-8
import numpy as np
def py_cpu_nms(dets, thresh):
"""Pure Python NMS baseline."""
# tl_x,tl_y,br_x,br_y及score
x1 = dets[:, 0]
y1 = dets[:, 1]
x2 = dets[:, 2]
y2 = dets[:, 3]
…
return keep
if __name__ == '__main__':
dets = np.array([[100,120,170,200,0.98],
[20,40,80,90,0.99],
[20,38,82,88,0.96],
[200,380,282,488,0.9],
[19,38,75,91, 0.8]])
print(py_cpu_nms(dets, 0.5))
areas: [0.98 0.99 0.96 0.9 0.8 ]
[5751. 3111. 3213. 9047. 3078.]
order: [1 0 2 3 4]
--------------------------------------------------
i: 1
[100. 20. 200. 20.] [120. 40. 380. 40.] [80. 80. 80. 75.] [90. 88. 90. 90.]
w, h: [ 0. 61. 0. 56.] [ 0. 49. 0. 51.]
inter: [ 0. 2989. 0. 2856.]
areas[order[1:]]: [5751. 3213. 9047. 3078.]
ovr: [0. 0.89625187 0. 0.85688569]
inds: [0 2]
inds + 1: [1 3]
order: [0 3]
keep: [1]
--------------------------------------------------
i: 0
[200.] [380.] [170.] [200.]
w, h: [0.] [0.]
inter: [0.]
areas[order[1:]]: [9047.]
ovr: [0.]
inds: [0]
inds + 1: [1]
order: [3]
keep: [1, 0]
--------------------------------------------------
i: 3
[] [] [] []
w, h: [] []
inter: []
areas[order[1:]]: []
ovr: []
inds: []
inds + 1: []
order: []
keep: [1, 0, 3]
--------------------------------------------------
[1, 0, 3]
2.2. soft NMS
论文:http://cn.arxiv.org/pdf/1704.04503v2
转自:知乎
def py_cpu_softnms(dets, sc, Nt=0.3, sigma=0.5, thresh=0.05, method=2):
    """
    Soft-NMS in NumPy (verbose teaching version).

    Boxes are visited selection-sort style: at step i the highest-scoring
    box among rows i..N-1 is swapped into row i, then the scores of all
    later boxes are multiplied by a weight that shrinks as their IoU with
    box i grows. Boxes are never removed during the loop; they are only
    filtered by `thresh` at the end.

    :param dets: box coordinate matrix, shape (N, 4), format [y1, x1, y2, x2]
    :param sc: score of each box, length N (left unmodified)
    :param Nt: IoU threshold used by the linear and original-NMS weightings
    :param sigma: denominator of the gaussian exponent, exp(-iou^2 / sigma)
    :param thresh: final score threshold; only boxes whose decayed score is
        greater than it are kept
    :param method: 1 = linear, 2 = gaussian, any other value = original NMS
    :return: integer array with the original row indexes of the kept boxes
    """
    # Append each box's original row index as a last column so that it
    # travels with the box through the swaps below.
    N = dets.shape[0]
    indexes = np.array([np.arange(N)])
    dets = np.concatenate((dets, indexes.T), axis=1)
    print(N, indexes, dets)

    # the order of boxes coordinate is [y1,x1,y2,x2]
    y1 = dets[:, 0]
    x1 = dets[:, 1]
    y2 = dets[:, 2]
    x2 = dets[:, 3]

    # Work on a private copy: the scores are reordered and rescaled in place
    # below, and that must not leak into the caller's array. Non-float
    # scores are promoted so the decay weights are not truncated.
    scores = np.array(sc)
    if not np.issubdtype(scores.dtype, np.floating):
        scores = scores.astype(float)

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    print('scores:', scores)
    print('areas:', areas)
    print('-'*50)

    for i in range(N):
        pos = i + 1
        print('pos:', pos)

        # Bring the best remaining box into row i. The last row has nothing
        # after it, so there is nothing to compare against.
        if i != N-1:
            maxscore = np.max(scores[pos:], axis=0)
            maxpos = np.argmax(scores[pos:], axis=0)
            print('maxscore, maxpos:', maxscore, maxpos)
            if scores[i] < maxscore:
                j = pos + maxpos
                # Fancy indexing on the right-hand side yields copies, so
                # these are safe simultaneous swaps of box, score and area.
                dets[[i, j]] = dets[[j, i]]
                scores[[i, j]] = scores[[j, i]]
                areas[[i, j]] = areas[[j, i]]
                print('tBD', dets[i, :])
                print('tarea', areas[i])

        # IoU calculate: box i against every later box
        xx1 = np.maximum(dets[i, 0], dets[pos:, 0])
        yy1 = np.maximum(dets[i, 1], dets[pos:, 1])
        xx2 = np.minimum(dets[i, 2], dets[pos:, 2])
        yy2 = np.minimum(dets[i, 3], dets[pos:, 3])
        print('xx1, yy1, xx2, yy2:',xx1,yy1,xx2,yy2)

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        print('inter',inter)
        ovr = inter / (areas[i] + areas[pos:] - inter)
        print('ovr:', ovr)

        # Three methods: 1.linear 2.gaussian 3.original NMS
        if method == 1:  # linear: scale by (1 - iou) once iou exceeds Nt
            weight = np.ones(ovr.shape)
            weight[ovr > Nt] = weight[ovr > Nt] - ovr[ovr > Nt]
        elif method == 2:  # gaussian: smooth decay for every overlap
            print("gaussian:")
            weight = np.exp(-(ovr * ovr) / sigma)
            print('weight:', weight)
        else:  # original NMS: zero the score once iou exceeds Nt
            weight = np.ones(ovr.shape)
            weight[ovr > Nt] = 0

        scores[pos:] = weight * scores[pos:]
        print('scrores', scores[pos:])
        print('-'*50)

    # Select the boxes whose decayed score is still above `thresh` and
    # return their original indexes. `thresh` (final score cut) is
    # independent of `Nt` (IoU cut), which is why decayed boxes can drop out.
    inds = dets[:, 4][scores > thresh]
    print(scores)
    keep = inds.astype(int)
    return keep
if __name__ == '__main__':
    # Demo input: five boxes (four coordinates per row) and their scores.
    dets = np.array([
        [100, 120, 170, 200],
        [20, 40, 80, 90],
        [20, 38, 82, 88],
        [200, 380, 282, 488],
        [19, 38, 75, 91],
    ])
    scores = np.array([0.98, 0.99, 0.96, 0.9, 0.8])
    kept = py_cpu_softnms(dets, scores)
    print(kept)
OUTPUTS:
5 [[0 1 2 3 4]] [[100 120 170 200 0]
[ 20 40 80 90 1]
[ 20 38 82 88 2]
[200 380 282 488 3]
[ 19 38 75 91 4]]
scores: [0.98 0.99 0.96 0.9 0.8 ]
areas: [5751 3111 3213 9047 3078]
--------------------------------------------------
pos: 1
maxscore, maxpos: 0.99 0
tBD [20 40 80 90 1]
tarea 3111
xx1, yy1, xx2, yy2: [100 20 200 20] [120 40 380 40] [80 80 80 75] [90 88 90 90]
inter [ 0. 2989. 0. 2856.]
ovr: [0. 0.89625187 0. 0.85688569]
gaussian:
weight: [1. 0.20058146 1. 0.23026921]
scrores [0.98 0.1925582 0.9 0.18421537]
--------------------------------------------------
pos: 2
maxscore, maxpos: 0.9 1
xx1, yy1, xx2, yy2: [100 200 100] [120 380 120] [ 82 170 75] [ 88 200 91]
inter [0. 0. 0.]
ovr: [0. 0. 0.]
gaussian:
weight: [1. 1. 1.]
scrores [0.1925582 0.9 0.18421537]
--------------------------------------------------
pos: 3
maxscore, maxpos: 0.9 0
tBD [200 380 282 488 3]
tarea 9047
xx1, yy1, xx2, yy2: [200 200] [380 380] [82 75] [88 91]
inter [0. 0.]
ovr: [0. 0.]
gaussian:
weight: [1. 1.]
scrores [0.1925582 0.18421537]
--------------------------------------------------
pos: 4
maxscore, maxpos: 0.1842153697915767 0
xx1, yy1, xx2, yy2: [20] [38] [75] [88]
inter [2856.]
ovr: [0.83144105]
gaussian:
weight: [0.2509282]
scrores [0.04622483]
--------------------------------------------------
pos: 5
xx1, yy1, xx2, yy2: [] [] [] []
inter []
ovr: []
gaussian:
weight: []
scrores []
--------------------------------------------------
[0.99 0.98 0.9 0.1925582 0.04622483]
[1 0 3 2]
2.3 fast nms
copy 一波师姐的~ 博客地址,其中通过上三角来过滤不大于最大scores的其他box的讲解很形象。
fast nms来自于 YOLACT。github链接:https://github.com/dbolya/yolact
def fast_nms(self, boxes, masks, scores, iou_threshold:float=0.5, top_k:int=200, second_threshold:bool=False):
    '''
    Fast NMS: suppress boxes for all classes at once with matrix operations.

    boxes: torch.Size([num_dets, 4])
    masks: torch.Size([num_dets, 32])
    scores: torch.Size([num_classes, num_dets])

    iou_threshold: a box is dropped when its IoU with any higher-scoring box
        of the same class is greater than this value.
    top_k: number of highest-scoring boxes considered per class.
    second_threshold: when True, kept boxes must additionally have a score
        above self.conf_thresh.

    Returns (boxes, masks, classes, scores) for the surviving detections,
    sorted by descending score and truncated to cfg.max_num_detections.
    '''
    # step 1: per class, sort the boxes by descending score and keep the top_k.
    # `scores` is now in descending order; `idx` holds the original positions.
    scores, idx = scores.sort(1, descending=True)
    idx = idx[:, :top_k].contiguous()
    scores = scores[:, :top_k]

    num_classes, num_dets = idx.size()

    # Gather the boxes / mask coefficients in the per-class sorted order.
    boxes = boxes[idx.view(-1), :].view(num_classes, num_dets, 4)   # [num_classes, num_dets, 4]
    masks = masks[idx.view(-1), :].view(num_classes, num_dets, -1)  # [num_classes, num_dets, 32]

    # step 2: IoU between every pair of boxes within each class.
    # jaccard is defined outside this block; presumably it returns
    # [num_classes, num_dets, num_dets] - verify against its definition.
    iou = jaccard(boxes, boxes)
    # Zero the diagonal and the lower triangle in place, so entry (i, j) is
    # non-zero only for i < j, i.e. when box i scores higher than box j.
    iou.triu_(diagonal=1)
    # For each box, its largest IoU with any higher-scoring box of the class.
    iou_max, _ = iou.max(dim=1)  # [num_classes, num_dets]

    # Drop the boxes whose IoU with a higher-scoring box exceeds the threshold.
    keep = (iou_max <= iou_threshold)  # [num_classes, num_dets]

    if second_threshold:
        # Also require a minimum confidence for the kept boxes.
        keep *= (scores > self.conf_thresh)

    # Assign each kept detection to its corresponding class:
    # row c of `classes` is filled with the class index c, e.g.
    #   tensor([[ 0,  0,  0, ...,  0,  0,  0],
    #           [ 1,  1,  1, ...,  1,  1,  1],
    #           ...,
    #           [79, 79, 79, ..., 79, 79, 79]])
    classes = torch.arange(num_classes, device=boxes.device)[:, None].expand_as(keep)

    classes = classes[keep]
    boxes = boxes[keep]
    masks = masks[keep]
    scores = scores[keep]

    # Only keep the top cfg.max_num_detections highest scores across all classes
    scores, idx = scores.sort(0, descending=True)
    idx = idx[:cfg.max_num_detections]
    scores = scores[:cfg.max_num_detections]

    classes = classes[idx]
    boxes = boxes[idx]
    masks = masks[idx]
    return boxes, masks, classes, scores
2.4 matrix nms
matrix nms来自于SOLOV2,github链接:https://github.com/WXinlong/SOLO/
matrix nms可以看成 fast nms 与 soft nms的结合版
def matrix_nms(seg_masks, cate_labels, cate_scores, kernel='gaussian', sigma=2.0, sum_masks=None):
    """Matrix NMS for multi-class masks.

    Instead of removing masks, every score is multiplied by a decay
    coefficient derived from the mask's IoU with higher-scoring masks of
    the same class.

    Args:
        seg_masks (Tensor): shape (n, h, w) bool
        cate_labels (Tensor): shape (n), mask labels in descending order
        cate_scores (Tensor): shape (n), mask scores in descending order
        kernel (str): 'gaussian' or 'linear'
        sigma (float): multiplier in the gaussian exponent,
            exp(-sigma * iou ** 2)
        sum_masks (Tensor): The sum of seg_masks (per-mask area); computed
            from seg_masks when None

    Returns:
        Tensor: cate_scores_update, tensors of shape (n). An empty list is
        returned when there are no masks.

    Raises:
        NotImplementedError: if kernel is neither 'gaussian' nor 'linear'.
    """
    n_samples = len(cate_labels)
    if n_samples == 0:
        return []
    if sum_masks is None:
        sum_masks = seg_masks.sum((1, 2)).float()
    seg_masks = seg_masks.reshape(n_samples, -1).float()  # (n, h*w)

    # inter. For 0/1 masks the product of two flattened masks counts the
    # pixels they share, regardless of class; masks at different locations
    # get an intersection of 0.
    inter_matrix = torch.mm(seg_masks, seg_masks.transpose(1, 0))  # (n, n)
    # union.
    sum_masks_x = sum_masks.expand(n_samples, n_samples)  # (n, n)
    # iou. Only the strict upper triangle is kept, so entry (i, j) is
    # non-zero only for i < j (mask i comes earlier in the sorted order).
    iou_matrix = (inter_matrix / (sum_masks_x + sum_masks_x.transpose(1, 0) - inter_matrix)).triu(diagonal=1)
    # label_specific matrix: 1 where masks i < j share the same label.
    cate_labels_x = cate_labels.expand(n_samples, n_samples)  # (n, n)
    label_matrix = (cate_labels_x == cate_labels_x.transpose(1, 0)).float().triu(diagonal=1)

    # IoU decay: IoU restricted to same-label pairs. Different-label pairs
    # are zeroed because NMS only competes within a class.
    decay_iou = iou_matrix * label_matrix

    # IoU compensation: for each mask, its largest IoU with any earlier
    # same-label mask (column-wise max), laid out so that row i is constant.
    compensate_iou, _ = decay_iou.max(0)
    compensate_iou = compensate_iou.expand(n_samples, n_samples).transpose(1, 0)

    # matrix nms
    if kernel == 'gaussian':
        # NOTE: sigma multiplies the exponent here (default 2.0), i.e. it is
        # the reciprocal of a sigma that divides the exponent (0.5).
        decay_matrix = torch.exp(-1 * sigma * (decay_iou ** 2))
        compensate_matrix = torch.exp(-1 * sigma * (compensate_iou ** 2))
        # The strongest (smallest) decay over all earlier masks wins.
        decay_coefficient, _ = (decay_matrix / compensate_matrix).min(0)
    elif kernel == 'linear':
        decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
        decay_coefficient, _ = decay_matrix.min(0)
    else:
        raise NotImplementedError

    # update the score (soft-NMS style): overlapping lower-ranked masks of
    # the same label get their score reduced.
    cate_scores_update = cate_scores * decay_coefficient
    return cate_scores_update