文章目录
- 前言
- 一、命令
-
-
- 预测单张图的命令:
- python v8_seg_onnx.py --path 1.jpg --save_path 2.jpg --imgsz 640 --weight best.onnx --device cpu --show_time --save_masks --save_box
- 预测文件夹的命令:
- python v8_seg_onnx.py --path val_dir_name --save_path results_dir_name --imgsz 640 --weight best.onnx --device cpu --show_time --save_masks --save_box
- 注:具体解释参考代码的help和参数示例,除带☆的必要参数外,保存结果的方式save_masks和save_box必须任选其一,否则没有输出结果
-
- 二、代码
- 总结
前言
原创,创作不易请勿抄袭,如需引用请标明出处,有任何问题欢迎评论区或私聊,感谢!
一、命令
预测单张图的命令:
python v8_seg_onnx.py --path 1.jpg --save_path 2.jpg --imgsz 640 --weight best.onnx --device cpu --show_time --save_masks --save_box
预测文件夹的命令:
python v8_seg_onnx.py --path val_dir_name --save_path results_dir_name --imgsz 640 --weight best.onnx --device cpu --show_time --save_masks --save_box
参数解释(必须参数前面带☆):
☆ path:预测图片路径或存储图片的文件夹路径
☆ save_path:预测结果保存路径,如果预测图片的路径是目录,这里也要指定目录,并且目录已存在
☆weight:onnx模型路径,此代码只支持onnx的模型
save_masks:预测结果保存掩码图(默认不保存)
save_box:预测结果保存矩形框(默认不保存)
imgsz:模型输入图像大小(默认640)
device:cpu或者gpu(默认cpu)
show_time:显示前处理、预测、后处理的时间(默认不显示)
conf_thres:预测阈值(默认0.25)
iou_thres:IOU阈值(默认0.7)
注:具体解释参考代码的help和参数示例,除带☆的必要参数外,保存结果的方式save_masks和save_box必须任选其一,否则没有输出结果
二、代码
import argparse
import cv2
import math
import numpy as np
from numpy import array
import onnxruntime as rt
import os
import time
from tqdm import tqdm
class LetterBox:
"""
调整图像大小和填充
"""
def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, stride=32):
self.new_shape = new_shape
self.auto = auto
self.scaleFill = scaleFill
self.scaleup = scaleup
self.stride = stride
def __call__(self, labels=None, image=None):
if labels is None:
labels = {
}
img = labels.get('img') if image is None else image
shape = img.shape[:2] # current shape [height, width]
new_shape = labels.pop('rect_shape', self.new_shape)
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
# only scale down, do not scale up (for better val mAP)
if not self.scaleup:
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - \
new_unpad[1] # wh padding
if self.auto: # minimum rectangle
dw, dh = np.mod(dw, self.stride), np.mod(
dh, self.stride) # wh padding
elif self.scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / \
shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if labels.get('ratio_pad'):
labels['ratio_pad'] = (labels['ratio_pad'],
(dw, dh)) # for evaluation
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT,
value=(114, 114, 114)) # add border
if len(labels):
labels = self._update_labels(labels, ratio, dw, dh)
labels['img'] = img
labels['resized_shape'] = new_shape
return labels
else:
return img
def _update_labels(self, labels, ratio, padw, padh):
"""Update labels"""
labels['instances'].convert_bbox(format='xyxy')
labels['instances'].denormalize(*labels['img'].shape[:2][::-1])
labels['instances'].scale(*ratio)
labels['instances'].add_padding(padw, padh)
return labels
class non_max_suppression:
"""
非极大值抑制
"""
def __init__(self,
conf_thres=0.25,
iou_thres=0.45,
classes=None,
agnostic=False,
multi_label=False,
labels=(),
max_det=300,
nc=0, # number of classes (optional)
max_time_img=0.05,
max_nms=30000,
max_wh=7680,):
self.conf_thres = conf_thres
self.iou_thres = iou_thres
self.classes = classes
self.agnostic = agnostic
self.multi_label = multi_label
self.labels = labels
self.max_det = max_det
self.nc = nc
self.max_time_img = max_time_img
self.max_nms = max_nms
self.max_wh = max_wh
def __call__(self, prediction):
# assert 0 <= self.conf_thres <= 1, f'Invalid Confidence threshold {self.conf_thres}, valid values are between 0.0 and 1.0'
# assert 0 <= self.iou_thres <= 1, f'Invalid IoU {self.iou_thres}, valid values are between 0.0 and 1.0'
# if isinstance(prediction, (list, tuple)): # YOLOv8 model in validation model, output = (inference_out, loss_out)
# prediction = prediction[0] # select only inference output
self.prediction = prediction
bs = self.prediction.shape[0] # batch size
nc = self.nc or (self.prediction.shape[1] - 4) # number of classes
nm = self.prediction.shape[1] - nc - 4
mi = 4 + nc # mask start index
xc = self.prediction[:, 4:mi].max(1) > self.conf_thres # candidates
# Settings
# min_wh = 2 # (pixels) minimum box width and height
time_limit = 0.5 + self.max_time_img * bs # seconds to quit after
self.multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
t = time.time()
output = [np.zeros((0, 6 + nm), dtype=np.uint8)] * bs
for xi, x in enumerate(self.prediction): # image index, image inference
x = x.transpose(1, 0)[xc[xi]] # confidence
box, cls, mask = x[:, :4], x[:, 4:nc+4], x[:, -nm:]
# center_x, center_y, width, height) to (x1, y1, x2, y2)
box = self.xywh2xyxy(box)
if self.multi_label:
i, j = (cls > self.conf_thres).nonzero(as_tuple=False).T
x = np.concatenate(
(box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
else: # best class only
conf, j = cls.max(1).reshape(cls.shape[0],1), cls.argmax(1).reshape(cls.shape[0],1)
x = np.concatenate((box, conf, j.astype(np.float64), mask), 1)[
conf.reshape(-1) > self.conf_thres]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
x = x[(-x[:, 4]).argsort()[:self.max_nms]]
# Batched NMS
c = x[:, 5:6] * (0 if self.agnostic else self.max_wh) # classes
# boxes (offset by class), scores
boxes, scores = x[:, :4] + c, x[:, 4]
i = self.numpy_nms(boxes, scores, self.iou_thres) # NMS
i = i[:self.max_det] # limit detections
output[xi] = x[i]
if (time.time() - t) > time_limit:
print(f'WARNING ⚠️ NMS time limit {
time_limit:.3f}s exceeded')
break # time limit exceeded
return output
def xywh2xyxy(self, x):
y = np.copy(x)
y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x
y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y
y[..., 2] = x[..., 0] + x[..., 2] / 2 # bottom right x
y[..., 3] = x[..., 1] + x[..., 3] / 2 # bottom right y
return y
def box_area(self, boxes: array):
"""
:param boxes: [N, 4]
:return: [N]
"""
return (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
def box_iou(self, box1: array, box2: array):
"""
:param box1: [N, 4]
:param box2: [M, 4]
:return: [N, M]
"""
area1 = self.box_area(box1) # N
area2 = self.box_area(box2) # M
# broadcasting, 两个数组各维度大小 从后往前对比一致, 或者 有一维度值为1;
lt = np.maximum(box1[:, np.newaxis, :2], box2[:, :2])
rb = np.minimum(box1[:, np.newaxis, 2:], box2[:, 2:])
wh = rb - lt
wh = np.maximum(0, wh) # [N, M, 2]
inter = wh[:, :, 0] * wh[:, :, 1]
iou = inter / (area1[:, np.newaxis] + area2 - inter)
return iou # NxM
def numpy_nms(self, boxes: array, scores: array, iou_threshold: float):
idxs = scores.argsort() # 按分数 降序排列的索引 [N]
keep = []
while idxs.size > 0: # 统计数组中元素的个数
max_score_index = idxs[-1]
max_score_box = boxes[max_score_index][None, :]
keep.append(max_score_index)
if idxs.size == 1:
break
idxs = idxs[:-1] # 将得分最大框 从索引中删除; 剩余索引对应的框 和 得分最大框 计算IoU;
other_boxes = boxes[idxs] # [?, 4]
ious = self.box_iou(max_score_box, other_boxes) # 一个框和其余框比较 1XM
idxs = idxs[ious[0] <= iou_threshold]
keep = np.array(keep)
return keep
class process_mask:
"""
上采样还原掩码大小
"""
def __init__(self, protos, masks_in, bboxes, shape, upsample=False) -> None:
self.protos = protos
self.masks_in = masks_in
self.bboxes = bboxes
self.shape = shape
self.upsample = upsample
def __call__(self, *args, **kwds):
c, mh, mw = self.protos.shape # CHW
ih, iw = self.shape
# print(self.masks_in.shape)
# print(self.protos.shape)
masks = self.sigmoid_function(
(self.masks_in @ self.protos.astype(np.float64).reshape(c, -1))[0]).reshape(-1, mh, mw) # CHW
downsampled_bboxes = self.bboxes.copy()
downsampled_bboxes[:, 0] *= mw / iw
downsampled_bboxes[:, 2] *= mw / iw
downsampled_bboxes[:, 3] *= mh / ih
downsampled_bboxes[:, 1] *= mh / ih
masks = self.crop_mask(masks, downsampled_bboxes) # CHW
if self.upsample:
masks = (masks*255).astype(np.uint8)
masks = masks.transpose(1, 2, 0)
masks = cv2.resize(masks, kwds['size'])
masks[masks <= (255*0.5)] = 0.0
masks[masks > (255*0.5)] = 1.0
return masks
def sigmoid_function(self, z):
fz = []
for num in z:
fz.append(1/(1 + math.exp(-num)))
return np.array(fz)
def crop_mask(self, masks, boxes):
n, h, w = masks.shape
x1, y1, x2, y2 = np.array_split(
boxes[:, :, None], 4, 1) # x1 shape(1,1,n)
r = np.array(range(w), dtype=np.float64).reshape(
1, 1, -1) # rows shape(1,w,1)
c = np.array(range(h), dtype=np.float64).reshape(
1, -1, 1) # cols shape(h,1,1)
return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
class Segmentation_inference:
def __init__(self, model_path, device) -> None:
providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if device != 'cpu' else [
'CPUExecutionProvider']
self.sess = rt.InferenceSession(model_path, providers=providers)
self.input_name = self.sess.get_inputs()[0].name
self.out_name = [output.name for output in self.sess.get_outputs()]
def __call__(self, *args, **kwds):
self.im = cv2.imdecode(np.fromfile(kwds['path'], dtype=np.int8), 1)
# 前处理
time1 = time.time()
self.im0 = self.img_pretreatment(
im=self.im, size=[kwds['imgsz'], kwds['imgsz']])
time2 = time.time()
# 推理
time3 = time.time()
self.preds = self.sess.run(
self.out_name, {
self.input_name: [self.im0]})
time4 = time.time()
# 后处理
time5 = time.time()
self.masks, self.box_list, self.scores_list = self.img_reprocessing(
preds=self.preds, size=(kwds['imgsz'], kwds['imgsz']), conf_thres=kwds['conf_thres'], iou_thres=kwds['iou_thres'])
if type(self.masks) == type(None):
cv2.imwrite(kwds['save_path'], self.im)
return None
time6 = time.time()
# 打印处理时间及保存
if kwds['show_time']:
print(f'\npretreatment——time:{
(time2-time1)*1000}ms')
print(f'inference——time:{
(time4-time3)*1000}ms')
print(f'reprocessing——time:{
(time6-time5)*1000}ms')
print('-----------------------------')
if kwds['save_masks'] and kwds['save_box']: # 同时保存矩形框和掩码
masks_write = self.masks_write(masks=np.array(
self.masks), im_gpu=self.im0, im_shape=self.im.shape, im=self.im.copy())
# ,self.box_list,self.scores_list))
cv2.imwrite(kwds['save_path'], self.box_write(masks_write.copy()))
elif kwds['save_masks']: # 只保存掩码
cv2.imwrite(kwds['save_path'], self.masks_write(masks=np.array(
self.masks), im_gpu=self.im0, im_shape=self.im.shape, im=self.im.copy()))
elif kwds['save_box']: # 只保存矩形框
# ,self.box_list,self.scores_list))
cv2.imwrite(kwds['save_path'], self.box_write(self.im.copy()))
return True
def img_pretreatment(self, im, size=[640, 640], auto=False, stride=32):
"""
前处理
"""
im1 = LetterBox(size, auto, stride=stride)(image=im)
im2 = im1.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
im3 = np.ascontiguousarray(im2) # contiguous
im4 = im3.astype(np.float32)/255.0
return im4
def img_reprocessing(self, preds, size, conf_thres=0.25, iou_thres=0.7):
"""
后处理
"""
nc=len(eval(self.sess.get_modelmeta().custom_metadata_map['names']))
p = non_max_suppression(
conf_thres=conf_thres, iou_thres=iou_thres,nc=nc)(preds[0])
if len(p[0]) == 0:
return None
proto = self.preds[1][-1] if len(self.preds[1]) == 3 else self.preds[1]
masks_list = []
pred_list = []
scores_list = []
for i, pred in enumerate(p):
masks = process_mask(proto[i], pred[:, 6:], pred[:, :4], np.array(
self.im0).shape[-2:], upsample=True)(size=size) # HWC
pred[:, :4] = self.scale_boxes(
self.im0.shape[1:], pred[:, :4], self.im.shape)
masks_list.append(masks)
pred_list.append(pred[:, :4])
scores_list.append(pred[:, 4])
return masks_list, pred_list, scores_list
def box_write(self, image): # ,box_list,scores_list
"""
画矩形框
"""
box_write = image.copy()
for box, scores in zip(self.box_list[0], self.scores_list[0]):
cx = np.mean([int(box[0]), int(box[2])])
cy = np.mean([int(box[1]), int(box[3])])
box_write = cv2.rectangle(box_write, (int(box[0]), int(box[1])), (int(
box[2]), int(box[3])), color=(0, 0, 255), thickness=2)
mess = '%.2f' % scores
h, w = image.shape[:2]
cv2.putText(box_write, mess, (int(cx), int(cy)),
0, 1e-3 * h, (0, 0, 255), 1 // 2)
return box_write
def masks_write(self, masks, im_gpu, im_shape, im, colors=[[[[0.21961, 0.21961, 1.00000]]]], alpha=0.5, retina_masks=False):
"""
保存结果
"""
# cv2.imshow('main',masks[0]*255)
# cv2.waitKey(-1)
if len(masks.shape) == 3:
masks = masks.reshape(
masks.shape[0], masks.shape[1], masks.shape[2], 1)
else:
huaban = np.zeros((masks.shape[1], masks.shape[2]))
masks.astype(np.bool_)
for i in range(masks.shape[3]):
huaban[masks[0][:, :, i] != 0] = 1
masks = huaban.reshape(
masks.shape[0], masks.shape[1], masks.shape[2], 1)
masks_color = masks * colors * alpha # shape(n,h,w,3)
inv_alph_masks = (1 - masks * alpha).cumprod(0) # shape(n,h,w,1)
mcs = (masks_color * inv_alph_masks).sum(0) * \
2 # mask color summand shape(n,h,w,3)
im_gpu = im_gpu[::-1, :, :]
im_gpu = im_gpu.transpose(1, 2, 0) # shape(h,w,3)
im_gpu = im_gpu * inv_alph_masks[-1] + mcs
im_mask = (im_gpu * 255)
im_mask_np = im_mask
im[:] = im_mask_np if retina_masks else self.scale_image(
im_gpu.shape, im_mask_np, im_shape)
for box, scores in zip(self.box_list[0], self.scores_list[0]): # 画置信度
cx = np.mean([int(box[0]), int(box[2])])
cy = np.mean([int(box[1]), int(box[3])])
mess = '%.2f' % scores
h, w = im.shape[:2]
cv2.putText(im, mess, (int(cx), int(cy)), 0,
1e-3 * h, (0, 0, 255), 1 // 2)
return im
def scale_boxes(self, img1_shape, boxes, img0_shape, ratio_pad=None):
"""
缩放矩形框
"""
if ratio_pad is None: # calculate from img0_shape
# gain = old / new
gain = min(img1_shape[0] / img0_shape[0],
img1_shape[1] / img0_shape[1])
pad = (img1_shape[1] - img0_shape[1] * gain) / \
2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
boxes[..., [0, 2]] -= pad[0] # x padding
boxes[..., [1, 3]] -= pad[1] # y padding
boxes[..., :4] /= gain
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(
0, img0_shape[1]) # x1, x2
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(
0, img0_shape[0]) # y1, y2
return boxes
def scale_image(self, im1_shape, masks, im0_shape, ratio_pad=None):
"""
保存结果时缩放图像
"""
# Rescale coordinates (xyxy) from im1_shape to im0_shape
if ratio_pad is None: # calculate from im0_shape
# gain = old / new
gain = min(im1_shape[0] / im0_shape[0],
im1_shape[1] / im0_shape[1])
pad = (im1_shape[1] - im0_shape[1] * gain) / \
2, (im1_shape[0] - im0_shape[0] * gain) / 2 # wh padding
else:
pad = ratio_pad[1]
top, left = int(pad[1]), int(pad[0]) # y, x
bottom, right = int(im1_shape[0] - pad[1]), int(im1_shape[1] - pad[0])
if len(masks.shape) < 2:
raise ValueError(
f'"len of masks shape" should be 2 or 3, but got {
len(masks.shape)}')
masks = masks[top:bottom, left:right]
masks = cv2.resize(masks, (im0_shape[1], im0_shape[0]))
if len(masks.shape) == 2:
masks = masks[:, :, None]
return masks
def parse_opt(known=False):
parser = argparse.ArgumentParser()
parser.add_argument('--path', type=str,
default='val_dir_name', help='Image Path or Image dir path')
parser.add_argument('--save_path', type=str,
default='results_dir_name', help='result Image save Path or dir path')
parser.add_argument('--weight', type=str,
default='best.onnx', help='weights path')
parser.add_argument('--imgsz', type=int,
default=640, help='Input Image Size')
parser.add_argument('--device', type=str, default='cpu',
help='Hardware devices')
parser.add_argument('--save_masks', action='store_true',
default=False, help='save the mask?')
parser.add_argument('--save_box', action='store_true',
default=False, help='save the box?')
parser.add_argument('--show_time', action='store_true',
default=False, help='Output processing time')
parser.add_argument('--conf_thres', type=float,
default=0.25, help='Confidence level threshold')
parser.add_argument('--iou_thres', type=float,
default=0.7, help='IOU threshold')
return parser.parse_args()
def main(opt):
calculate = Segmentation_inference(opt.weight, opt.device)
if not os.path.isdir(opt.path): # 检测单张图片
try:
assert type(calculate(path=opt.path,
save_path=opt.save_path,
save_masks=opt.save_masks,
save_box=opt.save_box,
show_time=opt.show_time,
imgsz=opt.imgsz,
conf_thres=opt.conf_thres,
iou_thres=opt.iou_thres)) != type(None), '没有检测到目标,请适当调低阈值'
except BaseException as f:
print(f)
else: # 检测一个文件夹中的图片
assert os.path.isdir(
opt.save_path), '预测路径为文件夹目录,则保存路径也应该指定为已存在的某个文件夹目录'
for img_path in tqdm(os.listdir(opt.path)):
try:
assert type(calculate(path=os.path.join(opt.path, img_path),
save_path=os.path.join(
opt.save_path, img_path),
save_masks=opt.save_masks,
save_box=opt.save_box,
show_time=opt.show_time,
imgsz=opt.imgsz,
conf_thres=opt.conf_thres,
iou_thres=opt.iou_thres)) != type(None), '没有检测到目标,请适当调低阈值'
except BaseException as f:
print(f)
if '__main__' == __name__:
opt = parse_opt()
main(opt)
总结
人懒,此处省略,有问题欢迎评论区或者私信。。。。。。。.。。.。.。。.。。。。。