Pytorch版本yolov3源码阅读

Pytorch版本yolov3源码阅读

1. 阅读test.py

1.1 参数解读

# Command-line options of test.py, declared as (flag, type, default, help) rows
# and registered in this order.
_TEST_OPTIONS = (
    ('-batch_size', int, 32, 'size of each image batch'),
    ('-cfg', str, 'cfg/yolov3.cfg', 'path to model config file'),
    ('-data_config_path', str, 'cfg/coco.data', 'path to data config file'),
    ('-weights_path', str, 'checkpoints/yolov3.pt', 'path to weights file'),
    ('-class_path', str, 'data/coco.names', 'path to class label file'),
    ('-iou_thres', float, 0.5, 'iou threshold required to qualify as detected'),
    ('-conf_thres', float, 0.5, 'object confidence threshold'),
    ('-nms_thres', float, 0.45, 'iou threshold for non-maximum suppression'),
    ('-n_cpu', int, 0, 'number of cpu threads to use during batch generation'),
    ('-img_size', int, 608, 'size of each image dimension'),
)

parser = argparse.ArgumentParser()
for flag, value_type, default_value, help_text in _TEST_OPTIONS:
    parser.add_argument(flag, type=value_type, default=default_value, help=help_text)
opt = parser.parse_args()
print(opt)

batch_size: 每个batch大小，跟darknet不太一样，没有subdivision
cfg：网络配置文件
data_config_path: coco.data文件，存储相关信息
weights_path: 权重文件路径
class_path: 类别文件，注意类别的顺序，coco.names
iou_thres: iou阈值
conf_thres: 目标置信度阈值
nms_thres: 非极大抑制阈值
n_cpu: 使用多少个线程来创建batch
img_size: 设置初始图片大小

1.2 data文件解析

def parse_data_config(path):
    """Parse a darknet-style ``.data`` file into a dict of strings.

    Blank lines and lines starting with ``#`` are ignored. Every other line
    must look like ``key = value``; only the first ``=`` separates key from
    value, so values may themselves contain ``=``.

    Args:
        path: path of the data configuration file.

    Returns:
        dict mapping option names to their (stripped) string values. The keys
        ``'gpus'`` and ``'num_workers'`` are pre-populated with ``'0,1'`` and
        ``'10'`` and are overridden if the file defines them.

    Raises:
        ValueError: if a non-comment line contains no ``=``.
    """
    # Defaults used when the file does not set these options.
    options = {'gpus': '0,1', 'num_workers': '10'}
    with open(path, 'r') as fp:
        for line_no, raw_line in enumerate(fp, start=1):
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue
            # partition splits on the first '=' only, so 'a=b=c' -> ('a', 'b=c')
            key, sep, value = line.partition('=')
            if not sep:
                raise ValueError('%s:%d: expected "key=value", got %r' % (path, line_no, line))
            options[key.strip()] = value.strip()
    return options

将data文件中内容存储到options这个dict中，获取的时候就可以对这个对象通过key进行提取value。

1.3 cfg文件解析

def parse_model_config(path):
    """Parses the yolo-v3 layer configuration file and returns module definitions.

    Each ``[section]`` header starts a new dict whose ``'type'`` is the section
    name; the ``key=value`` lines that follow are stored in that dict as
    strings. ``convolutional`` sections get ``'batch_normalize': 0`` as a
    default (an int) that the file may override with a string value.

    Args:
        path: path of the ``.cfg`` file.

    Returns:
        list of dicts, one per section, in file order.

    Raises:
        ValueError: if an option line contains no ``=``.
        IndexError: if an option line appears before the first section header.
    """
    # Read inside a context manager so the handle is always closed.
    with open(path, 'r') as cfg_file:
        raw_lines = cfg_file.read().split('\n')

    module_defs = []
    for raw_line in raw_lines:
        # Strip first, then filter: whitespace-only lines and indented
        # comments must be skipped as well.
        line = raw_line.strip()
        if not line or line.startswith('#'):
            continue
        if line.startswith('['):  # This marks the start of a new block
            block = {'type': line[1:-1].rstrip()}
            if block['type'] == 'convolutional':
                block['batch_normalize'] = 0
            module_defs.append(block)
        else:
            # Split on the first '=' only so values may contain '='.
            key, sep, value = line.partition('=')
            if not sep:
                raise ValueError('%s: expected "key=value", got %r' % (path, line))
            module_defs[-1][key.rstrip()] = value.strip()

    return module_defs

返回的module_defs存储的是所有的网络参数信息，一个list中套了很多个dict.

1.4 根据cfg文件创建模块

def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs

    The first entry of ``module_defs`` (the hyperparameter block) is popped
    off, so the list passed in is mutated and afterwards holds only layer
    definitions. Darknet.forward zips that same list with the returned module
    list, so the in-place pop must be kept.

    Args:
        module_defs: list of dicts as returned by parse_model_config.

    Returns:
        (hyperparams, module_list): the popped hyperparameter dict and an
        nn.ModuleList with one nn.Sequential per remaining layer definition.
    """
    # The first block holds the network hyperparameters rather than a layer.
    hyperparams = module_defs.pop(0)

    # output_filters[0] is the input channel count; output_filters[k + 1] is
    # the output channel count of layer k.
    output_filters = [int(hyperparams['channels'])]

    module_list = nn.ModuleList()
    for i, module_def in enumerate(module_defs):
        # Sequential container: child modules run in the order they are added.
        modules = nn.Sequential()
        # Per-layer channel counts only (without the leading input entry), so
        # that index k means "layer k" exactly as layer_outputs[k] does in
        # Darknet.forward. Negative indices are unaffected by the offset.
        layer_filters = output_filters[1:]

        # Build the block according to its layer type.
        if module_def['type'] == 'convolutional':
            bn = int(module_def['batch_normalize'])
            filters = int(module_def['filters'])
            kernel_size = int(module_def['size'])
            pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
            # Register the convolution as a named child; the bias is dropped
            # when batch norm follows.
            modules.add_module('conv_%d' % i, nn.Conv2d(in_channels=output_filters[-1],
                                                        out_channels=filters,
                                                        kernel_size=kernel_size,
                                                        stride=int(module_def['stride']),
                                                        padding=pad,
                                                        bias=not bn))
            if bn:
                modules.add_module('batch_norm_%d' % i, nn.BatchNorm2d(filters))
            if module_def['activation'] == 'leaky':
                modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))

        elif module_def['type'] == 'upsample':
            # PyTorch's built-in upsampling layer; channel count is unchanged,
            # so `filters` keeps the value from the previous layer.
            upsample = nn.Upsample(scale_factor=int(module_def['stride']), mode='nearest')
            modules.add_module('upsample_%d' % i, upsample)

        elif module_def['type'] == 'route':
            # Parse the route layer of the yolo cfg file, e.g. layers = -1, 14
            layers = [int(x) for x in module_def['layers'].split(',')]
            # The routed feature maps are concatenated along the channel axis
            # (torch.cat in Darknet.forward), so the output channel count is
            # the sum of the routed layers' channel counts.
            filters = sum([layer_filters[layer_i] for layer_i in layers])
            modules.add_module('route_%d' % i, EmptyLayer())

        elif module_def['type'] == 'shortcut':
            # eg from yolov3.cfg
            # from=-3
            # activation = linear
            # The element-wise addition itself happens in Darknet.forward; no
            # activation is applied here.
            filters = layer_filters[int(module_def['from'])]
            modules.add_module('shortcut_%d' % i, EmptyLayer())

        elif module_def['type'] == 'yolo':
            anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
            # Extract anchors: flat "w,h,w,h,..." list -> (w, h) pairs -> masked subset
            anchors = [float(x) for x in module_def['anchors'].split(',')]
            anchors = [(anchors[j], anchors[j + 1]) for j in range(0, len(anchors), 2)]
            anchors = [anchors[j] for j in anchor_idxs]
            num_classes = int(module_def['classes'])
            img_height = int(hyperparams['height'])
            # Define detection layer; `filters` is left at the previous
            # layer's value.
            yolo_layer = YOLOLayer(anchors, num_classes, img_height, anchor_idxs)
            modules.add_module('yolo_%d' % i, yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list

这里开始就涉及到pytorch部分的内容了：

module_list = nn.ModuleList(): 创建一个list，其中存放的是module
nn.Sequential(): 一个时序容器。Modules 会以他们传入的顺序被添加到容器中。当然，也可以传入一个OrderedDict。
add_module(name, module): 将一个 child module 添加到当前 model。被添加的 module 可以通过 name 属性来获取。

1.5 YOLOLayer

class YOLOLayer(nn.Module):
    """Detection head: decodes one raw prediction map into boxes or a loss.

    With ``targets`` the forward pass returns the loss and bookkeeping values;
    without them it returns decoded detections.
    """

    def __init__(self, anchors, nC, img_dim, anchor_idxs):
        """Store the anchors and precompute the cell-offset grids.

        Args:
            anchors: iterable of (width, height) pairs, in pixels.
            nC: number of classes.
            img_dim: image size taken from the cfg hyperparameters.
            anchor_idxs: anchor mask indices; only the first one is used, to
                pick the stride.
        """
        super(YOLOLayer, self).__init__()

        anchors = [(a_w, a_h) for a_w, a_h in anchors]  # (pixels)
        nA = len(anchors)

        self.anchors = anchors
        self.nA = nA  # number of anchors (3)
        self.nC = nC  # number of classes (80)
        self.bbox_attrs = 5 + nC  # x, y, w, h, objectness + one score per class
        self.img_dim = img_dim  # from hyperparams in cfg file, NOT from parser

        # The stride is inferred from the first mask index:
        # first index == 2 * nA -> 32, == nA -> 16, anything else -> 8.
        if anchor_idxs[0] == (nA * 2):  # 6
            stride = 32
        elif anchor_idxs[0] == nA:  # 3
            stride = 16
        else:
            stride = 8

        # Build anchor grids
        nG = int(self.img_dim / stride)
        # grid_x holds the column index of every cell, grid_y the row index
        self.grid_x = torch.arange(nG).repeat(nG, 1).view([1, 1, nG, nG]).float()
        self.grid_y = torch.arange(nG).repeat(nG, 1).t().view([1, 1, nG, nG]).float()
        # Anchors converted from pixels to grid-cell units
        self.scaled_anchors = torch.FloatTensor([(a_w / stride, a_h / stride) for a_w, a_h in anchors])
        self.anchor_w = self.scaled_anchors[:, 0:1].view((1, nA, 1, 1))
        self.anchor_h = self.scaled_anchors[:, 1:2].view((1, nA, 1, 1))

    def forward(self, p, targets=None, requestPrecision=False):
        """Compute the loss (training) or the decoded detections (inference).

        Args:
            p: raw prediction map; it is viewed as
                (bs, nA, bbox_attrs, nG, nG) below.
            targets: per-image targets; when None the layer runs in inference mode.
            requestPrecision: when True, corner-format boxes are filled in
                before build_targets is called.

        Returns:
            Training: a 14-tuple ``(loss, loss.item(), lx, ly, lw, lh, lconf,
            lcls, nT, TP, FP, FPe, FN, TC)``.
            Inference: tensor of shape (bs, nA * nG * nG, 5 + nC) holding
            [x, y, w, h] scaled by the stride, sigmoid objectness, and the raw
            (un-activated) class scores.
        """
        FT = torch.cuda.FloatTensor if p.is_cuda else torch.FloatTensor

        bs = p.shape[0]  # batch size
        nG = p.shape[2]  # number of grid points
        stride = self.img_dim / nG

        # The grids are plain attributes (not registered buffers), so they are
        # moved to the GPU by hand the first time a CUDA input arrives.
        if p.is_cuda and not self.grid_x.is_cuda:
            self.grid_x, self.grid_y = self.grid_x.cuda(), self.grid_y.cuda()
            self.anchor_w, self.anchor_h = self.anchor_w.cuda(), self.anchor_h.cuda()

        # p.view(12, 255, 13, 13) -- > (12, 3, 13, 13, 85)  # (bs, anchors, grid, grid, xywh + conf + classes)
        p = p.view(bs, self.nA, self.bbox_attrs, nG, nG).permute(0, 1, 3, 4, 2).contiguous()  # prediction

        # Get outputs
        x = torch.sigmoid(p[..., 0])  # Center x
        y = torch.sigmoid(p[..., 1])  # Center y

        # Width and height (yolo method)
        w = p[..., 2]  # Width
        h = p[..., 3]  # Height
        # .data is used, so width/height carry no gradient
        width = torch.exp(w.data) * self.anchor_w
        height = torch.exp(h.data) * self.anchor_h

        # Width and height (power method)
        # w = torch.sigmoid(p[..., 2])  # Width
        # h = torch.sigmoid(p[..., 3])  # Height
        # width = ((w.data * 2) ** 2) * self.anchor_w
        # height = ((h.data * 2) ** 2) * self.anchor_h

        # Add offset and scale with anchors (in grid space, i.e. 0-13)
        # NOTE(review): FT(sizes...) allocates without initialising; in training the
        # tensor is only filled when requestPrecision is True - confirm
        # build_targets ignores it otherwise.
        pred_boxes = FT(bs, self.nA, nG, nG, 4)
        pred_conf = p[..., 4]  # Conf
        pred_cls = p[..., 5:]  # Class

        # Training
        if targets is not None:
            # NOTE(review): size_average is deprecated in current PyTorch in favour
            # of reduction='mean' - confirm against the installed version.
            MSELoss = nn.MSELoss(size_average=True)
            BCEWithLogitsLoss = nn.BCEWithLogitsLoss(size_average=True)
            CrossEntropyLoss = nn.CrossEntropyLoss()

            if requestPrecision:
                # Boxes as (x1, y1, x2, y2) in grid units
                gx = self.grid_x[:, :, :nG, :nG]
                gy = self.grid_y[:, :, :nG, :nG]
                pred_boxes[..., 0] = x.data + gx - width / 2
                pred_boxes[..., 1] = y.data + gy - height / 2
                pred_boxes[..., 2] = x.data + gx + width / 2
                pred_boxes[..., 3] = y.data + gy + height / 2

            tx, ty, tw, th, mask, tcls, TP, FP, FN, TC = \
                build_targets(pred_boxes, pred_conf, pred_cls, targets, self.scaled_anchors, self.nA, self.nC, nG,
                              requestPrecision)
            tcls = tcls[mask]
            if x.is_cuda:
                tx, ty, tw, th, mask, tcls = tx.cuda(), ty.cuda(), tw.cuda(), th.cuda(), mask.cuda(), tcls.cuda()

            # Mask outputs to ignore non-existing objects (but keep confidence predictions)
            nT = sum([len(x) for x in targets])  # number of targets
            nM = mask.sum().float()  # number of anchors (assigned to targets)
            nB = len(targets)  # batch size
            k = nM / nB  # every loss term is scaled by assigned anchors per image
            if nM > 0:
                lx = k * MSELoss(x[mask], tx[mask])
                ly = k * MSELoss(y[mask], ty[mask])
                lw = k * MSELoss(w[mask], tw[mask])
                lh = k * MSELoss(h[mask], th[mask])

                # Confidence loss runs over ALL anchors, with the assignment mask as target
                # lconf = k * BCEWithLogitsLoss(pred_conf[mask], mask[mask].float())
                lconf = k * BCEWithLogitsLoss(pred_conf, mask.float())

                lcls = k * CrossEntropyLoss(pred_cls[mask], torch.argmax(tcls, 1))
                # lcls = k * BCEWithLogitsLoss(pred_cls[mask], tcls.float())
            else:
                lx, ly, lw, lh, lcls, lconf = FT([0]), FT([0]), FT([0]), FT([0]), FT([0]), FT([0])

            # Add confidence loss for background anchors (noobj)
            #lconf += k * BCEWithLogitsLoss(pred_conf[~mask], mask[~mask].float())

            # Sum loss components
            loss = lx + ly + lw + lh + lconf + lcls

            # Sum False Positives from unassigned anchors
            # (unassigned anchors whose sigmoid confidence exceeds 0.9, counted per argmax class)
            i = torch.sigmoid(pred_conf[~mask]) > 0.9
            if i.sum() > 0:
                FP_classes = torch.argmax(pred_cls[~mask][i], 1)
                FPe = torch.bincount(FP_classes, minlength=self.nC).float().cpu()  # extra FPs
            else:
                FPe = torch.zeros(self.nC)

            return loss, loss.item(), lx.item(), ly.item(), lw.item(), lh.item(), lconf.item(), lcls.item(), \
                   nT, TP, FP, FPe, FN, TC

        else:
            # Inference: boxes as (center x, center y, width, height) in grid units
            pred_boxes[..., 0] = x.data + self.grid_x
            pred_boxes[..., 1] = y.data + self.grid_y
            pred_boxes[..., 2] = width
            pred_boxes[..., 3] = height

            # If not in training phase return predictions
            # (boxes are multiplied by the stride; class scores are left un-activated)
            output = torch.cat((pred_boxes.view(bs, -1, 4) * stride,
                                torch.sigmoid(pred_conf.view(bs, -1, 1)), pred_cls.view(bs, -1, self.nC)), -1)
            return output.data

暂且放到这里，之后再做解析

1.6 初始化模型

# Build the network from the cfg file; Darknet.__init__ overwrites the cfg 'height' with opt.img_size.
model = Darknet(opt.cfg, opt.img_size)

转到定义：

class Darknet(nn.Module):
    """YOLOv3 object detection model"""

    def __init__(self, config_path, img_size=416):
        """Parse the cfg file and build the layer list.

        Args:
            config_path: path of the model cfg file.
            img_size: image size; it replaces the 'height' entry of the first
                cfg block before the modules are created.
        """
        super(Darknet, self).__init__()
        self.module_defs = parse_model_config(config_path)
        self.module_defs[0]['height'] = img_size
        # create_modules pops the hyperparameter block, so self.module_defs
        # holds only layer definitions from here on.
        self.hyperparams, self.module_list = create_modules(self.module_defs)
        self.img_size = img_size
        # Names for the values a YOLO layer returns after its first element (the loss tensor)
        self.loss_names = ['loss', 'x', 'y', 'w', 'h', 'conf', 'cls', 'nT', 'TP', 'FP', 'FPe', 'FN', 'TC']

    def forward(self, x, targets=None, requestPrecision=False):
        """Run the network.

        Args:
            x: input batch.
            targets: ground-truth targets; when given, the model runs in training mode.
            requestPrecision: forwarded unchanged to every YOLO layer.

        Returns:
            Training: the sum of the losses of all YOLO layers; per-component
            values are accumulated in ``self.losses``.
            Inference: the detections of all YOLO layers concatenated along dim 1.
        """
        is_training = targets is not None
        output = []
        self.losses = defaultdict(float)
        layer_outputs = []  # output of every layer, indexed by layer position

        for i, (module_def, module) in enumerate(zip(self.module_defs, self.module_list)):
            if module_def['type'] in ['convolutional', 'upsample']:
                x = module(x)
            elif module_def['type'] == 'route':
                # Concatenate the referenced layer outputs along the channel axis
                layer_i = [int(x) for x in module_def['layers'].split(',')]
                x = torch.cat([layer_outputs[i] for i in layer_i], 1)
            elif module_def['type'] == 'shortcut':
                # Residual connection: previous output plus the referenced layer's output
                layer_i = int(module_def['from'])
                x = layer_outputs[-1] + layer_outputs[layer_i]
            elif module_def['type'] == 'yolo':
                # Train phase: get loss
                if is_training:
                    # module[0] is the YOLOLayer inside the Sequential wrapper;
                    # calling it directly allows the extra arguments to be passed.
                    x, *losses = module[0](x, targets, requestPrecision)
                    for name, loss in zip(self.loss_names, losses):
                        self.losses[name] += loss
                # Test phase: Get detections
                else:
                    x = module(x)
                output.append(x)
            layer_outputs.append(x)

        if is_training:
            # nT and TC were summed once per YOLO layer; dividing by 3 undoes
            # that - presumably because yolov3.cfg has three YOLO layers (TODO confirm).
            self.losses['nT'] /= 3
            self.losses['TC'] /= 3
            metrics = torch.zeros(4, len(self.losses['FPe']))  # TP, FP, FN, target_count

            # NOTE(review): [1:] drops the smallest unique TC value - presumably a
            # placeholder for "no target"; verify against build_targets.
            ui = np.unique(self.losses['TC'])[1:]
            for i in ui:
                j = self.losses['TC'] == float(i)
                metrics[0, i] = (self.losses['TP'][j] > 0).sum().float()  # TP
                metrics[1, i] = (self.losses['FP'][j] > 0).sum().float()  # FP
                metrics[2, i] = (self.losses['FN'][j] == 3).sum().float()  # FN
            metrics[3] = metrics.sum(0)
            metrics[1] += self.losses['FPe']

            # Collapse the per-class rows into scalar totals; the per-class
            # table stays available under 'metrics'.
            self.losses['TP'] = metrics[0].sum()
            self.losses['FP'] = metrics[1].sum()
            self.losses['FN'] = metrics[2].sum()
            self.losses['TC'] = 0
            self.losses['metrics'] = metrics

        return sum(output) if is_training else torch.cat(output, 1)

梳理一下属性值，以便更好理解：

module_defs: list类型，其中每个元素是一个dict，存储cfg文件中一个层（block）的配置
hyperparams: 超参数，整个网络需要的参数被存储到该属性中
module_list：整个网络所有的模型加载到pytorch中的nn.ModuleList()
loss_names: 有必要理解一下这里的loss中参数的含义
- loss
- x,y,w,h
- conf
- cls
- nT
- TP,FP,FPe,FN,TC

loss参数含义还不是很明白，留坑，待填坑

1.7 加载权重

都知道，pytorch版的yolov3权重文件是.pt结尾的，darknet版本的yolov3权重文件是.weights结尾的。

从下面的代码可以看出，这个版本两种格式的权重文件（.weights 和 .pt）都可以加载。

# Load weights; the file extension selects the loader.
if opt.weights_path.endswith('.weights'):  # darknet format
    load_weights(model, opt.weights_path)
elif opt.weights_path.endswith('.pt'):  # pytorch format
    # Load onto the CPU first so a checkpoint saved on a GPU opens anywhere.
    checkpoint = torch.load(opt.weights_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    del checkpoint  # release the checkpoint dict once the weights are copied
else:
    # Fail loudly instead of silently evaluating a randomly initialised model.
    raise ValueError("unsupported weights file %r: expected a '.weights' or '.pt' file"
                     % opt.weights_path)

1.8 计算mAP

# Evaluation loop: run the model on every batch, apply NMS, then score each
# image separately and average the per-image results.
print('Compute mAP...')

correct = 0
targets = None
# Only mAPs is used below; the other lists are never filled in this excerpt.
outputs, mAPs, TP, confidence, pred_class, target_class = [], [], [], [], [], []
for batch_i, (imgs, targets) in enumerate(dataloader):
    imgs = imgs.to(device)

    with torch.no_grad():
        output = model(imgs)
        output = non_max_suppression(output, conf_thres=opt.conf_thres, nms_thres=opt.nms_thres)

    # Compute average precision for each sample
    for sample_i in range(len(targets)):
        # One 0/1 flag per detection of this image, in confidence order
        correct = []

        # Ground-truth rows for this image; column 0 is read as the class and
        # columns 1:5 as the box below
        annotations = targets[sample_i]
        # Extract detections
        detections = output[sample_i]

        if detections is None:
            # If there are no detections but there are annotations mark as zero AP
            if annotations.size(0) != 0:
                mAPs.append(0)
            continue

        # Get detections sorted by decreasing confidence scores
        detections = detections[np.argsort(-detections[:, 4])]

        # If there are detections but no annotations, score the image as zero AP
        if annotations.size(0) == 0:
            target_cls = []
            #correct.extend([0 for _ in range(len(detections))])
            mAPs.append(0)
            continue
        else:
            target_cls = annotations[:, 0]

            # Extract target boxes as (x1, y1, x2, y2)
            # (scaled by the image size - presumably the annotations are normalised; TODO confirm)
            target_boxes = xywh2xyxy(annotations[:, 1:5])
            target_boxes *= opt.img_size

            # Indices of targets that were already matched by an earlier detection
            detected = []
            for *pred_bbox, conf, obj_conf, obj_pred in detections:

                pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                # Compute iou with target boxes
                iou = bbox_iou(pred_bbox, target_boxes)
                # Extract index of largest overlap
                best_i = np.argmax(iou)
                # If overlap exceeds threshold and classification is correct mark as correct
                # (each target can be claimed only once)
                if iou[best_i] > opt.iou_thres and obj_pred == annotations[best_i, 0] and best_i not in detected:
                    correct.append(1)
                    detected.append(best_i)
                else:
                    correct.append(0)

        # Compute Average Precision (AP) per class
        AP = ap_per_class(tp=correct, conf=detections[:, 4], pred_cls=detections[:, 6], target_cls=target_cls)

        # Compute mean AP for this image
        mAP = AP.mean()

        # Append image mAP to list
        mAPs.append(mAP)

        # Print image mAP and running mean mAP
        print('+ Sample [%d/%d] AP: %.4f (%.4f)' % (len(mAPs), len(dataloader) * opt.batch_size, mAP, np.mean(mAPs)))

print('Mean Average Precision: %.4f' % np.mean(mAPs))

留坑，待填

2. 阅读train.py

2.1 参数解读

def _str2bool(value):
    """Convert a command-line token such as 'true'/'false' or '1'/'0' to a bool.

    Without a converter, ``-resume False`` would store the non-empty string
    'False', which is truthy.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % value)


# Command-line options of train.py.
parser = argparse.ArgumentParser()
parser.add_argument('-epochs', type=int, default=68, help='number of epochs')
parser.add_argument('-batch_size', type=int, default=12, help='size of each image batch')
parser.add_argument('-data_config_path', type=str, default='cfg/coco.data', help='data config file path')
parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('-img_size', type=int, default=32 * 13, help='size of each image dimension')
parser.add_argument('-resume', type=_str2bool, default=False, help='resume training flag')
opt = parser.parse_args()
print(opt)

epochs: 训练的epoch数（训练循环的轮数）
batch_size: 设置batch
data_config_path: data文件位置
cfg: 记录cfg文件的位置
img_size: 设置图片大小
resume: 是否恢复训练（True or False）

2.2 随机初始化

# Seed the Python, NumPy and PyTorch CPU generators with 0.
for seed_cpu_rng in (random.seed, np.random.seed, torch.manual_seed):
    seed_cpu_rng(0)
if cuda:
    # Seed the current CUDA device, then all devices, and enable cuDNN autotuning.
    for seed_cuda_rng in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_cuda_rng(0)
    torch.backends.cudnn.benchmark = True

2.3 设置优化器

# Optimise only the parameters that require gradients, using SGD with Nesterov momentum.
trainable_parameters = (param for param in model.parameters() if param.requires_grad)
optimizer = torch.optim.SGD(
    trainable_parameters,
    lr=1e-3,
    momentum=.9,
    weight_decay=5e-4,
    nesterov=True,
)

使用SGD优化器，learning_rate=0.001,momentum=0.9,weight_decay=5e-4,使用nesterov动量

2.4 更新优化器

根据当前epoch来确定使用哪一个lr:

        # Update scheduler (automatic)
        # scheduler.step()

        # Update scheduler (manual)
        if epoch < 54:
            lr = 1e-3
        elif epoch < 61:
            lr = 1e-4
        else:
            lr = 1e-5
        for g in optimizer.param_groups:
            g['lr'] = lr

可以自动更新参数，也可以手工更新参数。

2.5 loss指标

mean_precision:

            # Precision
            precision = metrics[0] / (metrics[0] + metrics[1] + 1e-16)
            k = (metrics[0] + metrics[1]) > 0
            if k.sum() > 0:
                mean_precision = precision[k].mean()
            else:
                mean_precision = 0

mean_recall:

            # Recall
            recall = metrics[0] / (metrics[0] + metrics[2] + 1e-16)
            k = (metrics[0] + metrics[2]) > 0
            if k.sum() > 0:
                mean_recall = recall[k].mean()
            else:
                mean_recall = 0

然后将所有指标写到results.txt文件中

2.6 checkpoint相关

checkpoint参数：epoch, best_loss,model,optimizer

latest.pt: 最新的权重文件

best.pt: 当前最好的权重文件

        # Save latest checkpoint
        checkpoint = {'epoch': epoch,
                      'best_loss': best_loss,
                      'model': model.state_dict(),
                      'optimizer': optimizer.state_dict()}
        torch.save(checkpoint, 'checkpoints/latest.pt')

        # Save best checkpoint
        if best_loss == loss_per_target:
            os.system('cp checkpoints/latest.pt checkpoints/best.pt')

        # Save backup checkpoint
        if (epoch > 0) & (epoch % 5 == 0):
            os.system('cp checkpoints/latest.pt checkpoints/backup' + str(epoch) + '.pt')

3. 阅读detect.py

3.1 参数解读

def _str2bool(value):
    """Convert a command-line token such as 'true'/'false' or '1'/'0' to a bool.

    argparse's ``type=bool`` calls ``bool()`` on the raw string, so every
    non-empty value - including 'False' - would become True.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % value)


# Command-line options of detect.py.
parser.add_argument('-image_folder', type=str, default='data/samples', help='path to images')
parser.add_argument('-output_folder', type=str, default='output', help='path to outputs')
parser.add_argument('-plot_flag', type=_str2bool, default=True)
parser.add_argument('-txt_out', type=_str2bool, default=False)
parser.add_argument('-cfg', type=str, default='cfg/yolov3.cfg', help='cfg file path')
parser.add_argument('-class_path', type=str, default='data/coco.names', help='path to class label file')
parser.add_argument('-conf_thres', type=float, default=0.50, help='object confidence threshold')
parser.add_argument('-nms_thres', type=float, default=0.45, help='iou threshold for non-maximum suppression')
parser.add_argument('-batch_size', type=int, default=1, help='size of the batches')
parser.add_argument('-img_size', type=int, default=32 * 13, help='size of each image dimension')
opt = parser.parse_args()
print(opt)

image_folder: data/samples, 待检测的图片的文件夹
output_folder: output, 结果输出文件夹
plot_flag: True or False, 添加bbox, 保存图片
txt_out: True or False, 是否保存图片检测结果
cfg: cfg文件路径
class_path: 类别名称文件位置
conf_thres, nms_thres: 目标检测置信度，非极大抑制阈值
batch_size: 一般设置为1，选用默认的即可
img_size: 设置加载图片时候的图片大小

3.2 预测框的获取

        # Get detections
        with torch.no_grad():
            chip = torch.from_numpy(img).unsqueeze(0).to(device)
            pred = model(chip)
            pred = pred[pred[:, :, 4] > opt.conf_thres]

            if len(pred) > 0:
                detections = non_max_suppression(pred.unsqueeze(0), opt.conf_thres, opt.nms_thres)
                img_detections.extend(detections)
                imgs.extend(img_paths)

获取预测框，非极大值抑制。

3.3 核心-迭代图片画出预测框

# Iterate through images and save plot of detections
    for img_i, (path, detections) in enumerate(zip(imgs, img_detections)):
        print("image %g: '%s'" % (img_i, path))

        # Always load the image: its shape is needed to undo the padding below,
        # even when plot_flag is off.
        img = cv2.imread(path)

        # The output paths depend only on the input path, so derive them before
        # the detections check; the image is saved even when nothing was detected.
        results_img_path = os.path.join(opt.output_folder, os.path.basename(path))
        results_txt_path = results_img_path + '.txt'

        # The amount of padding that was added
        pad_x = max(img.shape[0] - img.shape[1], 0) * (opt.img_size / max(img.shape))
        pad_y = max(img.shape[1] - img.shape[0], 0) * (opt.img_size / max(img.shape))
        # Image height and width after padding is removed
        unpad_h = opt.img_size - pad_y
        unpad_w = opt.img_size - pad_x

        # Draw bounding boxes and labels of detections
        if detections is not None:
            unique_classes = detections[:, -1].cpu().unique()
            # One random colour per detected class
            bbox_colors = random.sample(color_list, len(unique_classes))

            # Rows are appended below, so start from a clean results file
            if os.path.isfile(results_txt_path):
                os.remove(results_txt_path)

            # Print the number of detections per class
            for i in unique_classes:
                n = (detections[:, -1].cpu() == i).sum()
                print('%g %ss' % (n, classes[int(i)]))

            for x1, y1, x2, y2, conf, cls_conf, cls_pred in detections:
                # Rescale coordinates to original dimensions
                box_h = ((y2 - y1) / unpad_h) * img.shape[0]
                box_w = ((x2 - x1) / unpad_w) * img.shape[1]
                y1 = (((y1 - pad_y // 2) / unpad_h) * img.shape[0]).round().item()
                x1 = (((x1 - pad_x // 2) / unpad_w) * img.shape[1]).round().item()
                x2 = (x1 + box_w).round().item()
                y2 = (y1 + box_h).round().item()
                # Clamp negative coordinates to the image border
                x1, y1, x2, y2 = max(x1, 0), max(y1, 0), max(x2, 0), max(y2, 0)

                # write to file
                if opt.txt_out:
                    with open(results_txt_path, 'a') as results_file:
                        results_file.write(('%g %g %g %g %g %g \n') % (x1, y1, x2, y2, cls_pred, cls_conf * conf))

                if opt.plot_flag:
                    # Add the bbox to the plot
                    label = '%s %.2f' % (classes[int(cls_pred)], conf)
                    color = bbox_colors[int(np.where(unique_classes == int(cls_pred))[0])]
                    plot_one_box([x1, y1, x2, y2], img, label=label, color=color)

        if opt.plot_flag:
            # Save generated image with detections
            cv2.imwrite(results_img_path.replace('.bmp', '.jpg').replace('.tif', '.jpg'), img)

Pytorch版本yolov3源码阅读

Pytorch版本yolov3源码阅读

1. 阅读test.py

1.1 参数解读

1.2 data文件解析

1.3 cfg文件解析

1.4 根据cfg文件创建模块

1.5 YOLOLayer

1.6 初始化模型

1.7 加载权重

1.8 计算mAP

2. 阅读train.py

2.1 参数解读

2.2 随机初始化

2.3 设置优化器

2.4 更新优化器

2.5 loss指标

2.6 checkpoint相关

3. 阅读detect.py

3.1 参数解读

3.2 预测框的获取

3.3 核心-迭代图片画出预测框

猜你喜欢