SSD Source Code Walkthrough

Structure diagram (the network diagram image from the original post is omitted here).
ssd_pascal.py 

from __future__ import print_function  
import caffe  
from caffe.model_libs import *  
from google.protobuf import text_format  
  
import math  
import os  
import shutil  
import stat  
import subprocess  
import sys  

'''Append extra convolution layers after the base network. (To avoid name collisions
with the base network's own layers, numbering continues from the last layer of the
base network.) The helpers used here are defined in ~/caffe/python/caffe/model_libs.py;
the positional arguments of ConvBNLayer are (net, from_layer, out_layer, use_bn,
use_relu, num_output, kernel_size, pad, stride). SSD as a whole is driven by the two
files ssd_pascal.py and model_libs.py; everything else is implemented as modules in
the underlying Caffe C++ code.'''
def AddExtraLayers(net, use_batchnorm=True, lr_mult=1):  
    use_relu = True  
   
    from_layer = net.keys()[-1]
    out_layer = "conv6_1"  
    ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 1, 0, 1, lr_mult=lr_mult)  
     
    from_layer = out_layer  
    out_layer = "conv6_2"  
    ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 512, 3, 1, 2, lr_mult=lr_mult)  

    from_layer = out_layer  
    out_layer = "conv7_1"  
    ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 128, 1, 0, 1, lr_mult=lr_mult)  

    from_layer = out_layer  
    out_layer = "conv7_2"  
    ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 3, 1, 2, lr_mult=lr_mult)  

    from_layer = out_layer  
    out_layer = "conv8_1"  
    ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 128, 1, 0, 1, lr_mult=lr_mult)  

    from_layer = out_layer  
    out_layer = "conv8_2"  
    ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 3, 0, 1, lr_mult=lr_mult)  

    from_layer = out_layer  
    out_layer = "conv9_1"  
    ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 128, 1, 0, 1, lr_mult=lr_mult)  

    from_layer = out_layer  
    out_layer = "conv9_2"  
    ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu, 256, 3, 0, 1, lr_mult=lr_mult)  

    return net  
 
caffe_root = os.getcwd() # root directory of caffe
run_soon = True          # True: start training as soon as all the training files are generated

'''True:  resume from where the last interrupted run stopped
   False: start training directly from the pretrained base model '''
resume_training   = True
remove_old_models = False # whether to remove old model/training files
  
train_data = "examples/VOC0712/VOC0712_trainval_lmdb"
test_data  = "examples/VOC0712/VOC0712_test_lmdb"

resize_width  = 300
resize_height = 300
resize = "{}x{}".format(resize_width, resize_height)

######################################### Data augmentation #################################################
# See ~/caffe/src/caffe/util/sampler.cpp for the implementation.
# 'min_jaccard_overlap': 0.1 ---> 0.3 ---> 0.5 ---> 0.7 ---> 0.9 ---> 1.0
batch_sampler = [  
        {  
                'sampler': {  
                        },  
                'max_trials': 1,  
                'max_sample': 1,  
        },  
        {  
                'sampler': {  
                        'min_scale': 0.3,  
                        'max_scale': 1.0,  
                        'min_aspect_ratio': 0.5,  
                        'max_aspect_ratio': 2.0,  
                        },  
                'sample_constraint': {  
                        'min_jaccard_overlap': 0.1,  
                        },  
                'max_trials': 50,  
                'max_sample': 1,  
        },  
        {  
                'sampler': {  
                        'min_scale': 0.3,  
                        'max_scale': 1.0,  
                        'min_aspect_ratio': 0.5,  
                        'max_aspect_ratio': 2.0,  
                        },  
                'sample_constraint': {  
                        'min_jaccard_overlap': 0.3,  
                        },  
                'max_trials': 50,  
                'max_sample': 1,  
        },  
        {  
                'sampler': {  
                        'min_scale': 0.3,  
                        'max_scale': 1.0,  
                        'min_aspect_ratio': 0.5,  
                        'max_aspect_ratio': 2.0,  
                        },  
                'sample_constraint': {  
                        'min_jaccard_overlap': 0.5,  
                        },  
                'max_trials': 50,  
                'max_sample': 1,  
        },  
        {  
                'sampler': {  
                        'min_scale': 0.3,  
                        'max_scale': 1.0,  
                        'min_aspect_ratio': 0.5,  
                        'max_aspect_ratio': 2.0,  
                        },  
                'sample_constraint': {  
                        'min_jaccard_overlap': 0.7,  
                        },  
                'max_trials': 50,  
                'max_sample': 1,  
        },  
        {  
                'sampler': {  
                        'min_scale': 0.3,  
                        'max_scale': 1.0,  
                        'min_aspect_ratio': 0.5,  
                        'max_aspect_ratio': 2.0,  
                        },  
                'sample_constraint': {  
                        'min_jaccard_overlap': 0.9,  
                        },  
                'max_trials': 50,  
                'max_sample': 1,  
        },  
        {  
                'sampler': {  
                        'min_scale': 0.3,  
                        'max_scale': 1.0,  
                        'min_aspect_ratio': 0.5,  
                        'max_aspect_ratio': 2.0,  
                        },  
                'sample_constraint': {  
                        'max_jaccard_overlap': 1.0,  
                        },  
                'max_trials': 50,  
                'max_sample': 1,  
        },  
  
        ]    
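'''A rough, runnable sketch of how one of the constrained batch_sampler entries
above is consumed. The real logic lives in ~/caffe/src/caffe/util/sampler.cpp;
the helper names below are illustrative, not Caffe APIs. For each entry, up to
max_trials random patches are drawn and at most max_sample of those satisfying
sample_constraint are kept.'''
import random

def sample_patch(sampler):
    # Draw one random crop in normalized [0, 1] image coordinates.
    scale = random.uniform(sampler['min_scale'], sampler['max_scale'])
    ar = random.uniform(sampler['min_aspect_ratio'], sampler['max_aspect_ratio'])
    w, h = scale * math.sqrt(ar), scale / math.sqrt(ar)
    x = random.uniform(0., max(0., 1. - w))
    y = random.uniform(0., max(0., 1. - h))
    return (x, y, min(1., x + w), min(1., y + h))

def jaccard(a, b):
    # Intersection-over-union of two (xmin, ymin, xmax, ymax) boxes.
    iw = max(0., min(a[2], b[2]) - max(a[0], b[0]))
    ih = max(0., min(a[3], b[3]) - max(a[1], b[1]))
    inter = iw * ih
    union = (a[2] - a[0]) * (a[3] - a[1]) + (b[2] - b[0]) * (b[3] - b[1]) - inter
    return inter / union if union > 0 else 0.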
####################################### Training transform parameters ###########################################
# See ~/caffe/src/caffe/proto/caffe.proto for the definitions.
train_transform_param = {
        'mirror': True,
        'mean_value': [104, 117, 123],        # per-channel (BGR) mean
        'force_color': True,                  # force images to be decoded as color (guards against grayscale inputs)
        'resize_param': {
                'prob': 1,                    # probability of applying this resize policy
                'resize_mode': P.Resize.WARP, # resize mode
                'height': resize_height,
                'width': resize_width,
                'interp_mode': [              # interpolation modes for resizing (one is picked at random)
                        P.Resize.LINEAR,
                        P.Resize.AREA,
                        P.Resize.NEAREST,
                        P.Resize.CUBIC,
                        P.Resize.LANCZOS4,
                        ],
                },

        # Parameters the data transformer uses for the distortion policy.
        'distort_param': {
                'brightness_prob': 0.5,  # probability of adjusting brightness
                'brightness_delta': 32,  # amount added to pixel values, drawn from [-delta, delta]; range [0, 255]
                'contrast_prob': 0.5,    # probability of adjusting contrast
                'contrast_lower': 0.5,   # lower bound of the random contrast factor
                'contrast_upper': 1.5,   # upper bound of the random contrast factor
                'hue_prob': 0.5,         # probability of adjusting hue
                'hue_delta': 18,         # amount added to the hue channel, drawn from [-delta, delta]; range [0, 180]
                'saturation_prob': 0.5,  # probability of adjusting saturation
                'saturation_lower': 0.5, # lower bound of the random saturation factor
                'saturation_upper': 1.5, # upper bound of the random saturation factor
                'random_order_prob': 0.0,# probability of randomly shuffling the image channels
                },

        # Parameters the data transformer uses for the expansion policy.
        'expand_param': {
                'prob': 0.5,             # probability of applying the expansion
                'max_expand_ratio': 4.0, # maximum ratio by which the image canvas is expanded
                },

        # Condition for emitting annotations.
        'emit_constraint': {
            'emit_type': caffe_pb2.EmitConstraint.CENTER, # an enum; CENTER is chosen here
            }
        }

# Test transform parameters.
test_transform_param = {
        'mean_value': [104, 117, 123],
        'force_color': True,
        'resize_param': {  
                'prob': 1,  
                'resize_mode': P.Resize.WARP,  
                'height': resize_height,  
                'width': resize_width,  
                'interp_mode': [P.Resize.LINEAR],  
                },  
  
        }  
####################################### Parameters and paths ############################################

# Whether to use batch norm for all newly added layers; currently only the
# non-batch-norm version has been tested.
use_batchnorm = False

# Multiplier applied to the learning rates below.
lr_mult = 1

# Since use_batchnorm = False above, this is the only place to touch when tuning
# the initial learning rate; after the rescaling further below the effective
# base_lr becomes 0.001.
if use_batchnorm:
    base_lr = 0.0004
else:
    # Learning rate when batch_size = 1, num_gpus = 1.
    base_lr = 0.00004

# job_name and model_name are defined like this in the original ssd_pascal.py;
# the paths below need them.
job_name = "SSD_{}".format(resize)
model_name = "VGG_VOC0712_{}".format(job_name)

# Directory that stores the model .prototxt files.
save_dir = "models/VGGNet/VOC0712/{}".format(job_name)
# Directory that stores the model snapshots.
snapshot_dir = "models/VGGNet/VOC0712/{}".format(job_name)
# Directory that stores the job scripts and log files.
job_dir = "jobs/VGGNet/VOC0712/{}".format(job_name)
# Directory that stores the detection results.
output_result_dir = "{}/data/VOCdevkit/results/VOC2007/{}/Main".format(os.environ['HOME'], job_name)
  
# Model definition files.
train_net_file = "{}/train.prototxt".format(save_dir)
test_net_file = "{}/test.prototxt".format(save_dir)
deploy_net_file = "{}/deploy.prototxt".format(save_dir)
solver_file = "{}/solver.prototxt".format(save_dir)
# Snapshot prefix.
snapshot_prefix = "{}/{}".format(snapshot_dir, model_name)
# Job script path.
job_file = "{}/{}.sh".format(job_dir, model_name)

# File that stores the names and sizes of the test images.
name_size_file = "data/VOC0712/test_name_size.txt"
# Path of the pretrained model.
pretrain_model = "models/VGGNet/VGG_ILSVRC_16_layers_fc_reduced.caffemodel"
# File that stores the LabelMapItem entries.
label_map_file = "data/VOC0712/labelmap_voc.prototxt"
  
# Parameters of the MultiBoxLoss layer; see ~/caffe/src/caffe/proto/caffe.proto
# for the exact definitions.
num_classes = 21
share_location = True   # bounding boxes share locations across classes
background_label_id=0   # class id of the background label
train_on_diff_gt = True # whether to use "difficult" ground truth

'''Implemented in the SoftmaxWithLoss and SigmoidCrossEntropyLoss layers:
how to normalize the loss aggregated over the batch, the spatial dimensions,
or other dimensions. Outputs that receive the ignore label are NOT excluded
when computing the normalization factor. Four modes:
1. FULL:       divide by the total number of output locations;
2. VALID:      divide by the number of output locations that do not carry
               ignore_label (behaves like FULL when ignore_label is not set);
3. BATCH_SIZE: divide by the batch size;
4. NONE:       do not normalize the loss.'''
normalization_mode = P.Loss.VALID

'''How a bbox is encoded relative to a prior (cell center or corners); see the
PriorBoxParameter definition. Three types: CORNER, CENTER_SIZE, CORNER_SIZE.'''
code_type = P.PriorBox.CENTER_SIZE

'''True: ignore cross-boundary bboxes during matching. A cross-boundary bbox is
one that lies partly outside the image region, i.e. predicted boxes sticking out
of the image would be discarded. False: keep them; otherwise the priors generated
at the border points of the feature map would be meaningless.'''
ignore_cross_boundary_bbox = False

'''Mining type used during training, three options:
NONE:         use no mining, which leads to a severe positive/negative imbalance;
MAX_NEGATIVE: select negatives according to their scores;
HARD_EXAMPLE: select hard examples following "Training Region-based Object
              Detectors with Online Hard Example Mining".
MAX_NEGATIVE is the hard negative mining strategy used in the SSD paper.'''
mining_type = P.MultiBoxLoss.MAX_NEGATIVE

neg_pos_ratio = 3.                     # negative/positive ratio, the 1:3 from the paper
loc_weight = (neg_pos_ratio + 1.) / 4. # weight of the localization loss, (3 + 1) / 4 = 1.0

# Parameters used by the MultiBoxLoss layer.
multibox_loss_param = {
    'loc_loss_type': P.MultiBoxLoss.SMOOTH_L1,# localization loss type, an enum: L2 or SMOOTH_L1
    'conf_loss_type': P.MultiBoxLoss.SOFTMAX, # confidence loss type, an enum: SOFTMAX or LOGISTIC
    'loc_weight': loc_weight,
    'num_classes': num_classes,
    'share_location': share_location,

    # Matching method used during training, an enum: BIPARTITE or PER_PREDICTION.
    # With PER_PREDICTION, overlap_threshold determines the additional matched bboxes.
    'match_type': P.MultiBoxLoss.PER_PREDICTION,

    'overlap_threshold': 0.5,      # IoU threshold
    'use_prior_for_matching': True,# whether to use the priors for matching
    'background_label_id': background_label_id,
    'use_difficult_gt': train_on_diff_gt,
    'mining_type': mining_type,
    'neg_pos_ratio': neg_pos_ratio,
    'neg_overlap': 0.5,            # overlap below 0.5 defines a negative; Faster R-CNN uses 0.3
    'code_type': code_type,
    'ignore_cross_boundary_bbox': ignore_cross_boundary_bbox,
    }

# Parameters shared by the loss layers.
loss_param = {
    'normalization': normalization_mode,
    }
 
min_dim = 300 # minimum dimension of the input image
 
# conv4_3 ==> 38 x 38  
# fc7     ==> 19 x 19  
# conv6_2 ==> 10 x 10  
# conv7_2 ==> 5 x 5  
# conv8_2 ==> 3 x 3  
# conv9_2 ==> 1 x 1 
#( pool6  ==> 1 x 1 )

# Source layers for the prior boxes; many SSD variants start from tweaks here.
mbox_source_layers = ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']

# The paper's Smin = 0.2 and Smax = 0.9, expressed here as 20 and 90; the code
# below turns them into min_sizes and max_sizes.
min_ratio = 20
max_ratio = 90

# math.floor() rounds down to the nearest integer. step is the spacing between
# successive ratio values: (90 - 20) / (6 - 2) = 17.
step = int(math.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - 2)))

# The loop below produces min_sizes and max_sizes.
min_sizes = []
max_sizes = []

# ratio runs from min_ratio to max_ratio (inclusive) in steps of 17.
# (xrange: this script targets Python 2.)
for ratio in xrange(min_ratio, max_ratio + 1, step):

  # Each iteration appends one entry to min_sizes and one to max_sizes.
  min_sizes.append(min_dim * ratio / 100.)
  max_sizes.append(min_dim * (ratio + step) / 100.)

min_sizes = [min_dim * 10 / 100.] + min_sizes
max_sizes = [min_dim * 20 / 100.] + max_sizes
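'''A quick check of what the loop above produces for min_dim = 300 (a worked
example, not part of the original script): step = 17, so ratio takes the values
20, 37, 54, 71, 88.'''
assert min_sizes == [30.0, 60.0, 111.0, 162.0, 213.0, 264.0]
assert max_sizes == [60.0, 111.0, 162.0, 213.0, 264.0, 315.0]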

'''Step of each prior-box source layer relative to the original image. The prior
center coordinates are multiplied by step, which maps a feature-map position back
to a position in the input image. For example, conv4_3 outputs a 38x38 feature
map for a 300x300 input, and 38 * 8 is roughly 300, so its step is 8. These
values are for 300x300 training images.'''
steps = [8, 16, 32, 64, 100, 300]

'''Aspect ratios: six sets for the six prior-box source layers. Compare the
aspect_ratio parameter of each layer in the generated train.prototxt; the
parameter is defined in caffe.proto, and model_libs.py shows how aspect_ratios
is passed through to aspect_ratio.'''
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]

'''Normalize conv4_3. model_libs.py emits a Normalize layer (defined in
~/caffe/src/caffe/layers/normalize_layer.cpp). One value per prior-box source
layer; a Normalize layer is produced wherever the value is not -1.'''
normalizations = [20, -1, -1, -1, -1, -1]

'''Variances used to encode/decode the bboxes; which set is used depends on the
code_type chosen above. One way to think about it: variance rescales the bbox
regression targets, so dividing by the variance amplifies the error between the
predicted and the ground-truth box, which enlarges the loss and the gradients
and speeds up convergence. (Also, top_data += top[0]->offset(0, 1); already
advances the pointer, so the variances do not overwrite the earlier results.)
prior_variance is handed to the variance variable in model_libs.py and wired by
prior_box_layer.cpp into the PriorBox layers; see the *_mbox_priorbox layers in
the generated train.prototxt.'''
if code_type == P.PriorBox.CENTER_SIZE:
  prior_variance = [0.1, 0.1, 0.2, 0.2]
else:
  prior_variance = [0.1]
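'''A minimal sketch of what the CENTER_SIZE decode in
~/caffe/src/caffe/util/bbox_util.cpp does with these variances (illustrative
only; the real implementation is C++ and also handles the case where the
variance is encoded in the targets):'''
def _decode_center_size(loc, prior, variance=(0.1, 0.1, 0.2, 0.2)):
    # loc is the predicted offset, prior the prior box, both as (cx, cy, w, h).
    cx = variance[0] * loc[0] * prior[2] + prior[0]  # shift the center by a scaled offset
    cy = variance[1] * loc[1] * prior[3] + prior[1]
    w = math.exp(variance[2] * loc[2]) * prior[2]    # rescale width/height exponentially
    h = math.exp(variance[3] * loc[3]) * prior[3]
    return (cx, cy, w, h)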

# The effect of the next two parameters is implemented in prior_box_layer.cpp.
# True: flip each aspect ratio, i.e. ratio r also yields 1.0/r, giving {1, 2, 3, 1/2, 1/3}.
flip = True
clip = False # True: clip the priors to [0, 1]
  
# GPUs to use.
gpus = "0,1,2,3"          # ids of the GPUs; with a single GPU keep only "0", otherwise this errors out
gpulist = gpus.split(",") # list of GPU ids
num_gpus = len(gpulist)   # number of GPUs

# Divide the mini-batch across the GPUs.
batch_size = 32           # number of training samples per batch; keep it within memory
accum_batch_size = 32     # together with batch_size this yields iter_size below

'''With iter_size = 1, each forward pass is followed by one backward pass; with
iter_size = 2, two forward passes accumulate before one update. This reduces the
memory each pass needs (some hardware cannot train otherwise) at the cost of
more wall-clock time per iteration; the total number of iterations is unchanged.'''
iter_size = accum_batch_size / batch_size
solver_mode = P.Solver.CPU
device_id = 0
batch_size_per_device = batch_size # batch handed to each device

if num_gpus > 0:

  # With several GPUs the work is split evenly across them to speed up training.
  batch_size_per_device = int(math.ceil(float(batch_size) / num_gpus))

  # iter_size for the multi-GPU case (the value above covers the single-device case).
  iter_size = int(math.ceil(float(accum_batch_size) / (batch_size_per_device * num_gpus)))
  solver_mode = P.Solver.GPU
  device_id = int(gpulist[0])
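'''Worked example with the defaults above (not part of the original script):
gpus = "0,1,2,3" gives num_gpus = 4, so batch_size_per_device = ceil(32 / 4) = 8
and iter_size = ceil(32 / (8 * 4)) = 1, i.e. each solver step still accumulates
an effective batch of 32.'''
assert batch_size_per_device == 8 and iter_size == 1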
 
# If the loss layer's NormalizationMode is NONE (no normalization), divide the
# base_lr from above (0.00004) by batch_size_per_device; with the 4-GPU default
# that is 0.00004 / 8 = 5e-6.
if normalization_mode == P.Loss.NONE:
  base_lr /= batch_size_per_device

'''Likewise, pick the base_lr for the other normalization modes. Above we chose
normalization_mode = P.Loss.VALID and loc_weight = (neg_pos_ratio + 1.) / 4 = 1,
so the new base_lr is 0.00004 * 25 = 0.001. That is why the generated
solver.prototxt ends up with base_lr = 0.001; if training diverges and you want
to lower base_lr, change base_lr = 0.00004 near the top of this file.'''
elif normalization_mode == P.Loss.VALID:
  base_lr *= 25. / loc_weight #base_lr = 0.00004 * 25 = 0.001
elif normalization_mode == P.Loss.FULL:
  base_lr *= 2000.            #base_lr = 0.00004 * 2000 = 0.08 (roughly 2000 prior bboxes per image)

# Evaluate on the whole test set.
num_test_image = 4952# number of images in the test set
test_batch_size = 8  # ideally test_batch_size divides num_test_image, otherwise the mAP is slightly off

'''Number of test iterations needed to cover the whole test set, just as for a
classification network: 4952 / 8 = 619. If num_test_image is not divisible by
test_batch_size, the quotient is rounded up.'''
test_iter = int(math.ceil(float(num_test_image) / test_batch_size))
 
# Values of the parameters written into solver.prototxt.
solver_param = {

    # Training parameters.
    'base_lr': base_lr,                  # the base_lr computed above
    'weight_decay': 0.0005,
    'lr_policy': "multistep",
    'stepvalue': [80000, 100000, 120000],# multi-step decay
    'gamma': 0.1,
    'momentum': 0.9,
    'iter_size': iter_size,
    'max_iter': 120000,
    'snapshot': 80000,
    'display': 10,
    'average_loss': 10,
    'type': "SGD",
    'solver_mode': solver_mode,
    'device_id': device_id,
    'debug_info': False,
    'snapshot_after_train': True,

    # Test parameters.
    'test_iter': [test_iter],
    'test_interval': 10000, # run the test pass every 10000 training iterations
    'eval_type': "detection",
    'ap_version': "11point",
    'test_initialization': False,
    }
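'''Worked schedule (illustrative): with lr_policy "multistep", gamma = 0.1 and
the stepvalues above, starting from base_lr = 0.001 the solver uses
    iterations      0 -  79999: lr = 0.001
    iterations  80000 -  99999: lr = 0.0001
    iterations 100000 - 119999: lr = 0.00001'''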

# Parameters for generating the detection output.
det_out_param = {
    'num_classes': num_classes,
    'share_location': share_location,           # shared locations across classes
    'background_label_id': background_label_id,

    # Non-maximum suppression: threshold 0.45; top_k is the maximum number of bboxes kept.
    'nms_param': {'nms_threshold': 0.45, 'top_k': 400},

    # Parameters for saving the detection results; see SaveOutputParameter in caffe.proto.
    'save_output_param': {
        'output_directory': output_result_dir,  # output directory
        'output_name_prefix': "comp4_det_test_",# prefix of the output names

        # Output format: "VOC" for PASCAL VOC, "COCO" for MS COCO.
        'output_format': "VOC",
        'label_map_file': label_map_file,       # label map file
        'name_size_file': name_size_file,       # test_name_size.txt, the test image sizes
        'num_test_image': num_test_image,
        },
    'keep_top_k': 200,# total number of bboxes kept per image after NMS
    'confidence_threshold': 0.01,# only keep detections above this confidence; if unset, keep all boxes
    'code_type': code_type,# bbox encoding
    }
  
# Parameters for evaluating the detections; see DetectionEvaluateParameter in caffe.proto.
det_eval_param = {
    'num_classes': num_classes,
    'background_label_id': background_label_id,
    'overlap_threshold': 0.5,        # overlap threshold, 0.5
    'evaluate_difficult_gt': False,  # if True, also evaluate against "difficult" ground truth
    'name_size_file': name_size_file,# path of test_name_size.txt
    }
  
######## Do not modify anything below this line. ########

# Check that all files and data the training/validation run needs exist.
check_if_exist(train_data)
check_if_exist(test_data)
check_if_exist(label_map_file)
check_if_exist(pretrain_model)
make_if_not_exist(save_dir)
make_if_not_exist(job_dir)
make_if_not_exist(snapshot_dir)

# Create the training net; the heavy lifting happens in model_libs.py.
net = caffe.NetSpec()

'''Call CreateAnnotatedDataLayer() from model_libs.py to create the annotated
data layer with the parameters assembled above. model_libs.py offers four base
networks: VGG, ZF, ResNet101 and ResNet152.'''
net.data, net.label = CreateAnnotatedDataLayer(train_data, batch_size=batch_size_per_device,
        train=True, output_label=True, label_map_file=label_map_file,
        transform_param=train_transform_param, batch_sampler=batch_sampler)
		
'''Call VGGNetBody() from model_libs.py to build the truncated VGG base network
(the other base networks, ZF/ResNet101/ResNet152, have analogous builders).
The arguments mean:
from_layer:      the base network reads from the output of the data layer;
fully_conv=True: use the fully convolutional variant (fc6/fc7 become convolutions);
reduced=True:    pick the reduced parameterization of those converted layers;
dilated=True:    whether to keep the pool5 layer between conv5_3 and fc6, choose
                 its parameters, and (together with reduced) the dilation settings;
dropout:         whether to include dropout layers.'''
VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True, dilated=True,  dropout=False)

# Add the extra feature layers defined by AddExtraLayers() at the top of this
# file: conv6_1, conv6_2 and so on.
AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)
  
'''Call CreateMultiBoxHead() to create the layers that generate and match the
priors; all of its parameters were explained above (see caffe.proto,
model_libs.py and the corresponding .cpp files). The created layers include
conv_mbox_conf, conv_mbox_loc, their perm and flat companions, and the
conv_mbox_priorbox layers that generate the priors.'''
mbox_layers = CreateMultiBoxHead(net, data_layer='data', from_layers=mbox_source_layers,  
        use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes,  
        aspect_ratios=aspect_ratios, steps=steps, normalizations=normalizations,  
        num_classes=num_classes, share_location=share_location, flip=flip, clip=clip,  
        prior_variance=prior_variance, kernel_size=3, pad=1, lr_mult=lr_mult)  
  
# Create the MultiBoxLoss layer, the sum of the confidence and localization losses; see multibox_loss_layer.cpp.
name = "mbox_loss"  
mbox_layers.append(net.label)  

'''propagate_down specifies whether to backpropagate into each bottom. If left
unspecified, Caffe infers automatically whether each input needs gradients.
True for an input forces backpropagation into it; False skips it. The list must
be empty or have exactly one entry per bottom; see propagate_down[0]..[3] in the
.cpp file for the details.'''
net[name] = L.MultiBoxLoss(*mbox_layers, multibox_loss_param=multibox_loss_param,  
        loss_param=loss_param, include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),  
        propagate_down=[True, True, False, False]) 
  
with open(train_net_file, 'w') as f: # write the layers assembled above into the prototxt file
    print('name: "{}_train"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(train_net_file, job_dir) # copy train.prototxt into job_dir
  
# Create the test net. The first part mirrors the training net.
net = caffe.NetSpec()  
net.data, net.label = CreateAnnotatedDataLayer(test_data, batch_size=test_batch_size,  
        train=False, output_label=True, label_map_file=label_map_file,  
        transform_param=test_transform_param)  
  
VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True, dilated=True,  
    dropout=False)  
  
AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)  
  
mbox_layers = CreateMultiBoxHead(net, data_layer='data', from_layers=mbox_source_layers,  
        use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes,  
        aspect_ratios=aspect_ratios, steps=steps, normalizations=normalizations,  
        num_classes=num_classes, share_location=share_location, flip=flip, clip=clip,  
        prior_variance=prior_variance, kernel_size=3, pad=1, lr_mult=lr_mult)  
  
conf_name = "mbox_conf"  #置信的交叉验证 
if multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.SOFTMAX:  
  reshape_name = "{}_reshape".format(conf_name)  
  net[reshape_name] = L.Reshape(net[conf_name], shape=dict(dim=[0, -1, num_classes]))  
  softmax_name = "{}_softmax".format(conf_name)  
  net[softmax_name] = L.Softmax(net[reshape_name], axis=2)  
  flatten_name = "{}_flatten".format(conf_name)  
  net[flatten_name] = L.Flatten(net[softmax_name], axis=1)  
  mbox_layers[1] = net[flatten_name]  
elif multibox_loss_param["conf_loss_type"] == P.MultiBoxLoss.LOGISTIC:  
  sigmoid_name = "{}_sigmoid".format(conf_name)  
  net[sigmoid_name] = L.Sigmoid(net[conf_name])  
  mbox_layers[1] = net[sigmoid_name]  
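'''Shape walk-through for the SOFTMAX branch above (illustrative): with batch
size N and this script's prior configuration (8732 priors for SSD300),
mbox_conf arrives flattened as N x (8732 * 21); Reshape turns it into
N x 8732 x 21, Softmax normalizes axis 2 (the 21 class scores of each prior),
and Flatten restores N x (8732 * 21), so DetectionOutput sees the same layout
as during training.'''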
  
# This part is specific to the test net: the detection output and evaluation layers.
net.detection_out = L.DetectionOutput(*mbox_layers,  
    detection_output_param=det_out_param,  
    include=dict(phase=caffe_pb2.Phase.Value('TEST')))  
net.detection_eval = L.DetectionEvaluate(net.detection_out, net.label,  
    detection_evaluate_param=det_eval_param,  
    include=dict(phase=caffe_pb2.Phase.Value('TEST')))  
  
with open(test_net_file, 'w') as f: # write test.prototxt
    print('name: "{}_test"'.format(model_name), file=f)  
    print(net.to_proto(), file=f)  
shutil.copy(test_net_file, job_dir)  
  
# Create the deploy net by removing the first and last layers of the test net.
deploy_net = net
with open(deploy_net_file, 'w') as f:
    net_param = deploy_net.to_proto()

    # Remove the first (AnnotatedData) and the last (DetectionEvaluate) layer.
    del net_param.layer[0]  # remove the first layer
    del net_param.layer[-1] # remove the last layer
    net_param.name = '{}_deploy'.format(model_name) # name of the net
    net_param.input.extend(['data']) # declare 'data' as the input

    # Input shape recorded in deploy.prototxt; here it is [1, 3, 300, 300].
    net_param.input_shape.extend([
        caffe_pb2.BlobShape(dim=[1, 3, resize_height, resize_width])])
    print(net_param, file=f) # write to file
shutil.copy(deploy_net_file, job_dir) # copy into job_dir
  
# Create solver.prototxt.
solver = caffe_pb2.SolverParameter(
        # Collect the solver parameters defined above.
        train_net=train_net_file,
        test_net=[test_net_file],
        snapshot_prefix=snapshot_prefix,
        **solver_param)

with open(solver_file, 'w') as f:# write the collected parameters into solver.prototxt
    print(solver, file=f)
shutil.copy(solver_file, job_dir) # copy into job_dir
  
max_iter = 0 # largest snapshot iteration found so far, initialized to 0

# Find the most recent snapshot: if training was interrupted, resume from the
# model saved at the interruption.
for file in os.listdir(snapshot_dir):
  if file.endswith(".solverstate"):# a saved solver state exists, training can resume from it
    basename = os.path.splitext(file)[0]
    iter = int(basename.split("{}_iter_".format(model_name))[1])
    if iter > max_iter:# keep the largest iteration number seen
      max_iter = iter
  
# Training command.
train_src_param = '--weights="{}" \\\n'.format(pretrain_model) # fine-tune from the pretrained model

# As training progresses, max_iter grows; the two blocks below resume from the
# newest snapshot and delete the older models.
if resume_training:
  if max_iter > 0:
    # A snapshot exists: resume from the latest solver state instead of the
    # pretrained ImageNet VGG16 weights.
    train_src_param = '--snapshot="{}_iter_{}.solverstate" \\\n'.format(snapshot_prefix, max_iter)
  
# Remove any snapshots older than max_iter.
if remove_old_models:
  for file in os.listdir(snapshot_dir):# walk the model files
    if file.endswith(".solverstate"):# solver-state files
      basename = os.path.splitext(file)[0]
      iter = int(basename.split("{}_iter_".format(model_name))[1]) # iteration number of this file
      if max_iter > iter:# older than the newest snapshot: delete it
        os.remove("{}/{}".format(snapshot_dir, file))
    if file.endswith(".caffemodel"):# model weight files
      basename = os.path.splitext(file)[0]
      iter = int(basename.split("{}_iter_".format(model_name))[1]) # iteration number of this file
      if max_iter > iter:# older than the newest snapshot: delete it
        os.remove("{}/{}".format(snapshot_dir, file))
  
# Create the job file.
with open(job_file, 'w') as f: # write the training command into an executable .sh script
  f.write('cd {}\n'.format(caffe_root))  
  f.write('./build/tools/caffe train \\\n')  
  f.write('--solver="{}" \\\n'.format(solver_file))  
  f.write(train_src_param)  
  if solver_param['solver_mode'] == P.Solver.GPU:  
    f.write('--gpu {} 2>&1 | tee {}/{}.log\n'.format(gpus, job_dir, model_name))  
  else:  
    f.write('2>&1 | tee {}/{}.log\n'.format(job_dir, model_name))  
  
# Copy this script to job_dir.
py_file = os.path.abspath(__file__)  
shutil.copy(py_file, job_dir)  
  
# Run.
os.chmod(job_file, stat.S_IRWXU)  
if run_soon:  
  subprocess.call(job_file, shell=True)  

train.prototxt

name: "VGG_VOC0712_SSD_300x300_train"

########################## Data augmentation, paths and parameters ##########################################
layer {
  name: "data"
  type: "AnnotatedData"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  
  # Transform parameters; see ~/caffe/src/caffe/proto/caffe.proto.
  transform_param {
    mirror: true # random mirroring

    # Per-channel means.
    mean_value: 104.0 #B
    mean_value: 117.0 #G
    mean_value: 123.0 #R
    resize_param {
      prob: 1.0 # probability of resizing
      resize_mode: WARP # stretch to the target size
      height: 300
      width: 300

      # Interpolation modes used for resizing, an enum; one is picked at random.
      interp_mode: LINEAR
      interp_mode: AREA
      interp_mode: NEAREST
      interp_mode: CUBIC
      interp_mode: LANCZOS4
    }

    # Condition for emitting annotations, an enum; CENTER here.
    emit_constraint {
      emit_type: CENTER
    }
    }
  }
  
  # Parameters of the LMDB training data.
  data_param {
    source: "data/VAP/trainval_lmdb"
    batch_size: 1
    backend: LMDB
  }

  # Annotated (labeled) training data.
  annotated_data_param {
    batch_sampler {
      max_sample: 1
      max_trials: 1 # number of sampling trials
    }
	
	# Six constrained batch_samplers follow (min_jaccard_overlap 0.1 ... 0.9,
	# then max_jaccard_overlap 1.0); with the unconstrained one above that makes
	# seven samplers in total.
	#1 IoU=0.1
    batch_sampler {
      sampler {
        min_scale: 0.300000011921 # scale of the sampled patch relative to the image
        max_scale: 1.0
        min_aspect_ratio: 0.5     # aspect ratio
        max_aspect_ratio: 2.0
      }
      sample_constraint {
        min_jaccard_overlap: 0.10000000149
      }
      max_sample: 1
      max_trials: 50
    }
	
	#2 IoU=0.3
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint {
        min_jaccard_overlap: 0.300000011921
      }
      max_sample: 1
      max_trials: 50
    }
	
	#3 IoU=0.5
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint {
        min_jaccard_overlap: 0.5
      }
      max_sample: 1
      max_trials: 50
    }
	
	#4 IoU=0.7
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint {
        min_jaccard_overlap: 0.699999988079
      }
      max_sample: 1
      max_trials: 50
    }
	
	#5 IoU=0.9
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint {
        min_jaccard_overlap: 0.899999976158
      }
      max_sample: 1
      max_trials: 50
    }
	
	#6 IoU=1.0
    batch_sampler {
      sampler {
        min_scale: 0.300000011921
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      }
      sample_constraint {
        max_jaccard_overlap: 1.0
      }
      max_sample: 1
      max_trials: 50
    }
	
    label_map_file: "data/VAP/labelmap_voc.prototxt"
  }
}

############################### Network structure ###################################################
# bottom is the layer input, top its output; in-place layers (e.g. ReLU) reuse the same blob for both.
#-----------------------------------------------------------------------------------------
#	lr_mult and decay_mult: from the start through conv2_2 they are 0 0 0 0;
#	from conv3_1 to the end they are 1 1 2 0.
#	weight_filler is always {type: "xavier"}
#	bias_filler   is always {type: "constant"  value: 0.0}
#-----------------------------------------------------------------------------------------
#	conv1_1  [num_output] [pad] [kernel_size] = 64  1  3
#	relu1_1
#	conv1_2  [num_output] [pad] [kernel_size] = 64  1  3
#	relu1_2
#	pool1    [pool] [kernel_size] [stride] [pad] = MAX  2  2  0
#-----------------------------------------------------------------------------------------
#	conv2_1  [num_output] [pad] [kernel_size] = 128  1  3
#	relu2_1
#	conv2_2  [num_output] [pad] [kernel_size] = 128  1  3
#	relu2_2
#	pool2    [pool] [kernel_size] [stride] [pad] = MAX  2  2  0
#-----------------------------------------------------------------------------------------
#	conv3_1  [num_output] [pad] [kernel_size] = 256  1  3
#	relu3_1
#	conv3_2  [num_output] [pad] [kernel_size] = 256  1  3
#	relu3_2
#	conv3_3  [num_output] [pad] [kernel_size] = 256  1  3
#	relu3_3
#	pool3    [pool] [kernel_size] [stride] [pad] = MAX  2  2  0
#-----------------------------------------------------------------------------------------
#	conv4_1  [num_output] [pad] [kernel_size] = 512  1  3
#	relu4_1
#	conv4_2  [num_output] [pad] [kernel_size] = 512  1  3
#	relu4_2
#-1-conv4_3  [num_output] [pad] [kernel_size] = 512  1  3                   38*38
#	relu4_3
#	pool4    [pool] [kernel_size] [stride] [pad] = MAX  2  2  0
#-----------------------------------------------------------------------------------------
#	conv5_1  [num_output] [pad] [kernel_size] = 512  1  3
#	relu5_1
#	conv5_2  [num_output] [pad] [kernel_size] = 512  1  3
#	relu5_2
#	conv5_3  [num_output] [pad] [kernel_size] = 512  1  3
#	relu5_3
#	pool5    [pool] [kernel_size] [stride] [pad] = MAX  3  1  1
#-----------------------------------------------------------------------------------------
#	fc6      [num_output] [pad] [kernel_size] = 1024  6  3
#	relu6
#-----------------------------------------------------------------------------------------
#-2-fc7      [num_output] [pad] [kernel_size] = 1024  0  1                  19*19
#	relu7
#-----------------------------------------------------------------------------------------
#	conv6_1  [num_output] [pad] [kernel_size] [stride] = 256  0  1  1
#	conv6_1_relu
#-3-conv6_2  [num_output] [pad] [kernel_size] [stride] = 512  1  3  2       10*10
#	conv6_2_relu
#-----------------------------------------------------------------------------------------
#	conv7_1  [num_output] [pad] [kernel_size] [stride] = 128  0  1  1
#	conv7_1_relu
#-4-conv7_2  [num_output] [pad] [kernel_size] [stride] = 256  1  3  2        5*5
#	conv7_2_relu
#-----------------------------------------------------------------------------------------
#	conv8_1  [num_output] [pad] [kernel_size] [stride] = 128  0  1  1
#	conv8_1_relu
#-5-conv8_2  [num_output] [pad] [kernel_size] [stride] = 256  1  3  2        3*3
#	conv8_2_relu
#-----------------------------------------------------------------------------------------
#-6-pool6    [pool]=AVE   [global_pooling]=true                              1*1
#-----------------------------------------------------------------------------------------
layer {
  name: "conv1_1"
  type: "Convolution"
  bottom: "data"
  top: "conv1_1"

  # Per-blob learning parameters. The effective learning rate is lr_mult times
  # the base_lr in solver.prototxt; with two param blocks, the first is for the
  # weights and the second for the bias (the bias learning rate is commonly
  # twice the weight learning rate). decay_mult scales the weight decay, the
  # regularization term added to the cost function against overfitting.
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 64 # number of filters
    pad: 1
    kernel_size: 3

    # Weight initialization; types include constant, xavier, gaussian.
    weight_filler {
      type: "xavier"
    }

    # Bias initialization, usually constant 0.
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu1_1"
  type: "ReLU"
  bottom: "conv1_1"
  top: "conv1_1"
}

layer {
  name: "conv1_2"
  type: "Convolution"
  bottom: "conv1_1"
  top: "conv1_2"
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 64 
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu1_2"
  type: "ReLU"
  bottom: "conv1_2"
  top: "conv1_2"
}

layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1_2"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

layer {
  name: "conv2_1"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2_1"
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu2_1"
  type: "ReLU"
  bottom: "conv2_1"
  top: "conv2_1"
}

layer {
  name: "conv2_2"
  type: "Convolution"
  bottom: "conv2_1"
  top: "conv2_2"
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  param {
    lr_mult: 0.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu2_2"
  type: "ReLU"
  bottom: "conv2_2"
  top: "conv2_2"
}

layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2_2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

layer {
  name: "conv3_1"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu3_1"
  type: "ReLU"
  bottom: "conv3_1"
  top: "conv3_1"
}

layer {
  name: "conv3_2"
  type: "Convolution"
  bottom: "conv3_1"
  top: "conv3_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu3_2"
  type: "ReLU"
  bottom: "conv3_2"
  top: "conv3_2"
}

layer {
  name: "conv3_3"
  type: "Convolution"
  bottom: "conv3_2"
  top: "conv3_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu3_3"
  type: "ReLU"
  bottom: "conv3_3"
  top: "conv3_3"
}

layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3_3"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

layer {
  name: "conv4_1"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu4_1"
  type: "ReLU"
  bottom: "conv4_1"
  top: "conv4_1"
}

layer {
  name: "conv4_2"
  type: "Convolution"
  bottom: "conv4_1"
  top: "conv4_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu4_2"
  type: "ReLU"
  bottom: "conv4_2"
  top: "conv4_2"
}

########################## 1st bbox output layer ########################################
layer {
  name: "conv4_3"
  type: "Convolution"
  bottom: "conv4_2"
  top: "conv4_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu4_3"
  type: "ReLU"
  bottom: "conv4_3"
  top: "conv4_3"
}

layer {
  name: "pool4"
  type: "Pooling"
  bottom: "conv4_3"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

layer {
  name: "conv5_1"
  type: "Convolution"
  bottom: "pool4"
  top: "conv5_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu5_1"
  type: "ReLU"
  bottom: "conv5_1"
  top: "conv5_1"
}

layer {
  name: "conv5_2"
  type: "Convolution"
  bottom: "conv5_1"
  top: "conv5_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu5_2"
  type: "ReLU"
  bottom: "conv5_2"
  top: "conv5_2"
}

layer {
  name: "conv5_3"
  type: "Convolution"
  bottom: "conv5_2"
  top: "conv5_3"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu5_3"
  type: "ReLU"
  bottom: "conv5_3"
  top: "conv5_3"
}

layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5_3"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 1
    pad: 1
  }
}

layer {
  name: "fc6"
  type: "Convolution"
  bottom: "pool5"
  top: "fc6"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 1024
    pad: 6
    kernel_size: 3
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
    dilation: 6
  }
}

layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}

########################## 2nd bbox output layer ################################
layer {
  name: "fc7"
  type: "Convolution"
  bottom: "fc6"
  top: "fc7"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 1024
    kernel_size: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}

layer {
  name: "conv6_1"
  type: "Convolution"
  bottom: "fc7"
  top: "conv6_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv6_1_relu"
  type: "ReLU"
  bottom: "conv6_1"
  top: "conv6_1"
}

############################## 3rd bbox output layer ######################################
layer {
  name: "conv6_2"
  type: "Convolution"
  bottom: "conv6_1"
  top: "conv6_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 512
    pad: 1
    kernel_size: 3
    stride: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv6_2_relu"
  type: "ReLU"
  bottom: "conv6_2"
  top: "conv6_2"
}

layer {
  name: "conv7_1"
  type: "Convolution"
  bottom: "conv6_2"
  top: "conv7_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv7_1_relu"
  type: "ReLU"
  bottom: "conv7_1"
  top: "conv7_1"
}

################################ 4th bbox output layer ########################################
layer {
  name: "conv7_2"
  type: "Convolution"
  bottom: "conv7_1"
  top: "conv7_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv7_2_relu"
  type: "ReLU"
  bottom: "conv7_2"
  top: "conv7_2"
}

layer {
  name: "conv8_1"
  type: "Convolution"
  bottom: "conv7_2"
  top: "conv8_1"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 128
    pad: 0
    kernel_size: 1
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv8_1_relu"
  type: "ReLU"
  bottom: "conv8_1"
  top: "conv8_1"
}

############################## 5th bbox output layer #######################################
layer {
  name: "conv8_2"
  type: "Convolution"
  bottom: "conv8_1"
  top: "conv8_2"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    stride: 2
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv8_2_relu"
  type: "ReLU"
  bottom: "conv8_2"
  top: "conv8_2"
}

################################ 6th bbox output layer #######################################
layer {
  name: "pool6"
  type: "Pooling"
  bottom: "conv8_2"
  top: "pool6"
  pooling_param {
    pool: AVE
    global_pooling: true
  }
}


######################### Final processing of the bbox output layers ########################################
#-----------------------------------------------------------------------------------------
#	conv4_3--->conv4_3_norm
#
#	conv4_3_norm--->conv4_3_norm_mbox_loc    [num_output] = 12
#	conv4_3_norm_mbox_loc_perm
#	conv4_3_norm_mbox_loc_flat
#
#	conv4_3_norm--->conv4_3_norm_mbox_conf   [num_output] = 6
#	conv4_3_norm_mbox_conf_perm
#	conv4_3_norm_mbox_conf_flat
#
#	conv4_3_norm--->conv4_3_norm_mbox_priorbox  [min_size] = 30 [aspect_ratio] = 2
#-----------------------------------------------------------------------------------------
#	fc7--->fc7_mbox_loc    [num_output] = 24
#	fc7_mbox_loc_perm
#	fc7_mbox_loc_flat
#
#	fc7--->fc7_mbox_conf   [num_output] = 12
#	fc7_mbox_conf_perm
#	fc7_mbox_conf_flat
#
#	fc7--->fc7_mbox_priorbox  [min_size~max_size] = 60~114 [aspect_ratio] = 2,3
#-----------------------------------------------------------------------------------------
#	conv6_2--->conv6_2_mbox_loc    [num_output] = 24
#	conv6_2_mbox_loc_perm
#	conv6_2_mbox_loc_flat
#
#	conv6_2--->conv6_2_mbox_conf   [num_output] = 12
#	conv6_2_mbox_conf_perm
#	conv6_2_mbox_conf_flat
#
#	conv6_2-->conv6_2_mbox_priorbox [min_size~max_size] = 114~168 [aspect_ratio] = 2,3
#-----------------------------------------------------------------------------------------
#	conv7_2--->conv7_2_mbox_loc    [num_output] = 24
#	conv7_2_mbox_loc_perm
#	conv7_2_mbox_loc_flat
#
#	conv7_2--->conv7_2_mbox_conf   [num_output] = 12
#	conv7_2_mbox_conf_perm
#	conv7_2_mbox_conf_flat
#
#	conv7_2-->conv7_2_mbox_priorbox [min_size~max_size] = 168~222 [aspect_ratio] = 2,3
#-----------------------------------------------------------------------------------------
#	conv8_2--->conv8_2_mbox_loc    [num_output] = 24
#	conv8_2_mbox_loc_perm
#	conv8_2_mbox_loc_flat
#
#	conv8_2--->conv8_2_mbox_conf   [num_output] = 12
#	conv8_2_mbox_conf_perm
#	conv8_2_mbox_conf_flat
#
#	conv8_2--->conv8_2_mbox_priorbox [min_size~max_size] = 222~276 [aspect_ratio] = 2,3
#-----------------------------------------------------------------------------------------
#	pool6--->pool6_mbox_loc    [num_output] = 24
#	pool6_mbox_loc_perm
#	pool6_mbox_loc_flat
#
#	pool6--->pool6_mbox_conf   [num_output] = 12
#	pool6_mbox_conf_perm
#	pool6_mbox_conf_flat
#
#	pool6--->pool6_mbox_priorbox [min_size~max_size] = 276~330 [aspect_ratio] = 2,3
#-----------------------------------------------------------------------------------------
#	..._mbox_loc_flat---->mbox_loc
#	..._mbox_conf_flat--->mbox_conf          --->mbox_loss
#	..._mbox_priorbox---->mbox_priorbox
#-----------------------------------------------------------------------------------------

# Why apply Normalize only to conv4_3? SSD author Wei Liu's answer: "That was
# discovered in my other paper (ParseNet) that conv4_3 has different scale from
# other layers. That is why I add L2 normalization for conv4_3 only."
# Why scale_filler {type: "constant" value: 20.0}? Wei Liu again: "The L2 trick
# is only needed when your base network is not trained with BN. 20 is some empirical
# value that seems to work well. You can check the ParseNet paper for more details."
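A minimal numpy sketch of what this layer computes (illustrative only; the real implementation is Normalize_layer.cpp, and the per-channel scale is a learned parameter initialized to 20):

import numpy as np

def l2_normalize_channels(x, scale):
    # x: (N, C, H, W) feature map; scale: learned per-channel weights of shape (C,)
    norm = np.sqrt((x ** 2).sum(axis=1, keepdims=True)) + 1e-10  # across_spatial: false -> norm over channels at each position
    return x / norm * scale.reshape(1, -1, 1, 1)                 # channel_shared: false -> one factor per channel

x = np.random.randn(1, 512, 38, 38).astype(np.float32)
y = l2_normalize_channels(x, np.full(512, 20.0, dtype=np.float32))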
layer {
  name: "conv4_3_norm"
  type: "Normalize"
  bottom: "conv4_3"
  top: "conv4_3_norm"
  norm_param {
    across_spatial: false
    scale_filler {
      type: "constant"
      value: 20.0
    }
    channel_shared: false # whether the scale factor is shared across channels
  }
}

layer {
  name: "conv4_3_norm_mbox_loc"
  type: "Convolution"
  bottom: "conv4_3_norm"
  top: "conv4_3_norm_mbox_loc"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 12
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

# Permute reorders the blob axes (for example, snpe and caffe store data in
# different orders, so axes must be swapped before computing). A convolution
# output of shape 32 x 24 x 19 x 19 passed through
#   permute_param {
#     order: 0   # axis 0 stays in place
#     order: 2   # axis 2 moves to position 1
#     order: 3   # axis 3 moves to position 2
#     order: 1   # axis 1 moves to position 3
#   }
# becomes 32 x 19 x 19 x 24: axis 0 is unchanged, axes 2 and 3 move forward one
# slot, and axis 1 goes to the end.
layer {
  name: "conv4_3_norm_mbox_loc_perm"
  type: "Permute"
  bottom: "conv4_3_norm_mbox_loc"
  top: "conv4_3_norm_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}

# Flatten turns 32 x 19 x 19 x 24 into 32 x 8664 (32 being the batch size).
layer {
  name: "conv4_3_norm_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv4_3_norm_mbox_loc_perm"
  top: "conv4_3_norm_mbox_loc_flat"
  flatten_param {
    axis: 1
  }
}
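The Permute + Flatten pair is easy to reproduce in numpy (a sketch using the 32 x 24 x 19 x 19 example from the comment above, which matches the fc7 branch; the conv4_3 branch here would be 38 x 38 x 12):

import numpy as np

x = np.random.randn(32, 24, 19, 19)     # N x C x H x W, e.g. an fc7_mbox_loc output
perm = x.transpose(0, 2, 3, 1)          # Permute order 0,2,3,1 -> 32 x 19 x 19 x 24
flat = perm.reshape(perm.shape[0], -1)  # Flatten from axis 1 -> 32 x 8664
assert flat.shape == (32, 19 * 19 * 24)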

layer {
  name: "conv4_3_norm_mbox_conf"
  type: "Convolution"
  bottom: "conv4_3_norm"
  top: "conv4_3_norm_mbox_conf"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 6
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv4_3_norm_mbox_conf_perm"
  type: "Permute"
  bottom: "conv4_3_norm_mbox_conf"
  top: "conv4_3_norm_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}

layer {
  name: "conv4_3_norm_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv4_3_norm_mbox_conf_perm"
  top: "conv4_3_norm_mbox_conf_flat"
  flatten_param {
    axis: 1
  }
}

layer {
  name: "conv4_3_norm_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv4_3_norm"
  bottom: "data"
  top: "conv4_3_norm_mbox_priorbox"
  prior_box_param {
    min_size: 30.0
    aspect_ratio: 2.0
    flip: true
    clip: true
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
  }
}

layer {
  name: "fc7_mbox_loc"
  type: "Convolution"
  bottom: "fc7"
  top: "fc7_mbox_loc"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 24
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "fc7_mbox_loc_perm"
  type: "Permute"
  bottom: "fc7_mbox_loc"
  top: "fc7_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}

layer {
  name: "fc7_mbox_loc_flat"
  type: "Flatten"
  bottom: "fc7_mbox_loc_perm"
  top: "fc7_mbox_loc_flat"
  flatten_param {
    axis: 1
  }
}

layer {
  name: "fc7_mbox_conf"
  type: "Convolution"
  bottom: "fc7"
  top: "fc7_mbox_conf"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 12
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "fc7_mbox_conf_perm"
  type: "Permute"
  bottom: "fc7_mbox_conf"
  top: "fc7_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}

layer {
  name: "fc7_mbox_conf_flat"
  type: "Flatten"
  bottom: "fc7_mbox_conf_perm"
  top: "fc7_mbox_conf_flat"
  flatten_param {
    axis: 1
  }
}

layer {
  name: "fc7_mbox_priorbox"
  type: "PriorBox"
  bottom: "fc7"
  bottom: "data"
  top: "fc7_mbox_priorbox"
  prior_box_param {
    min_size: 60.0
    max_size: 114.0
    aspect_ratio: 2.0
    aspect_ratio: 3.0
    flip: true
    clip: true
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
  }
}

layer {
  name: "conv6_2_mbox_loc"
  type: "Convolution"
  bottom: "conv6_2"
  top: "conv6_2_mbox_loc"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 24
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv6_2_mbox_loc_perm"
  type: "Permute"
  bottom: "conv6_2_mbox_loc"
  top: "conv6_2_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}

layer {
  name: "conv6_2_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv6_2_mbox_loc_perm"
  top: "conv6_2_mbox_loc_flat"
  flatten_param {
    axis: 1
  }
}

layer {
  name: "conv6_2_mbox_conf"
  type: "Convolution"
  bottom: "conv6_2"
  top: "conv6_2_mbox_conf"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 12
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv6_2_mbox_conf_perm"
  type: "Permute"
  bottom: "conv6_2_mbox_conf"
  top: "conv6_2_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}

layer {
  name: "conv6_2_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv6_2_mbox_conf_perm"
  top: "conv6_2_mbox_conf_flat"
  flatten_param {
    axis: 1
  }
}

layer {
  name: "conv6_2_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv6_2"
  bottom: "data"
  top: "conv6_2_mbox_priorbox"
  prior_box_param {
    min_size: 114.0
    max_size: 168.0
    aspect_ratio: 2.0
    aspect_ratio: 3.0
    flip: true
    clip: true
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
  }
}

layer {
  name: "conv7_2_mbox_loc"
  type: "Convolution"
  bottom: "conv7_2"
  top: "conv7_2_mbox_loc"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 24
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv7_2_mbox_loc_perm"
  type: "Permute"
  bottom: "conv7_2_mbox_loc"
  top: "conv7_2_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}

layer {
  name: "conv7_2_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv7_2_mbox_loc_perm"
  top: "conv7_2_mbox_loc_flat"
  flatten_param {
    axis: 1
  }
}

layer {
  name: "conv7_2_mbox_conf"
  type: "Convolution"
  bottom: "conv7_2"
  top: "conv7_2_mbox_conf"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 12
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv7_2_mbox_conf_perm"
  type: "Permute"
  bottom: "conv7_2_mbox_conf"
  top: "conv7_2_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}

layer {
  name: "conv7_2_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv7_2_mbox_conf_perm"
  top: "conv7_2_mbox_conf_flat"
  flatten_param {
    axis: 1
  }
}

layer {
  name: "conv7_2_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv7_2"
  bottom: "data"
  top: "conv7_2_mbox_priorbox"
  prior_box_param {
    min_size: 168.0
    max_size: 222.0
    aspect_ratio: 2.0
    aspect_ratio: 3.0
    flip: true
    clip: true
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
  }
}

layer {
  name: "conv8_2_mbox_loc"
  type: "Convolution"
  bottom: "conv8_2"
  top: "conv8_2_mbox_loc"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 24
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv8_2_mbox_loc_perm"
  type: "Permute"
  bottom: "conv8_2_mbox_loc"
  top: "conv8_2_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}

layer {
  name: "conv8_2_mbox_loc_flat"
  type: "Flatten"
  bottom: "conv8_2_mbox_loc_perm"
  top: "conv8_2_mbox_loc_flat"
  flatten_param {
    axis: 1
  }
}

layer {
  name: "conv8_2_mbox_conf"
  type: "Convolution"
  bottom: "conv8_2"
  top: "conv8_2_mbox_conf"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 12
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "conv8_2_mbox_conf_perm"
  type: "Permute"
  bottom: "conv8_2_mbox_conf"
  top: "conv8_2_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}

layer {
  name: "conv8_2_mbox_conf_flat"
  type: "Flatten"
  bottom: "conv8_2_mbox_conf_perm"
  top: "conv8_2_mbox_conf_flat"
  flatten_param {
    axis: 1
  }
}

layer {
  name: "conv8_2_mbox_priorbox"
  type: "PriorBox"
  bottom: "conv8_2"
  bottom: "data"
  top: "conv8_2_mbox_priorbox"
  prior_box_param {
    min_size: 222.0
    max_size: 276.0
    aspect_ratio: 2.0
    aspect_ratio: 3.0
    flip: true
    clip: true
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
  }
}

layer {
  name: "pool6_mbox_loc"
  type: "Convolution"
  bottom: "pool6"
  top: "pool6_mbox_loc"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 24
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "pool6_mbox_loc_perm"
  type: "Permute"
  bottom: "pool6_mbox_loc"
  top: "pool6_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}

layer {
  name: "pool6_mbox_loc_flat"
  type: "Flatten"
  bottom: "pool6_mbox_loc_perm"
  top: "pool6_mbox_loc_flat"
  flatten_param {
    axis: 1
  }
}

layer {
  name: "pool6_mbox_conf"
  type: "Convolution"
  bottom: "pool6"
  top: "pool6_mbox_conf"
  param {
    lr_mult: 1.0
    decay_mult: 1.0
  }
  param {
    lr_mult: 2.0
    decay_mult: 0.0
  }
  convolution_param {
    num_output: 12
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
      value: 0.0
    }
  }
}

layer {
  name: "pool6_mbox_conf_perm"
  type: "Permute"
  bottom: "pool6_mbox_conf"
  top: "pool6_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
  }
}

layer {
  name: "pool6_mbox_conf_flat"
  type: "Flatten"
  bottom: "pool6_mbox_conf_perm"
  top: "pool6_mbox_conf_flat"
  flatten_param {
    axis: 1
  }
}

layer {
  name: "pool6_mbox_priorbox"
  type: "PriorBox"
  bottom: "pool6"
  bottom: "data"
  top: "pool6_mbox_priorbox"
  prior_box_param {
    min_size: 276.0
    max_size: 330.0
    aspect_ratio: 2.0
    aspect_ratio: 3.0
    flip: true
    clip: true
    variance: 0.10000000149
    variance: 0.10000000149
    variance: 0.20000000298
    variance: 0.20000000298
  }
}

###########################################################################
# Concat stitches the mbox_loc / mbox_conf / mbox_priorbox branches into single
# layers, similar to GoogleNet's Inception: channels are concatenated, not added.
layer {
  name: "mbox_loc"
  type: "Concat"
  bottom: "conv4_3_norm_mbox_loc_flat"
  bottom: "fc7_mbox_loc_flat"
  bottom: "conv6_2_mbox_loc_flat"
  bottom: "conv7_2_mbox_loc_flat"
  bottom: "conv8_2_mbox_loc_flat"
  bottom: "pool6_mbox_loc_flat"
  top: "mbox_loc"
  concat_param {
    axis: 1
  }
}

layer {
  name: "mbox_conf"
  type: "Concat"
  bottom: "conv4_3_norm_mbox_conf_flat"
  bottom: "fc7_mbox_conf_flat"
  bottom: "conv6_2_mbox_conf_flat"
  bottom: "conv7_2_mbox_conf_flat"
  bottom: "conv8_2_mbox_conf_flat"
  bottom: "pool6_mbox_conf_flat"
  top: "mbox_conf"
  concat_param {
    axis: 1
  }
}

layer {
  name: "mbox_priorbox"
  type: "Concat"
  bottom: "conv4_3_norm_mbox_priorbox"
  bottom: "fc7_mbox_priorbox"
  bottom: "conv6_2_mbox_priorbox"
  bottom: "conv7_2_mbox_priorbox"
  bottom: "conv8_2_mbox_priorbox"
  bottom: "pool6_mbox_priorbox"
  top: "mbox_priorbox"
  concat_param {
    axis: 2
  }
}

layer {
  name: "mbox_loss"
  type: "MultiBoxLoss"
  bottom: "mbox_loc"
  bottom: "mbox_conf"
  bottom: "mbox_priorbox"
  bottom: "label"
  top: "mbox_loss"
  include {
    phase: TRAIN
  }
  propagate_down: true
  propagate_down: true
  propagate_down: false
  propagate_down: false
  loss_param {
    normalization: VALID
  }
  multibox_loss_param {
    loc_loss_type: SMOOTH_L1
    conf_loss_type: SOFTMAX
    loc_weight: 1.0
    num_classes: 2
    share_location: true
    match_type: PER_PREDICTION
    overlap_threshold: 0.5
    use_prior_for_matching: true
    background_label_id: 0
    use_difficult_gt: true
    do_neg_mining: true
    neg_pos_ratio: 3.0
    neg_overlap: 0.5
    code_type: CENTER_SIZE
  }
}
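Some bookkeeping for this particular prototxt (a worked example; num_classes = 2 here, so each location gets num_output/4 boxes from *_mbox_loc and num_output/2 from *_mbox_conf):

feature_maps = [38, 19, 10, 5, 3, 1]  # conv4_3, fc7, conv6_2, conv7_2, conv8_2, pool6
boxes_per_loc = [3, 6, 6, 6, 6, 6]    # from the loc num_output values 12, 24, 24, 24, 24, 24
num_priors = sum(f * f * b for f, b in zip(feature_maps, boxes_per_loc))
print(num_priors)  # 7308 priors per image, so mbox_loc carries 7308 * 4 values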

solver.prototxt walkthrough

train_net: "models/VGGNet/VOC0712/SSD_300x300/train.prototxt"
test_net: "models/VGGNet/VOC0712/SSD_300x300/test.prototxt"

# Number of test iterations per evaluation; usually test_iter * batch_size = number of test samples, so one evaluation covers the whole test set.
test_iter: 35 

# Run an evaluation every 10000 training iterations.
test_interval: 10000 

# Initial learning rate.
base_lr: 0.000010000000475 

# Print progress every 10 iterations.
display: 10 

# Maximum number of iterations: tells the network when to stop training. Too few and it will not converge; too many mostly wastes computation.
max_iter: 60000 

# lr_policy options:
# exp:       returns base_lr * gamma^iter, where iter is the current iteration
# inv:       also needs power; returns base_lr * (1 + gamma * iter)^(-power)
# step:      also needs stepsize; returns base_lr * gamma^(floor(iter / stepsize))
# poly:      polynomial decay; returns base_lr * (1 - iter/max_iter)^power
# fixed:     keeps base_lr constant
# sigmoid:   sigmoid decay; returns base_lr * (1 / (1 + exp(-gamma * (iter - stepsize))))
# multistep: also needs stepvalue; like step, but the drops happen at the given
#            stepvalue iterations instead of at uniform intervals
lr_policy: "step"

# Factor by which the learning rate drops.
gamma: 0.10000000149 

# Momentum: the weight given to the previous gradient update (descriptions vary
# in the wild, but this is what the solver implements); normally left alone.
momentum: 0.899999976158

# Weight decay, guards against overfitting.
weight_decay: 0.000500000023749 

stepsize: 40000  # drop the learning rate every 40000 iterations (multiply by gamma, i.e. to a tenth)
snapshot: 40000  # save a snapshot every 40000 iterations
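A worked example of the "step" policy with the values in this file, lr = base_lr * gamma^floor(iter / stepsize):

base_lr, gamma, stepsize = 1e-5, 0.1, 40000
for it in (0, 39999, 40000, 59999):
    print(it, base_lr * gamma ** (it // stepsize))
# lr stays 1e-5 up to iteration 39999, then 1e-6 from 40000 until max_iter 60000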

snapshot_prefix: "models/VGGNet/VOC0712/SSD_300x300/VGG_VOC0712_SSD_300x300"

solver_mode: CPU # CPU or GPU
device_id: 0 # which GPU to use when solver_mode is GPU

debug_info: false

snapshot_after_train: true # save a final snapshot when training finishes

test_initialization: false # do not run a test pass over the test net before the first training iteration

average_loss: 10 # display the loss averaged over the last 10 forward passes

# iter_size * batch_size = the effective batch size: gradients are accumulated
# over batch_size * iter_size images before one gradient-descent update. This
# works around GPU-memory limits on batch_size, since several iterations can
# emulate a large batch even when a single pass cannot fit one.
iter_size: 1


# Optimizer: stochastic gradient descent (SGD); other flavors are batch (BGD)
# and mini-batch (MBGD) gradient descent.
type: "SGD"

eval_type: "detection" #统计整体的mAP值

# average precision version, three options:
# 1. 11point:     11-point interpolated average precision, using only precision
#                 at recall levels {0, 0.1, 0.2, ..., 0.9, 1.0}; VOC2007-style AP.
# 2. MaxIntegral: uses precision and recall; VOC2012/ILSVRC-style AP, with the
#                 integral accumulated from the back.
# 3. Integral:    like MaxIntegral but accumulated from the front; the default.
ap_version: "11point"

Reposted from blog.csdn.net/jh0lmes/article/details/80560319