Following the terminology in 【1】, transfer learning with a pretrained model comes in two forms:
- freeze and train: only the final fc layer is trained
- finetune: the pretrained model is used as the initialization and all layers are trained (the sketch after this list shows how the two differ at the optimizer level)
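A minimal sketch of the difference, in terms of which parameters are handed to the optimizer (the variable names `model_ft` / `model_conv` and the SGD hyperparameters here are illustrative assumptions, not taken from 【1】):

```python
import torch.optim as optim

# finetune: all parameters keep requires_grad=True and are updated
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# freeze and train: only the newly added fc layer is updated
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)
```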
Below, only the changes to the network structure are discussed.
finetune
```python
import torch.nn as nn
from torchvision import models

model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features              # input size of the final fc layer
model_ft.fc = nn.Linear(num_ftrs, NUM_CLASSES)  # NUM_CLASSES: number of classes in your own dataset
```
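As a quick sanity check (a sketch; the dummy input shape assumes standard ImageNet preprocessing), a forward pass should now produce NUM_CLASSES logits:

```python
import torch

x = torch.randn(1, 3, 224, 224)  # one dummy image batch
out = model_ft(x)
print(out.shape)                 # torch.Size([1, NUM_CLASSES])
```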
```python
model_ft = models.vgg16(pretrained=True)
num_ftrs = model_ft.classifier[6].in_features
feature_model = list(model_ft.classifier.children())
feature_model.pop()  # drop the original final classification layer
feature_model.append(nn.Linear(num_ftrs, NUM_CLASSES))
model_ft.classifier = nn.Sequential(*feature_model)
```

If you also want to add extra layers on top of the base network, you can use the approach from main.py in 【2】:
```python
num_ftrs = model_ft.fc.in_features
feature_model = list(model_ft.fc.children())  # empty for a plain nn.Linear; kept as written in 【2】
feature_model.append(nn.Linear(num_ftrs, cf.feature_size))           # cf: the config module in 【2】
feature_model.append(nn.BatchNorm1d(cf.feature_size))
feature_model.append(nn.ReLU(inplace=True))
feature_model.append(nn.Linear(cf.feature_size, len(dset_classes)))  # dset_classes: list of class names
model_ft.fc = nn.Sequential(*feature_model)
```
【2】 also describes a method for feature extraction:
```python
import copy

if args.net_type == 'alexnet' or args.net_type == 'vggnet':
    feature_map = list(checkpoint['model'].module.classifier.children())
    feature_map.pop()  # remove the final classification layer
    new_classifier = nn.Sequential(*feature_map)
    extractor = copy.deepcopy(checkpoint['model'])
    extractor.module.classifier = new_classifier
elif args.net_type == 'resnet':
    feature_map = list(model.module.children())
    feature_map.pop()  # remove the final fc layer
    extractor = nn.Sequential(*feature_map)
```
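A hedged usage sketch (`images` is assumed to be a preprocessed input batch, not a name from 【2】): for the resnet branch, the resulting Sequential ends at the average-pooling layer, so the output still needs flattening:

```python
extractor.eval()
with torch.no_grad():
    feats = extractor(images)               # e.g. (N, 512, 1, 1) for resnet18
    feats = feats.view(feats.size(0), -1)   # flatten to (N, 512) feature vectors
```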
freeze and train

```python
for param in model_conv.parameters():           # params have requires_grad=True by default
    param.requires_grad = False                 # freeze all pretrained layers
num_ftrs = model_conv.fc.in_features
model_conv.fc = nn.Linear(num_ftrs, num_class)  # the new fc layer is trainable by default
```
This prevents the parameters of the earlier layers from being updated during backpropagation.
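A quick check (a sketch, using `model_conv` as constructed above) confirms that only the newly created fc layer remains trainable:

```python
# only the fresh fc layer should report requires_grad=True
trainable = [name for name, p in model_conv.named_parameters() if p.requires_grad]
print(trainable)  # expected: ['fc.weight', 'fc.bias']
```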
【3】 points out that the learning rate should be decayed as training progresses, so that gradient descent does not overshoot the local optimum as it gets close to it.
```python
def lr_scheduler(optimizer, epoch, init_lr=0.001, lr_decay_epoch=7):
    """Decay learning rate by a factor of 0.1 every lr_decay_epoch epochs."""
    lr = init_lr * (0.1 ** (epoch // lr_decay_epoch))
    if epoch % lr_decay_epoch == 0:
        print('LR is set to {}'.format(lr))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer
```
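A hypothetical training loop (`NUM_EPOCHS`, `dataloader`, `criterion`, and `optimizer` are placeholders, not from 【3】) showing where the scheduler is called:

```python
for epoch in range(NUM_EPOCHS):
    optimizer = lr_scheduler(optimizer, epoch)  # decay the lr before this epoch's updates
    for inputs, labels in dataloader:
        optimizer.zero_grad()
        loss = criterion(model_ft(inputs), labels)
        loss.backward()
        optimizer.step()
```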
【1】