Understanding Custom Caffe Layers in DispNet (Part 2): DataAugmentation

Copyright notice: if you repost, please leave a comment to let me know and credit the source: https://blog.csdn.net/ShuqiaoS/article/details/83376142


This series of posts records the notes I took on the custom Caffe layers I ran into while studying DispNet. This part covers the DataAugmentation layer, whose main job is to augment the input data according to the configured parameters, mitigating overfitting from the data side. Updated October 25, 2018.

Detailed description

Usage

Concretely, there are two ways to use the DataAugmentation layer:

The first: the augmentation parameters are set inside the layer and read at run time. In this case there is exactly one input (the data to be augmented), and there may be up to two outputs (the first is the augmented data; the second, if present, carries the layer's augmentation parameters).

The second: no parameters are set inside the layer, and the augmentation parameters arrive through an input instead. In this case there are two inputs (the first is the data to be augmented, the second the parameters) and a single output (the augmented data). This is how, for example, two images of a pair can receive identical transforms: the first augmentation layer emits the parameters it sampled, and a second layer consumes them.

Functionality

DataAugmentation supports the following kinds of augmentation:

  1. Spatial transforms: mirroring, rotation, zooming, and translation (the shift can be specified directly, or restricted to the x or y direction only); a sketch of how these compose follows this list.
  2. Chromatic transforms: brightness, gamma, contrast, and color.
  3. Effect transforms: fog size, fog amount, motion-blur angle, motion-blur size, shadow angle, shadow distance, shadow strength, and noise.
  4. Chromatic eigen transforms (more on these below, where the chromatic_eigvec values appear).
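
To make item 1 concrete, here is a minimal sketch (my own illustration in plain C++, not the layer's actual code) of the idea behind the layer's tTransMat: mirroring, rotation, zoom, and translation collapse into a single inverse mapping, and every output pixel samples the source image bilinearly. Single channel for brevity; in the layer itself this work happens on the GPU in the .cu file.

// Minimal sketch (not the layer's code): mirror/rotate/zoom/translate as one
// inverse mapping, applied with bilinear interpolation.
#include <algorithm>
#include <cmath>

void SpatialAugment(const float* src, int W, int H, float* dst, int Wo, int Ho,
                    bool mirror, float theta, float zoom, float tx, float ty) {
  const float c = std::cos(-theta), s = std::sin(-theta);
  for (int y = 0; y < Ho; ++y) {
    for (int x = 0; x < Wo; ++x) {
      // map the output pixel back to source coordinates (inverse warping)
      float xc = x - 0.5f * Wo - tx;
      float yc = y - 0.5f * Ho - ty;
      float sx = (c * xc - s * yc) / zoom;
      float sy = (s * xc + c * yc) / zoom;
      if (mirror) sx = -sx;
      sx = std::min(std::max(sx + 0.5f * W, 0.0f), W - 1.001f);
      sy = std::min(std::max(sy + 0.5f * H, 0.0f), H - 1.001f);
      // bilinear interpolation between the four surrounding source pixels
      int x0 = (int)sx, y0 = (int)sy;
      float ax = sx - x0, ay = sy - y0;
      const float* p = src + y0 * W + x0;
      dst[y * Wo + x] = (1 - ay) * ((1 - ax) * p[0] + ax * p[1])
                      + ay * ((1 - ax) * p[W] + ax * p[W + 1]);
    }
  }
}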

Invocation

Option 1: augmentation parameters defined in the layer

layer {
  name: "DataAugmentation"
  type: "DataAugmentation"
  bottom: "input_blob"
  top: "output_blob1"
  top: "output_blob2"
  propagate_down: false 
  augmentation_param {
    max_multiplier: 1
    augment_during_test: false
    recompute_mean: 1000
    mean_per_pixel: false
    translate {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0
      spread: 0.4
      prob: 1.0
    }
    zoom {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: 0.2
      spread: 0.4
      prob: 1.0
    }
    squeeze {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: 0
      spread: 0.3
      prob: 1.0
    }
    lmult_pow {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: -0.2
      spread: 0.4
      prob: 1.0
    }
    lmult_mult {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: 0.0
      spread: 0.4
      prob: 1.0
    }
    lmult_add {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0
      spread: 0.03
      prob: 1.0
    }
    sat_pow {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: 0
      spread: 0.4
      prob: 1.0
    }
    sat_mult {
      rand_type: "uniform_bernoulli"
      exp: true
      mean: -0.3
      spread: 0.5
      prob: 1.0
    }
    sat_add {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0
      spread: 0.03
      prob: 1.0
    }
    col_pow {
      rand_type: "gaussian_bernoulli"
      exp: true
      mean: 0
      spread: 0.4
      prob: 1.0
    }
    col_mult {
      rand_type: "gaussian_bernoulli"
      exp: true
      mean: 0
      spread: 0.2
      prob: 1.0
    }
    col_add {
      rand_type: "gaussian_bernoulli"
      exp: false
      mean: 0
      spread: 0.02
      prob: 1.0
    }
    ladd_pow {
      rand_type: "gaussian_bernoulli"
      exp: true
      mean: 0
      spread: 0.4
      prob: 1.0
    }
    ladd_mult {
      rand_type: "gaussian_bernoulli"
      exp: true
      mean: 0.0
      spread: 0.4
      prob: 1.0
    }
    ladd_add {
      rand_type: "gaussian_bernoulli"
      exp: false
      mean: 0
      spread: 0.04
      prob: 1.0
    }
    col_rotate {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0
      spread: 1
      prob: 1.0
    }
    crop_width: 960
    crop_height: 256
    chromatic_eigvec: 0.51
    chromatic_eigvec: 0.56
    chromatic_eigvec: 0.65
    chromatic_eigvec: 0.79
    chromatic_eigvec: 0.01
    chromatic_eigvec: -0.62
    chromatic_eigvec: 0.35
    chromatic_eigvec: -0.83
    chromatic_eigvec: 0.44
    noise {
      rand_type: "uniform_bernoulli"
      exp: false
      mean: 0.03
      spread: 0.03
      prob: 1.0
    }
  }
}
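
A note on the sub-messages above, since they all share the same five fields. As I read the FlowNet/DispNet augmentation code, prob is a Bernoulli gate that decides whether the transform fires at all; the value is then drawn uniformly from [mean - spread, mean + spread] for "uniform_bernoulli", or from a Gaussian with mean mean and standard deviation spread for "gaussian_bernoulli"; and exp: true exponentiates the result, turning an additive spread into a multiplicative factor (e.g. zoom with mean: 0.2 yields factors around e^0.2 ≈ 1.22). A hedged sketch of that logic:

// Hedged sketch of how one coefficient is sampled from a sub-message like
// zoom { rand_type: "uniform_bernoulli" exp: true mean: 0.2 spread: 0.4 prob: 1.0 }.
// This mirrors my reading of the augmentation code, not the code itself.
#include <cmath>
#include <random>
#include <string>

float SampleCoeff(const std::string& rand_type, bool use_exp,
                  float mean, float spread, float prob, std::mt19937& rng) {
  std::uniform_real_distribution<float> gate(0.f, 1.f);
  if (gate(rng) > prob)
    return use_exp ? 1.f : 0.f;  // transform does not fire: identity value
  float v;
  if (rand_type == "gaussian_bernoulli") {
    std::normal_distribution<float> g(mean, spread);
    v = g(rng);
  } else {  // "uniform_bernoulli"
    std::uniform_real_distribution<float> u(mean - spread, mean + spread);
    v = u(rng);
  }
  return use_exp ? std::exp(v) : v;  // exp maps additive spread to multiplicative
}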

Option 2: parameters provided by another data-augmentation layer

layer {
  name: "DataAugmentation"
  type: "DataAugmentation"
  bottom: "input_blob"
  bottom: "input_augmented_blob"
  top: "output_blob"
  propagate_down: false 
  propagate_down: false 
  augmentation_param {
    max_multiplier: 1
    augment_during_test: false
    recompute_mean: 1000
    mean_per_pixel: false
    crop_width: 960
    crop_height: 256
    chromatic_eigvec: 0.51
    chromatic_eigvec: 0.56
    chromatic_eigvec: 0.65
    chromatic_eigvec: 0.79
    chromatic_eigvec: 0.01
    chromatic_eigvec: -0.62
    chromatic_eigvec: 0.35
    chromatic_eigvec: -0.83
    chromatic_eigvec: 0.44
  }
}
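
The nine chromatic_eigvec values are, as far as I can tell, a 3×3 eigenvector matrix of the RGB color distribution, listed row by row: the first row (0.51, 0.56, 0.65) points roughly along the gray axis, and the three rows are mutually orthogonal. The chromatic eigen transform then projects each pixel into this decorrelated basis, perturbs the components there, and projects back, in the spirit of AlexNet-style PCA color augmentation. A minimal sketch under that assumption:

// Minimal sketch, assuming chromatic_eigvec holds a 3x3 eigenvector matrix E
// filled in the order the prototxt lists it: rgb' = E^T * (scale .* (E * rgb)).
void ChromaticEigenPerturb(float* rgb, const float E[9], const float scale[3]) {
  float p[3];
  for (int i = 0; i < 3; ++i)  // project into the decorrelated eigen basis
    p[i] = E[3*i+0]*rgb[0] + E[3*i+1]*rgb[1] + E[3*i+2]*rgb[2];
  for (int i = 0; i < 3; ++i)  // perturb each component independently
    p[i] *= scale[i];
  for (int i = 0; i < 3; ++i)  // project back (E is orthonormal, so E^-1 = E^T)
    rgb[i] = E[0+i]*p[0] + E[3+i]*p[1] + E[6+i]*p[2];
}

With scale = {1, 1, 1} the pixel is unchanged; perturbing the first component mostly changes luminance, while the other two shift hue.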

data_augmentation_layer.hpp

Declares the variables and functions the layer needs. Because the layer operates on raw image data, it does not support backpropagation:

virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  // Augmentation is not differentiable here; fail hard if gradients are requested.
  for (int i = 0; i < propagate_down.size(); i++)
    if (propagate_down[i])
      LOG(FATAL) << "DataAugmentationLayer cannot do backward.";
}

virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  for (int i = 0; i < propagate_down.size(); i++)
    if (propagate_down[i])
      LOG(FATAL) << "DataAugmentationLayer cannot do backward.";
}

data_augmentation_layer.cpp

The augmentation computation itself runs only on the GPU, so the .cpp file defines only the reading of data and parameters plus the necessary shape setup; the actual augmentation is implemented in the .cu file.

LayerSetUp

Reads the layer's parameters from the prototxt file.

template <typename Dtype>
void DataAugmentationLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top)
{
  // TODO This won't work when applying a net to images of size different from what the net was trained on
  aug_ = this->layer_param_.augmentation_param();  // read the augmentation parameters into aug_
  this->layer_param_.set_reshape_every_iter(false);
  LOG(WARNING) << "DataAugmentationLayer only runs Reshape on setup";
  if (this->blobs_.size() > 0)
    LOG(INFO) << "Skipping data mean blob initialization";
  else {
    if (aug_.recompute_mean()) {
      // Recomputing the mean needs three parameter blobs: blobs_[0] (allocated
      // below) serves as an iteration counter, blobs_[1] holds the accumulated
      // per-pixel mean and blobs_[2] the per-channel mean; their shapes are set
      // in Reshape. lr_mult and decay_mult are zeroed so the solver never
      // updates them.
      LOG(INFO) << "Recompute mean";
      this->blobs_.resize(3);
      this->blobs_[1].reset(new Blob<Dtype>());
      this->layer_param_.add_param();
      this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_lr_mult(0.);
      this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_decay_mult(0.);
      this->blobs_[2].reset(new Blob<Dtype>());
      this->layer_param_.add_param();
      this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_lr_mult(0.);
      this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_decay_mult(0.);
    }
    else {
      LOG(INFO) << "Do not recompute mean";
      this->blobs_.resize(1);
    }
    this->blobs_[0].reset(new Blob<Dtype>(1, 1, 1, 1));
    // Never backpropagate
    this->param_propagate_down_.resize(this->blobs_.size(), false);
    this->layer_param_.add_param();
    this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_lr_mult(0.);
    this->layer_param_.mutable_param(this->layer_param_.param_size()-1)->set_decay_mult(0.);
//     LOG(INFO) << "DEBUG: this->layer_param_.param_size()=" << this->layer_param_.param_size();
//     LOG(INFO) << "DEBUG: Writing layer_param";
    // Note the hard-coded debug dump path the authors left in:
    WriteProtoToTextFile(this->layer_param_, "/misc/lmbraid17/sceneflownet/dosovits/matlab/test/message.prototxt");
//     LOG(INFO) << "DEBUG: Finished writing layer_param";
  }
}

Reshape

Sets the output shapes and other bookkeeping the layer needs.

template <typename Dtype>
void DataAugmentationLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top)
{
    // check that the numbers of input and output blobs are what the layer expects
    LOG(WARNING) << "Reshape of Augmentation layer should only be called once? Check this";
    CHECK_GE(bottom.size(), 1) << "Data augmentation layer takes one or two input blobs.";
    CHECK_LE(bottom.size(), 2) << "Data augmentation layer takes one or two input blobs.";
    CHECK_GE(top.size(), 1) << "Data augmentation layer outputs one or two output blobs.";
    CHECK_LE(top.size(), 2) << "Data augmentation layer outputs one or two output blobs.";

    // read the dimensions of the input blob
    const int num = bottom[0]->num();
    const int channels = bottom[0]->channels();
    const int height = bottom[0]->height();
    const int width = bottom[0]->width();

    output_params_ = (top.size() > 1);
    input_params_ = (bottom.size() > 1);  // params defined in the layer, or supplied by another augmentation layer?
    aug_ = this->layer_param_.augmentation_param();
    discount_coeff_schedule_ = this->layer_param_.coeff_schedule_param();

    // If crop width and height are given, crop (the cropped size must not exceed
    // the original); otherwise keep the original size.
    do_cropping_ = (aug_.has_crop_width() && aug_.has_crop_height());
    if (!do_cropping_)
    {
        cropped_width_ = width;
        cropped_height_ = height;
        LOG(WARNING) << "Please enter crop size if you want to perform augmentation";
    }
    else
    {
        cropped_width_ = aug_.crop_width();    CHECK_GE(width, cropped_width_)   << "crop width greater than original";
        cropped_height_ = aug_.crop_height();  CHECK_GE(height, cropped_height_) << "crop height greater than original";
    }

    // set the output shape
    top[0]->Reshape(num, channels, cropped_height_, cropped_width_);

    // determine the number of augmentation coefficients (one per field of AugmentationCoeff)
    AugmentationCoeff coeff;
    num_params_ = coeff.GetDescriptor()->field_count();

    // if the parameters are supplied by another augmentation layer, read them from the second input blob
    if (input_params_) {  // two inputs: take the coefficients from the second one
        LOG(INFO) << "Receiving " << num_params_ << " augmentation params";
        all_coeffs_.ReshapeLike(*bottom[1]);  // same shape as the incoming parameter blob
    } else  // otherwise allocate fresh coefficients
        all_coeffs_.Reshape(num, num_params_, 1, 1); // create

    // if a second output is requested, it carries the sampled coefficients
    if (output_params_) {
        top[1]->ReshapeLike(all_coeffs_);
        LOG(INFO) << "Emitting " << num_params_ << " augmentation params";
    }

    // per-batch buffers for the transformation matrices and coefficient structs
    coeff_matrices_.reset(new SyncedMemory(num * sizeof(typename AugmentationLayerBase<Dtype>::tTransMat)));

    coeff_chromatic_.reset(new SyncedMemory(num * sizeof(typename AugmentationLayerBase<Dtype>::tChromaticCoeffs)));
    coeff_chromatic_eigen_.reset(new SyncedMemory(num * sizeof(typename AugmentationLayerBase<Dtype>::tChromaticEigenCoeffs)));
    coeff_effect_.reset(new SyncedMemory(num * sizeof(typename AugmentationLayerBase<Dtype>::tEffectCoeffs)));

    chromatic_eigenspace_.reset(new SyncedMemory(sizeof(typename AugmentationLayerBase<Dtype>::tChromaticEigenSpace)));

    // set up the data mean
    if (aug_.recompute_mean()) {  // the mean is recomputed on the fly
      ones_.Reshape(1, 1, cropped_height_, cropped_width_);
      caffe_set(ones_.count(), Dtype(1), ones_.mutable_cpu_data());
      this->blobs_[1]->Reshape(1, channels, cropped_height_, cropped_width_);
      this->blobs_[2]->Reshape(1, channels, 1, 1);
    }
    else if (aug_.mean().size() == 3 && !aug_.mean_per_pixel())
    {
      ones_.Reshape(1, 1, cropped_height_, cropped_width_);
      caffe_set(ones_.count(), Dtype(1), ones_.mutable_cpu_data());

      LOG(INFO) << "Using predefined per-pixel mean from proto";
      pixel_rgb_mean_from_proto_.Reshape(1, 3, 1, 1);
      for (int i = 0; i < 3; i++)
          pixel_rgb_mean_from_proto_.mutable_cpu_data()[i] = aug_.mean().Get(i);
    }

    noise_.reset(new SyncedMemory(top[0]->count() / top[0]->num() * sizeof(Dtype)));

    *(this->blobs_[0]->mutable_cpu_data()) = 0;  // reset the iteration counter

//     LOG(INFO) << "DEBUG: Reshape done";
}
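
The line num_params_ = coeff.GetDescriptor()->field_count(); deserves a note: it uses protobuf reflection to count the fields declared on the AugmentationCoeff message, so the layer automatically passes around exactly as many coefficients as the .proto declares. A standalone illustration (assuming the DispNet/FlowNet Caffe fork, whose caffe.proto defines AugmentationCoeff):

// Standalone illustration of the reflection trick used in Reshape.
#include <iostream>
#include "caffe/proto/caffe.pb.h"

int main() {
  caffe::AugmentationCoeff coeff;
  // Every generated protobuf message carries a Descriptor; field_count() is
  // the number of fields declared in the .proto, i.e. one per coefficient.
  std::cout << "AugmentationCoeff declares "
            << coeff.GetDescriptor()->field_count()
            << " augmentation coefficients" << std::endl;
  return 0;
}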

data_augmentation_layer.cu

Defines the computation that runs on the GPU.

Forward_gpu

Before augmenting, the layer performs a few checks:

  1. determine the input and output sizes;
  2. check that the input and output num (batch size) agree;
  3. check for NaNs and abnormally large values (illustrated by the sketch after this list).
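
A hedged illustration of what the sanity check in step 3 amounts to; the threshold here is made up, and the real layer does this on the GPU:

// Reject NaNs and implausibly large values before warping (CPU illustration).
#include <cmath>
#include <glog/logging.h>

void CheckSane(const float* data, int count, float limit = 1e5f) {
  for (int i = 0; i < count; ++i) {
    CHECK(!std::isnan(data[i])) << "NaN in augmentation input at " << i;
    CHECK_LT(std::fabs(data[i]), limit) << "suspiciously large value at " << i;
  }
}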

Augmentation then proceeds (by design, the data are only augmented when cropping is configured):

  1. if no augmentation parameters are provided as input, sample a set according to the layer parameters and record the applied transforms in the log;
  2. compute the transformation matrix from the parameters;
  3. augment the data with the transformation matrix as needed.

If cropping is not configured, the input is copied straight to the output.

Finally, the augmented data are normalized by subtracting the mean.
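
With recompute_mean: 1000 the layer keeps updating its stored mean over the first 1000 iterations and then freezes it; every output batch has that mean subtracted. A minimal sketch of this running update, assuming (as the shapes set up in Reshape suggest) that blobs_[0] is the iteration counter and blobs_[1] the accumulated per-pixel mean; this is not the layer's verbatim code:

// Minimal sketch of the recompute_mean bookkeeping and normalization.
void UpdateAndSubtractMean(float* out, const float* batch_mean, float* mean,
                           float* iter, int count, int recompute_mean_iters) {
  if (*iter < recompute_mean_iters) {
    // incremental average: mean <- (mean * n + batch_mean) / (n + 1)
    for (int i = 0; i < count; ++i)
      mean[i] = (mean[i] * (*iter) + batch_mean[i]) / (*iter + 1);
    *iter += 1;
  }
  for (int i = 0; i < count; ++i)  // normalize: subtract the (running) mean
    out[i] -= mean[i];
}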
