1 // Copyright 2013 Yangqing Jia
3 #include <vector>
5 #include "caffe/layer.hpp"
6 #include "caffe/vision_layers.hpp"
7 #include "caffe/util/im2col.hpp"
8 #include "caffe/filler.hpp"
9 #include "caffe/util/math_functions.hpp"
11 namespace caffe {
13 template <typename Dtype>
14 void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
15 vector<Blob<Dtype>*>* top) {
16 CHECK_EQ(bottom.size(), 1) << "Im2col Layer takes a single blob as input.";
17 CHECK_EQ(top->size(), 1) << "Im2col Layer takes a single blob as output.";
18 KSIZE_ = this->layer_param_.kernelsize();
19 STRIDE_ = this->layer_param_.stride();
20 GROUP_ = this->layer_param_.group();
21 NUM_ = bottom[0]->num();
22 CHANNELS_ = bottom[0]->channels();
23 HEIGHT_ = bottom[0]->height();
24 WIDTH_ = bottom[0]->width();
25 NUM_OUTPUT_ = this->layer_param_.num_output();
26 CHECK_EQ(CHANNELS_ % GROUP_, 0);
27 // The im2col result buffer would only hold one image at a time to avoid
28 // overly large memory usage.
29 int height_out = (HEIGHT_ - KSIZE_) / STRIDE_ + 1;
30 int width_out = (WIDTH_ - KSIZE_) / STRIDE_ + 1;
31 col_buffer_.Reshape(1, CHANNELS_ * KSIZE_ * KSIZE_, height_out, width_out);
32 // Set the parameters
33 CHECK_EQ(NUM_OUTPUT_ % GROUP_, 0)
34 << "Number of output should be multiples of group.";
35 biasterm_ = this->layer_param_.biasterm();
36 // Figure out the dimensions for individual gemms.
37 M_ = NUM_OUTPUT_ / GROUP_;
38 K_ = CHANNELS_ * KSIZE_ * KSIZE_ / GROUP_;
39 N_ = height_out * width_out;
40 (*top)[0]->Reshape(bottom[0]->num(), NUM_OUTPUT_, height_out, width_out);
41 if (biasterm_) {
42 this->blobs_.resize(2);
43 } else {
44 this->blobs_.resize(1);
45 }
46 // Intialize the weight
47 this->blobs_[0].Reshape(1, 1, NUM_OUTPUT_, K_);
48 // fill the weights
49 shared_ptr<Filler<Dtype> > weight_filler(
50 GetFiller<Dtype>(this->layer_param_.weight_filler()));
51 weight_filler->Fill(&this->blobs_[0]);
52 // If necessary, intiialize and fill the bias term
53 if (biasterm_) {
54 this->blobs_[1].Reshape(1, 1, 1, NUM_OUTPUT_);
55 shared_ptr<Filler<Dtype> > bias_filler(
56 GetFiller<Dtype>(this->layer_param_.bias_filler()));
57 bias_filler->Fill(&this->blobs_[1]);
58 bias_multiplier_.reset(new SyncedMemory(N_ * sizeof(Dtype)));
59 Dtype* bias_multiplier_data =
60 reinterpret_cast<Dtype*>(bias_multiplier_->mutable_cpu_data());
61 for (int i = 0; i < N_; ++i) {
62 bias_multiplier_data[i] = 1.;
63 }
64 }
65 };
68 template <typename Dtype>
69 void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
70 vector<Blob<Dtype>*>* top) {
71 const Dtype* bottom_data = bottom[0]->cpu_data();
72 Dtype* top_data = (*top)[0]->mutable_cpu_data();
73 Dtype* col_data = col_buffer_.mutable_cpu_data();
74 const Dtype* weight = this->blobs_[0].cpu_data();
75 int weight_offset = M_ * K_;
76 int col_offset = K_ * N_;
77 int top_offset = M_ * N_;
78 for (int n = 0; n < NUM_; ++n) {
79 // First, im2col
80 im2col_cpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
81 WIDTH_, KSIZE_, STRIDE_, col_data);
82 // Second, innerproduct with groups
83 for (int g = 0; g < GROUP_; ++g) {
84 caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
85 (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
86 (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
87 }
88 // third, add bias
89 if (biasterm_) {
90 caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
91 N_, 1, (Dtype)1., this->blobs_[1].cpu_data(),
92 reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()),
93 (Dtype)1., top_data + (*top)[0]->offset(n));
94 }
95 }
96 }
// GPU forward pass: mirrors Forward_cpu, using the GPU im2col and gemm
// wrappers on device memory instead of the CPU ones.
template <typename Dtype>
void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = (*top)[0]->mutable_gpu_data();
  Dtype* col_data = col_buffer_.mutable_gpu_data();
  const Dtype* weight = this->blobs_[0].gpu_data();
  // Per-group strides into the weight matrix, column buffer, and output.
  int weight_offset = M_ * K_;
  int col_offset = K_ * N_;
  int top_offset = M_ * N_;
  for (int n = 0; n < NUM_; ++n) {
    // First, im2col: unroll the n-th image's patches into the column buffer.
    im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
        WIDTH_, KSIZE_, STRIDE_, col_data);
    // Second, innerproduct with groups: one (M_ x K_) * (K_ x N_) gemm each.
    for (int g = 0; g < GROUP_; ++g) {
      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
          (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
          (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
    }
    // Third, add bias: broadcast over all N_ positions via a rank-1 gemm
    // against the all-ones multiplier vector.
    if (biasterm_) {
      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
          N_, 1, (Dtype)1., this->blobs_[1].gpu_data(),
          reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
          (Dtype)1., top_data + (*top)[0]->offset(n));
    }
  }
}
128 template <typename Dtype>
129 Dtype ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
130 const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
131 const Dtype* top_diff = top[0]->cpu_diff();
132 const Dtype* weight = this->blobs_[0].cpu_data();
133 Dtype* weight_diff = this->blobs_[0].mutable_cpu_diff();
134 const Dtype* bottom_data = (*bottom)[0]->cpu_data();
135 Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
136 Dtype* col_data = col_buffer_.mutable_cpu_data();
137 Dtype* col_diff = col_buffer_.mutable_cpu_diff();
138 // bias gradient if necessary
139 Dtype* bias_diff = NULL;
141 if (biasterm_) {
142 bias_diff = this->blobs_[1].mutable_cpu_diff();
143 memset(bias_diff, 0., sizeof(Dtype) * this->blobs_[1].count());
144 for (int n = 0; n < NUM_; ++n) {
145 caffe_cpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
146 1., top_diff + top[0]->offset(n),
147 reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()), 1.,
148 bias_diff);
149 }
150 }
152 int weight_offset = M_ * K_;
153 int col_offset = K_ * N_;
154 int top_offset = M_ * N_;
155 memset(weight_diff, 0., sizeof(Dtype) * this->blobs_[0].count());
156 for (int n = 0; n < NUM_; ++n) {
157 // since we saved memory in the forward pass by not storing all col data,
158 // we will need to recompute them.
159 im2col_cpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
160 WIDTH_, KSIZE_, STRIDE_, col_data);
161 // gradient w.r.t. weight. Note that we will accumulate diffs.
162 for (int g = 0; g < GROUP_; ++g) {
163 caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
164 (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
165 col_data + col_offset * g, (Dtype)1.,
166 weight_diff + weight_offset * g);
167 }
168 // gradient w.r.t. bottom data, if necessary
169 if (propagate_down) {
170 for (int g = 0; g < GROUP_; ++g) {
171 caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
172 (Dtype)1., weight + weight_offset * g,
173 top_diff + top[0]->offset(n) + top_offset * g,
174 (Dtype)0., col_diff + col_offset * g);
175 }
176 // col2im back to the data
177 col2im_cpu(col_diff, CHANNELS_, HEIGHT_,
178 WIDTH_, KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
179 }
180 }
181 return Dtype(0.);
182 }
184 template <typename Dtype>
185 Dtype ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
186 const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
187 const Dtype* top_diff = top[0]->gpu_diff();
188 const Dtype* weight = this->blobs_[0].gpu_data();
189 Dtype* weight_diff = this->blobs_[0].mutable_gpu_diff();
190 const Dtype* bottom_data = (*bottom)[0]->gpu_data();
191 Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
192 Dtype* col_data = col_buffer_.mutable_gpu_data();
193 Dtype* col_diff = col_buffer_.mutable_gpu_diff();
194 // bias gradient if necessary
195 Dtype* bias_diff = NULL;
197 if (biasterm_) {
198 bias_diff = this->blobs_[1].mutable_gpu_diff();
199 CUDA_CHECK(cudaMemset(bias_diff, 0.,
200 sizeof(Dtype) * this->blobs_[1].count()));
201 for (int n = 0; n < NUM_; ++n) {
202 caffe_gpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
203 1., top_diff + top[0]->offset(n),
204 reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
205 1., bias_diff);
206 }
207 }
209 int weight_offset = M_ * K_;
210 int col_offset = K_ * N_;
211 int top_offset = M_ * N_;
212 CUDA_CHECK(cudaMemset(weight_diff, 0.,
213 sizeof(Dtype) * this->blobs_[0].count()));
214 for (int n = 0; n < NUM_; ++n) {
215 // since we saved memory in the forward pass by not storing all col data,
216 // we will need to recompute them.
217 im2col_gpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
218 WIDTH_, KSIZE_, STRIDE_, col_data);
219 // gradient w.r.t. weight. Note that we will accumulate diffs.
220 for (int g = 0; g < GROUP_; ++g) {
221 caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
222 (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
223 col_data + col_offset * g, (Dtype)1.,
224 weight_diff + weight_offset * g);
225 }
226 // gradient w.r.t. bottom data, if necessary
227 if (propagate_down) {
228 for (int g = 0; g < GROUP_; ++g) {
229 caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
230 (Dtype)1., weight + weight_offset * g,
231 top_diff + top[0]->offset(n) + top_offset * g,
232 (Dtype)0., col_diff + col_offset * g);
233 }
234 // col2im back to the data
235 col2im_gpu(col_diff, CHANNELS_, HEIGHT_,
236 WIDTH_, KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
237 }
238 }
239 return Dtype(0.);
240 }
242 INSTANTIATE_CLASS(ConvolutionLayer);
244 } // namespace caffe