1 // Copyright 2013 Yangqing Jia
3 #include <vector>
5 #include "caffe/layer.hpp"
6 #include "caffe/vision_layers.hpp"
7 #include "caffe/util/im2col.hpp"
8 #include "caffe/filler.hpp"
9 #include "caffe/util/math_functions.hpp"
11 namespace caffe {
13 template <typename Dtype>
14 void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
15 vector<Blob<Dtype>*>* top) {
16 CHECK_EQ(bottom.size(), 1) << "Im2col Layer takes a single blob as input.";
17 CHECK_EQ(top->size(), 1) << "Im2col Layer takes a single blob as output.";
18 KSIZE_ = this->layer_param_.kernelsize();
19 STRIDE_ = this->layer_param_.stride();
20 GROUP_ = this->layer_param_.group();
21 NUM_ = bottom[0]->num();
22 CHANNELS_ = bottom[0]->channels();
23 HEIGHT_ = bottom[0]->height();
24 WIDTH_ = bottom[0]->width();
25 NUM_OUTPUT_ = this->layer_param_.num_output();
26 CHECK_EQ(CHANNELS_ % GROUP_, 0);
27 // The im2col result buffer would only hold one image at a time to avoid
28 // overly large memory usage.
29 int height_out = (HEIGHT_ - KSIZE_) / STRIDE_ + 1;
30 int width_out = (WIDTH_ - KSIZE_) / STRIDE_ + 1;
31 col_buffer_.Reshape(1, CHANNELS_ * KSIZE_ * KSIZE_, height_out, width_out);
32 // Set the parameters
33 CHECK_EQ(NUM_OUTPUT_ % GROUP_, 0)
34 << "Number of output should be multiples of group.";
35 biasterm_ = this->layer_param_.biasterm();
36 // Figure out the dimensions for individual gemms.
37 M_ = NUM_OUTPUT_ / GROUP_;
38 K_ = CHANNELS_ * KSIZE_ * KSIZE_ / GROUP_;
39 N_ = height_out * width_out;
40 (*top)[0]->Reshape(bottom[0]->num(), NUM_OUTPUT_, height_out, width_out);
41 if (biasterm_) {
42 this->blobs_.resize(2);
43 } else {
44 this->blobs_.resize(1);
45 }
46 // Intialize the weight
47 this->blobs_[0].Reshape(1, 1, NUM_OUTPUT_, K_);
48 // fill the weights
49 shared_ptr<Filler<Dtype> > weight_filler(
50 GetFiller<Dtype>(this->layer_param_.weight_filler()));
51 weight_filler->Fill(&this->blobs_[0]);
52 // If necessary, intiialize and fill the bias term
53 if (biasterm_) {
54 this->blobs_[1].Reshape(1, 1, 1, NUM_OUTPUT_);
55 shared_ptr<Filler<Dtype> > bias_filler(
56 GetFiller<Dtype>(this->layer_param_.bias_filler()));
57 bias_filler->Fill(&this->blobs_[1]);
58 bias_multiplier_.reset(new SyncedMemory(N_ * sizeof(Dtype)));
59 Dtype* bias_multiplier_data =
60 reinterpret_cast<Dtype*>(bias_multiplier_->mutable_cpu_data());
61 for (int i = 0; i < N_; ++i) {
62 bias_multiplier_data[i] = 1.;
63 }
64 }
65 };
68 template <typename Dtype>
69 void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
70 vector<Blob<Dtype>*>* top) {
71 const Dtype* bottom_data = bottom[0]->cpu_data();
72 Dtype* top_data = (*top)[0]->mutable_cpu_data();
73 Dtype* col_data = col_buffer_.mutable_cpu_data();
74 const Dtype* weight = this->blobs_[0].cpu_data();
75 int weight_offset = M_ * K_;
76 int col_offset = K_ * N_;
77 int top_offset = M_ * N_;
78 for (int n = 0; n < NUM_; ++n) {
79 // First, im2col
80 im2col_cpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
81 WIDTH_, KSIZE_, STRIDE_, col_data);
82 // Second, innerproduct with groups
83 for (int g = 0; g < GROUP_; ++g) {
84 caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
85 (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
86 (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
87 }
88 // third, add bias
89 if (biasterm_) {
90 caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
91 N_, 1, (Dtype)1., this->blobs_[1].cpu_data(),
92 reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()),
93 (Dtype)1., top_data + (*top)[0]->offset(n));
94 }
95 }
96 }
// GPU forward pass: mirrors Forward_cpu, using the GPU im2col and gemm
// wrappers on device memory instead of the CPU ones.
template <typename Dtype>
void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = (*top)[0]->mutable_gpu_data();
  Dtype* col_data = col_buffer_.mutable_gpu_data();
  const Dtype* weight = this->blobs_[0].gpu_data();
  // Per-group strides into the weight matrix, column buffer, and output.
  int weight_offset = M_ * K_;
  int col_offset = K_ * N_;
  int top_offset = M_ * N_;
  for (int n = 0; n < NUM_; ++n) {
    // First, im2col: unroll the n-th image's patches into the column buffer.
    im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
        WIDTH_, KSIZE_, STRIDE_, col_data);
    // Second, innerproduct with groups: one (M_ x K_) * (K_ x N_) gemm each.
    for (int g = 0; g < GROUP_; ++g) {
      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
          (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
          (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
    }
    // Third, add bias: broadcast over all N_ positions via a rank-1 gemm
    // against the all-ones multiplier vector.
    if (biasterm_) {
      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
          N_, 1, (Dtype)1., this->blobs_[1].gpu_data(),
          reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
          (Dtype)1., top_data + (*top)[0]->offset(n));
    }
  }
}
128 template <typename Dtype>
129 Dtype ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
130 const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
131 const Dtype* top_diff = top[0]->cpu_diff();
132 const Dtype* weight = this->blobs_[0].cpu_data();
133 Dtype* weight_diff = this->blobs_[0].mutable_cpu_diff();
134 const Dtype* bottom_data = (*bottom)[0]->cpu_data();
135 Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
136 Dtype* col_data = col_buffer_.mutable_cpu_data();
137 Dtype* col_diff = col_buffer_.mutable_cpu_diff();
138 // bias gradient if necessary
139 Dtype* bias_diff = NULL;
141 if (biasterm_) {
142 bias_diff = this->blobs_[1].mutable_cpu_diff();
143 memset(bias_diff, 0., sizeof(Dtype) * this->blobs_[1].count());
144 for (int n = 0; n < NUM_; ++n) {
145 caffe_cpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
146 1., top_diff + top[0]->offset(n),
147 reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()), 1.,
148 bias_diff);
149 }
150 }
152 int weight_offset = M_ * K_;
153 int col_offset = K_ * N_;
154 int top_offset = M_ * N_;
155 memset(weight_diff, 0., sizeof(Dtype) * this->blobs_[0].count());
156 for (int n = 0; n < NUM_; ++n) {
157 // since we saved memory in the forward pass by not storing all col data,
158 // we will need to recompute them.
159 im2col_cpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
160 WIDTH_, KSIZE_, STRIDE_, col_data);
161 // gradient w.r.t. weight. Note that we will accumulate diffs.
162 for (int g = 0; g < GROUP_; ++g) {
163 caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
164 (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
165 col_data + col_offset * g, (Dtype)1.,
166 weight_diff + weight_offset * g);
167 }
168 // gradient w.r.t. bottom data, if necessary
169 if (propagate_down) {
170 for (int g = 0; g < GROUP_; ++g) {
171 caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
172 (Dtype)1., weight + weight_offset * g,
173 top_diff + top[0]->offset(n) + top_offset * g,
174 (Dtype)0., col_diff + col_offset * g);
175 }
176 // col2im back to the data
177 col2im_cpu(col_diff, CHANNELS_, HEIGHT_,
178 WIDTH_, KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
179 }
180 }
181 return Dtype(0.);
182 }
184 template <typename Dtype>
185 Dtype ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
186 const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
187 const Dtype* top_diff = top[0]->gpu_diff();
188 const Dtype* weight = this->blobs_[0].gpu_data();
189 Dtype* weight_diff = this->blobs_[0].mutable_gpu_diff();
190 const Dtype* bottom_data = (*bottom)[0]->gpu_data();
191 Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
192 Dtype* col_data = col_buffer_.mutable_gpu_data();
193 Dtype* col_diff = col_buffer_.mutable_gpu_diff();
194 // bias gradient if necessary
195 Dtype* bias_diff = NULL;
197 if (biasterm_) {
198 bias_diff = this->blobs_[1].mutable_gpu_diff();
199 CUDA_CHECK(cudaMemset(bias_diff, 0.,
200 sizeof(Dtype) * this->blobs_[1].count()));
201 for (int n = 0; n < NUM_; ++n) {
202 caffe_gpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
203 1., top_diff + top[0]->offset(n),
204 reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
205 1., bias_diff);
206 }
207 }
209 int weight_offset = M_ * K_;
210 int col_offset = K_ * N_;
211 int top_offset = M_ * N_;
212 CUDA_CHECK(cudaMemset(weight_diff, 0.,
213 sizeof(Dtype) * this->blobs_[0].count()));
214 for (int n = 0; n < NUM_; ++n) {
215 // since we saved memory in the forward pass by not storing all col data,
216 // we will need to recompute them.
217 im2col_gpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
218 WIDTH_, KSIZE_, STRIDE_, col_data);
219 // gradient w.r.t. weight. Note that we will accumulate diffs.
220 for (int g = 0; g < GROUP_; ++g) {
221 caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
222 (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
223 col_data + col_offset * g, (Dtype)1.,
224 weight_diff + weight_offset * g);
225 }
226 // gradient w.r.t. bottom data, if necessary
227 if (propagate_down) {
228 for (int g = 0; g < GROUP_; ++g) {
229 caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
230 (Dtype)1., weight + weight_offset * g,
231 top_diff + top[0]->offset(n) + top_offset * g,
232 (Dtype)0., col_diff + col_offset * g);
233 }
234 // col2im back to the data
235 col2im_gpu(col_diff, CHANNELS_, HEIGHT_,
236 WIDTH_, KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
237 }
238 }
239 return Dtype(0.);
240 }
242 INSTANTIATE_CLASS(ConvolutionLayer);
244 } // namespace caffe