1 // Copyright 2013 Yangqing Jia
3 #include <vector>
5 #include "caffe/layer.hpp"
6 #include "caffe/vision_layers.hpp"
7 #include "caffe/util/im2col.hpp"
8 #include "caffe/filler.hpp"
9 #include "caffe/util/math_functions.hpp"
11 namespace caffe {
13 template <typename Dtype>
14 void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
15 vector<Blob<Dtype>*>* top) {
16 CHECK_EQ(bottom.size(), 1) << "Conv Layer takes a single blob as input.";
17 CHECK_EQ(top->size(), 1) << "Conv Layer takes a single blob as output.";
18 KSIZE_ = this->layer_param_.kernelsize();
19 STRIDE_ = this->layer_param_.stride();
20 GROUP_ = this->layer_param_.group();
21 NUM_ = bottom[0]->num();
22 CHANNELS_ = bottom[0]->channels();
23 HEIGHT_ = bottom[0]->height();
24 WIDTH_ = bottom[0]->width();
25 NUM_OUTPUT_ = this->layer_param_.num_output();
26 CHECK_GT(NUM_OUTPUT_, 0);
27 CHECK_EQ(CHANNELS_ % GROUP_, 0);
28 // The im2col result buffer would only hold one image at a time to avoid
29 // overly large memory usage.
30 int height_out = (HEIGHT_ - KSIZE_) / STRIDE_ + 1;
31 int width_out = (WIDTH_ - KSIZE_) / STRIDE_ + 1;
32 col_buffer_.Reshape(1, CHANNELS_ * KSIZE_ * KSIZE_, height_out, width_out);
33 // Set the parameters
34 CHECK_EQ(NUM_OUTPUT_ % GROUP_, 0)
35 << "Number of output should be multiples of group.";
36 biasterm_ = this->layer_param_.biasterm();
37 // Figure out the dimensions for individual gemms.
38 M_ = NUM_OUTPUT_ / GROUP_;
39 K_ = CHANNELS_ * KSIZE_ * KSIZE_ / GROUP_;
40 N_ = height_out * width_out;
41 (*top)[0]->Reshape(bottom[0]->num(), NUM_OUTPUT_, height_out, width_out);
42 // Check if we need to set up the weights
43 if (this->blobs_.size() > 0) {
44 LOG(INFO) << "Skipping parameter initialization";
45 } else {
46 if (biasterm_) {
47 this->blobs_.resize(2);
48 } else {
49 this->blobs_.resize(1);
50 }
51 // Intialize the weight
52 this->blobs_[0].reset(
53 new Blob<Dtype>(NUM_OUTPUT_, CHANNELS_ / GROUP_, KSIZE_, KSIZE_));
54 // fill the weights
55 shared_ptr<Filler<Dtype> > weight_filler(
56 GetFiller<Dtype>(this->layer_param_.weight_filler()));
57 weight_filler->Fill(this->blobs_[0].get());
58 // If necessary, intiialize and fill the bias term
59 if (biasterm_) {
60 this->blobs_[1].reset(new Blob<Dtype>(1, 1, 1, NUM_OUTPUT_));
61 shared_ptr<Filler<Dtype> > bias_filler(
62 GetFiller<Dtype>(this->layer_param_.bias_filler()));
63 bias_filler->Fill(this->blobs_[1].get());
64 }
65 }
66 // Set up the bias filler
67 if (biasterm_) {
68 bias_multiplier_.reset(new SyncedMemory(N_ * sizeof(Dtype)));
69 Dtype* bias_multiplier_data =
70 reinterpret_cast<Dtype*>(bias_multiplier_->mutable_cpu_data());
71 for (int i = 0; i < N_; ++i) {
72 bias_multiplier_data[i] = 1.;
73 }
74 }
75 };
78 template <typename Dtype>
79 void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
80 vector<Blob<Dtype>*>* top) {
81 const Dtype* bottom_data = bottom[0]->cpu_data();
82 Dtype* top_data = (*top)[0]->mutable_cpu_data();
83 Dtype* col_data = col_buffer_.mutable_cpu_data();
84 const Dtype* weight = this->blobs_[0]->cpu_data();
85 int weight_offset = M_ * K_;
86 int col_offset = K_ * N_;
87 int top_offset = M_ * N_;
88 for (int n = 0; n < NUM_; ++n) {
89 // First, im2col
90 im2col_cpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
91 WIDTH_, KSIZE_, STRIDE_, col_data);
92 // Second, innerproduct with groups
93 for (int g = 0; g < GROUP_; ++g) {
94 caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
95 (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
96 (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
97 }
98 // third, add bias
99 if (biasterm_) {
100 caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
101 N_, 1, (Dtype)1., this->blobs_[1]->cpu_data(),
102 reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()),
103 (Dtype)1., top_data + (*top)[0]->offset(n));
104 }
105 }
106 }
// Forward pass (GPU): mirrors Forward_cpu — im2col, one GEMM per group,
// then a bias broadcast — using the GPU math wrappers on device memory.
template <typename Dtype>
void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    vector<Blob<Dtype>*>* top) {
  const Dtype* bottom_data = bottom[0]->gpu_data();
  Dtype* top_data = (*top)[0]->mutable_gpu_data();
  Dtype* col_data = col_buffer_.mutable_gpu_data();
  const Dtype* weight = this->blobs_[0]->gpu_data();
  // Per-group strides into the weight, column and output buffers.
  int weight_offset = M_ * K_;
  int col_offset = K_ * N_;
  int top_offset = M_ * N_;
  for (int n = 0; n < NUM_; ++n) {
    // First, im2col: lay out all patches of image n as columns.
    im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
        WIDTH_, KSIZE_, STRIDE_, col_data);
    // Second, innerproduct with groups: (M_ x K_) * (K_ x N_) per group.
    for (int g = 0; g < GROUP_; ++g) {
      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
          (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
          (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
    }
    // third, add bias: broadcast over all N_ positions via the ones vector.
    if (biasterm_) {
      caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
          N_, 1, (Dtype)1., this->blobs_[1]->gpu_data(),
          reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
          (Dtype)1., top_data + (*top)[0]->offset(n));
    }
  }
}
// Backward pass (CPU): computes the bias gradient (summed over images and
// spatial positions), the weight gradient (accumulated over all images),
// and — when propagate_down — the gradient w.r.t. the bottom data via
// the transposed weight GEMM followed by col2im.
template <typename Dtype>
Dtype ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
  const Dtype* bottom_data = (*bottom)[0]->cpu_data();
  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
  Dtype* col_data = col_buffer_.mutable_cpu_data();
  Dtype* col_diff = col_buffer_.mutable_cpu_diff();
  // bias gradient if necessary
  Dtype* bias_diff = NULL;
  if (biasterm_) {
    bias_diff = this->blobs_[1]->mutable_cpu_diff();
    // Zero the accumulator, then fold in top_diff * ones for every image
    // (beta = 1 in the gemv accumulates across images).
    memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1]->count());
    for (int n = 0; n < NUM_; ++n) {
      caffe_cpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
          1., top_diff + top[0]->offset(n),
          reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()), 1.,
          bias_diff);
    }
  }
  // Per-group strides into the weight, column and output buffers.
  int weight_offset = M_ * K_;
  int col_offset = K_ * N_;
  int top_offset = M_ * N_;
  memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0]->count());
  for (int n = 0; n < NUM_; ++n) {
    // since we saved memory in the forward pass by not storing all col data,
    // we will need to recompute them.
    im2col_cpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
        WIDTH_, KSIZE_, STRIDE_, col_data);
    // gradient w.r.t. weight. Note that we will accumulate diffs.
    for (int g = 0; g < GROUP_; ++g) {
      caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
          (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
          col_data + col_offset * g, (Dtype)1.,
          weight_diff + weight_offset * g);
    }
    // gradient w.r.t. bottom data, if necessary
    if (propagate_down) {
      // col_diff = weight^T * top_diff for each group ...
      for (int g = 0; g < GROUP_; ++g) {
        caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
            (Dtype)1., weight + weight_offset * g,
            top_diff + top[0]->offset(n) + top_offset * g,
            (Dtype)0., col_diff + col_offset * g);
      }
      // ... then col2im folds the column gradients back to image layout.
      col2im_cpu(col_diff, CHANNELS_, HEIGHT_,
          WIDTH_, KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
    }
  }
  // This layer contributes no loss of its own.
  return Dtype(0.);
}
194 template <typename Dtype>
195 Dtype ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
196 const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
197 const Dtype* top_diff = top[0]->gpu_diff();
198 const Dtype* weight = this->blobs_[0]->gpu_data();
199 Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
200 const Dtype* bottom_data = (*bottom)[0]->gpu_data();
201 Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
202 Dtype* col_data = col_buffer_.mutable_gpu_data();
203 Dtype* col_diff = col_buffer_.mutable_gpu_diff();
204 // bias gradient if necessary
205 Dtype* bias_diff = NULL;
207 if (biasterm_) {
208 bias_diff = this->blobs_[1]->mutable_gpu_diff();
209 CUDA_CHECK(cudaMemset(bias_diff, 0.,
210 sizeof(Dtype) * this->blobs_[1]->count()));
211 for (int n = 0; n < NUM_; ++n) {
212 caffe_gpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
213 1., top_diff + top[0]->offset(n),
214 reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
215 1., bias_diff);
216 }
217 }
219 int weight_offset = M_ * K_;
220 int col_offset = K_ * N_;
221 int top_offset = M_ * N_;
222 CUDA_CHECK(cudaMemset(weight_diff, 0.,
223 sizeof(Dtype) * this->blobs_[0]->count()));
224 for (int n = 0; n < NUM_; ++n) {
225 // since we saved memory in the forward pass by not storing all col data,
226 // we will need to recompute them.
227 im2col_gpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
228 WIDTH_, KSIZE_, STRIDE_, col_data);
229 // gradient w.r.t. weight. Note that we will accumulate diffs.
230 for (int g = 0; g < GROUP_; ++g) {
231 caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
232 (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
233 col_data + col_offset * g, (Dtype)1.,
234 weight_diff + weight_offset * g);
235 }
236 // gradient w.r.t. bottom data, if necessary
237 if (propagate_down) {
238 for (int g = 0; g < GROUP_; ++g) {
239 caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
240 (Dtype)1., weight + weight_offset * g,
241 top_diff + top[0]->offset(n) + top_offset * g,
242 (Dtype)0., col_diff + col_offset * g);
243 }
244 // col2im back to the data
245 col2im_gpu(col_diff, CHANNELS_, HEIGHT_,
246 WIDTH_, KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
247 }
248 }
249 return Dtype(0.);
250 }
// Instantiate the template for the concrete Dtype specializations the
// INSTANTIATE_CLASS macro defines.
INSTANTIATE_CLASS(ConvolutionLayer);
254 } // namespace caffe