1 #include "caffe/layer.hpp"
2 #include "caffe/vision_layers.hpp"
3 #include "caffe/util/im2col.hpp"
4 #include "caffe/filler.hpp"
5 #include "caffe/util/math_functions.hpp"
7 namespace caffe {
9 template <typename Dtype>
10 void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
11 vector<Blob<Dtype>*>* top) {
12 CHECK_EQ(bottom.size(), 1) << "Im2col Layer takes a single blob as input.";
13 CHECK_EQ(top->size(), 1) << "Im2col Layer takes a single blob as output.";
14 KSIZE_ = this->layer_param_.kernelsize();
15 STRIDE_ = this->layer_param_.stride();
16 GROUP_ = this->layer_param_.group();
17 NUM_ = bottom[0]->num();
18 CHANNELS_ = bottom[0]->channels();
19 HEIGHT_ = bottom[0]->height();
20 WIDTH_ = bottom[0]->width();
21 NUM_OUTPUT_ = this->layer_param_.num_output();
22 CHECK_EQ(CHANNELS_ % GROUP_, 0);
23 // The im2col result buffer would only hold one image at a time to avoid
24 // overly large memory usage.
25 int height_out = (HEIGHT_ - KSIZE_) / STRIDE_ + 1;
26 int width_out = (WIDTH_ - KSIZE_) / STRIDE_ + 1;
27 col_buffer_.Reshape(1, CHANNELS_ * KSIZE_ * KSIZE_, height_out, width_out);
28 // Set the parameters
29 CHECK_EQ(NUM_OUTPUT_ % GROUP_, 0)
30 << "Number of output should be multiples of group.";
31 biasterm_ = this->layer_param_.biasterm();
32 // Figure out the dimensions for individual gemms.
33 M_ = NUM_OUTPUT_ / GROUP_;
34 K_ = CHANNELS_ * KSIZE_ * KSIZE_ / GROUP_;
35 N_ = height_out * width_out;
36 (*top)[0]->Reshape(bottom[0]->num(), NUM_OUTPUT_, height_out, width_out);
37 if (biasterm_) {
38 this->blobs_.resize(2);
39 } else {
40 this->blobs_.resize(1);
41 }
42 // Intialize the weight
43 this->blobs_[0].Reshape(1, 1, NUM_OUTPUT_, K_);
44 // fill the weights
45 shared_ptr<Filler<Dtype> > weight_filler(
46 GetFiller<Dtype>(this->layer_param_.weight_filler()));
47 weight_filler->Fill(&this->blobs_[0]);
48 // If necessary, intiialize and fill the bias term
49 if (biasterm_) {
50 this->blobs_[1].Reshape(1, 1, 1, NUM_OUTPUT_);
51 shared_ptr<Filler<Dtype> > bias_filler(
52 GetFiller<Dtype>(this->layer_param_.bias_filler()));
53 bias_filler->Fill(&this->blobs_[1]);
54 bias_multiplier_.reset(new SyncedMemory(N_ * sizeof(Dtype)));
55 Dtype* bias_multiplier_data = (Dtype*)bias_multiplier_->mutable_cpu_data();
56 for (int i = 0; i < N_; ++i) {
57 bias_multiplier_data[i] = 1.;
58 }
59 }
60 };
63 template <typename Dtype>
64 void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
65 vector<Blob<Dtype>*>* top) {
66 const Dtype* bottom_data = bottom[0]->cpu_data();
67 Dtype* top_data = (*top)[0]->mutable_cpu_data();
68 Dtype* col_data = col_buffer_.mutable_cpu_data();
69 const Dtype* weight = this->blobs_[0].cpu_data();
70 int weight_offset = M_ * K_;
71 int col_offset = K_ * N_;
72 int top_offset = M_ * N_;
73 for (int n = 0; n < NUM_; ++n) {
74 // First, im2col
75 im2col_cpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
76 WIDTH_, KSIZE_, STRIDE_, col_data);
77 // Second, innerproduct with groups
78 for (int g = 0; g < GROUP_; ++g) {
79 caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
80 (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
81 (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
82 }
83 // third, add bias
84 if (biasterm_) {
85 caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
86 N_, 1, (Dtype)1., this->blobs_[1].cpu_data(),
87 (Dtype*)bias_multiplier_->cpu_data(), (Dtype)1.,
88 top_data + (*top)[0]->offset(n));
89 }
90 }
91 }
93 template <typename Dtype>
94 void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
95 vector<Blob<Dtype>*>* top) {
96 const Dtype* bottom_data = bottom[0]->gpu_data();
97 Dtype* top_data = (*top)[0]->mutable_gpu_data();
98 Dtype* col_data = col_buffer_.mutable_gpu_data();
99 const Dtype* weight = this->blobs_[0].gpu_data();
100 int weight_offset = M_ * K_;
101 int col_offset = K_ * N_;
102 int top_offset = M_ * N_;
103 for (int n = 0; n < NUM_; ++n) {
104 // First, im2col
105 im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
106 WIDTH_, KSIZE_, STRIDE_, col_data);
107 // Second, innerproduct with groups
108 for (int g = 0; g < GROUP_; ++g) {
109 caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
110 (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
111 (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
112 }
113 // third, add bias
114 if (biasterm_) {
115 caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
116 N_, 1, (Dtype)1., this->blobs_[1].gpu_data(),
117 (Dtype*)bias_multiplier_->gpu_data(), (Dtype)1.,
118 top_data + (*top)[0]->offset(n));
119 }
120 }
121 }
123 template <typename Dtype>
124 Dtype ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
125 const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
126 const Dtype* top_diff = top[0]->cpu_diff();
127 const Dtype* weight = this->blobs_[0].cpu_data();
128 Dtype* weight_diff = this->blobs_[0].mutable_cpu_diff();
129 const Dtype* bottom_data = (*bottom)[0]->cpu_data();
130 Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
131 Dtype* col_data = col_buffer_.mutable_cpu_data();
132 Dtype* col_diff = col_buffer_.mutable_cpu_diff();
133 // bias gradient if necessary
134 Dtype* bias_diff = NULL;
136 if (biasterm_) {
137 bias_diff = this->blobs_[1].mutable_cpu_diff();
138 memset(bias_diff, 0., sizeof(Dtype) * this->blobs_[1].count());
139 for (int n = 0; n < NUM_; ++n) {
140 caffe_cpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
141 1., top_diff + top[0]->offset(n),
142 (Dtype*)bias_multiplier_->cpu_data(), 1., bias_diff);
143 }
144 }
146 int weight_offset = M_ * K_;
147 int col_offset = K_ * N_;
148 int top_offset = M_ * N_;
149 memset(weight_diff, 0., sizeof(Dtype) * this->blobs_[0].count());
150 for (int n = 0; n < NUM_; ++n) {
151 // since we saved memory in the forward pass by not storing all col data,
152 // we will need to recompute them.
153 im2col_cpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
154 WIDTH_, KSIZE_, STRIDE_, col_data);
155 // gradient w.r.t. weight. Note that we will accumulate diffs.
156 for (int g = 0; g < GROUP_; ++g) {
157 caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
158 (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
159 col_data + col_offset * g, (Dtype)1.,
160 weight_diff + weight_offset * g);
161 }
162 // gradient w.r.t. bottom data, if necessary
163 if (propagate_down) {
164 for (int g = 0; g < GROUP_; ++g) {
165 caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
166 (Dtype)1., weight + weight_offset * g,
167 top_diff + top[0]->offset(n) + top_offset * g,
168 (Dtype)0., col_diff + col_offset * g);
169 }
170 // col2im back to the data
171 col2im_cpu(col_diff, CHANNELS_, HEIGHT_,
172 WIDTH_, KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
173 }
174 }
175 return Dtype(0.);
176 }
178 template <typename Dtype>
179 Dtype ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
180 const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
181 const Dtype* top_diff = top[0]->gpu_diff();
182 const Dtype* weight = this->blobs_[0].gpu_data();
183 Dtype* weight_diff = this->blobs_[0].mutable_gpu_diff();
184 const Dtype* bottom_data = (*bottom)[0]->gpu_data();
185 Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
186 Dtype* col_data = col_buffer_.mutable_gpu_data();
187 Dtype* col_diff = col_buffer_.mutable_gpu_diff();
188 // bias gradient if necessary
189 Dtype* bias_diff = NULL;
191 if (biasterm_) {
192 bias_diff = this->blobs_[1].mutable_gpu_diff();
193 CUDA_CHECK(cudaMemset(bias_diff, 0.,
194 sizeof(Dtype) * this->blobs_[1].count()));
195 for (int n = 0; n < NUM_; ++n) {
196 caffe_gpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
197 1., top_diff + top[0]->offset(n),
198 (Dtype*)bias_multiplier_->gpu_data(), 1., bias_diff);
199 }
200 }
202 int weight_offset = M_ * K_;
203 int col_offset = K_ * N_;
204 int top_offset = M_ * N_;
205 CUDA_CHECK(cudaMemset(weight_diff, 0.,
206 sizeof(Dtype) * this->blobs_[0].count()));
207 for (int n = 0; n < NUM_; ++n) {
208 // since we saved memory in the forward pass by not storing all col data,
209 // we will need to recompute them.
210 im2col_gpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
211 WIDTH_, KSIZE_, STRIDE_, col_data);
212 // gradient w.r.t. weight. Note that we will accumulate diffs.
213 for (int g = 0; g < GROUP_; ++g) {
214 caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
215 (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
216 col_data + col_offset * g, (Dtype)1.,
217 weight_diff + weight_offset * g);
218 }
219 // gradient w.r.t. bottom data, if necessary
220 if (propagate_down) {
221 for (int g = 0; g < GROUP_; ++g) {
222 caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
223 (Dtype)1., weight + weight_offset * g,
224 top_diff + top[0]->offset(n) + top_offset * g,
225 (Dtype)0., col_diff + col_offset * g);
226 }
227 // col2im back to the data
228 col2im_gpu(col_diff, CHANNELS_, HEIGHT_,
229 WIDTH_, KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
230 }
231 }
232 return Dtype(0.);
233 }
// Instantiates ConvolutionLayer through the project's INSTANTIATE_CLASS
// macro. The macro is defined outside this file; presumably it emits the
// explicit template instantiations for the supported Dtype values, which the
// member templates defined above require -- confirm against its definition.
235 INSTANTIATE_CLASS(ConvolutionLayer);
237 } // namespace caffe