convolution layer. I might have broken something in im2col.
author    Yangqing Jia <jiayq84@gmail.com>
Fri, 20 Sep 2013 23:39:25 +0000 (16:39 -0700)
committer Yangqing Jia <jiayq84@gmail.com>
Fri, 20 Sep 2013 23:39:25 +0000 (16:39 -0700)
src/caffeine/blob.hpp
src/caffeine/layers/conv_layer.cpp [new file with mode: 0644]
src/caffeine/layers/inner_product_layer.cpp
src/caffeine/test/test_convolution_layer.cpp [new file with mode: 0644]
src/caffeine/test/test_gradient_check_util.hpp
src/caffeine/vision_layers.hpp

diff --git a/src/caffeine/blob.hpp b/src/caffeine/blob.hpp
index acef4845c3a35d8f88158704897822e55f77248b..1457ee1d9d5d0ec07e2f4ebe37303002492623fd 100644 (file)
@@ -38,7 +38,7 @@ class Blob {
       const int w) const {
     return *(cpu_diff() + offset(n, c, h, w));
   }
-  
+
   const Dtype* cpu_data() const;
   const Dtype* gpu_data() const;
   const Dtype* cpu_diff() const;
diff --git a/src/caffeine/layers/conv_layer.cpp b/src/caffeine/layers/conv_layer.cpp
new file mode 100644 (file)
index 0000000..8670d81
--- /dev/null
@@ -0,0 +1,237 @@
+#include "caffeine/layer.hpp"
+#include "caffeine/vision_layers.hpp"
+#include "caffeine/util/im2col.hpp"
+#include "caffeine/filler.hpp"
+#include "caffeine/util/math_functions.hpp"
+
+namespace caffeine {
+
+template <typename Dtype>
+void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  CHECK_EQ(bottom.size(), 1) << "Conv Layer takes a single blob as input.";
+  CHECK_EQ(top->size(), 1) << "Conv Layer takes a single blob as output.";
+  KSIZE_ = this->layer_param_.kernelsize();
+  STRIDE_ = this->layer_param_.stride();
+  GROUP_ = this->layer_param_.group();
+  NUM_ = bottom[0]->num();
+  CHANNELS_ = bottom[0]->channels();
+  HEIGHT_ = bottom[0]->height();
+  WIDTH_ = bottom[0]->width();
+  NUM_OUTPUT_ = this->layer_param_.num_output();
+  CHECK_EQ(CHANNELS_ % GROUP_, 0);
+  // The im2col result buffer will only hold one image at a time to avoid
+  // overly large memory usage.
+  int height_out = (HEIGHT_ - KSIZE_) / STRIDE_ + 1;
+  int width_out = (WIDTH_ - KSIZE_) / STRIDE_ + 1;
+  col_buffer_.Reshape(1, CHANNELS_ * KSIZE_ * KSIZE_, height_out, width_out);
+  // Set the parameters
+  CHECK_EQ(NUM_OUTPUT_ % GROUP_, 0)
+      << "Number of outputs should be a multiple of the group count.";
+  biasterm_ = this->layer_param_.biasterm();
+  // Figure out the dimensions for the individual gemms.
+  M_ = NUM_OUTPUT_ / GROUP_;
+  K_ = CHANNELS_ * KSIZE_ * KSIZE_ / GROUP_;
+  N_ = height_out * width_out;
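+  // Per-group gemm dimensions: each group's weight matrix is M_ x K_, the
+  // matching slice of the im2col buffer is K_ x N_, and their product is
+  // the M_ x N_ output of that group over all spatial output locations.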
+  (*top)[0]->Reshape(bottom[0]->num(), NUM_OUTPUT_, height_out, width_out);
+  if (biasterm_) {
+    this->blobs_.resize(2);
+  } else {
+    this->blobs_.resize(1);
+  }
+  // Initialize the weights
+  this->blobs_[0].Reshape(1, 1, NUM_OUTPUT_, K_);
+  // fill the weights
+  shared_ptr<Filler<Dtype> > weight_filler(
+      GetFiller<Dtype>(this->layer_param_.weight_filler()));
+  weight_filler->Fill(&this->blobs_[0]);
+  // If necessary, initialize and fill the bias term
+  if (biasterm_) {
+    this->blobs_[1].Reshape(1, 1, 1, NUM_OUTPUT_);
+    shared_ptr<Filler<Dtype> > bias_filler(
+        GetFiller<Dtype>(this->layer_param_.bias_filler()));
+    bias_filler->Fill(&this->blobs_[1]);
+    bias_multiplier_.reset(new SyncedMemory(N_ * sizeof(Dtype)));
+    Dtype* bias_multiplier_data = (Dtype*)bias_multiplier_->mutable_cpu_data();
+    for (int i = 0; i < N_; ++i) {
+        bias_multiplier_data[i] = 1.;
+    }
+  }
+}
+
+
+template <typename Dtype>
+void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  Dtype* top_data = (*top)[0]->mutable_cpu_data();
+  Dtype* col_data = col_buffer_.mutable_cpu_data();
+  const Dtype* weight = this->blobs_[0].cpu_data();
+  int weight_offset = M_ * K_;
+  int col_offset = K_ * N_;
+  int top_offset = M_ * N_;
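+  // Per-group offsets into the weights, the column buffer, and the output.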
+  for (int n = 0; n < NUM_; ++n) {
+    // First, im2col
+    im2col_cpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
+        WIDTH_, KSIZE_, STRIDE_, col_data);
+    // Second, innerproduct with groups
+    for (int g = 0; g < GROUP_; ++g) {
+      caffeine_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+        (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
+        (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
+    }
+    // third, add bias
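+    // blobs_[1] (NUM_OUTPUT_ x 1) times the all-ones row vector (1 x N_)
+    // broadcasts each channel's bias over all N_ output locations.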
+    if (biasterm_) {
+      caffeine_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
+          N_, 1, (Dtype)1., this->blobs_[1].cpu_data(),
+          (Dtype*)bias_multiplier_->cpu_data(), (Dtype)1.,
+          top_data + (*top)[0]->offset(n));
+    }
+  }
+}
+
+template <typename Dtype>
+void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top) {
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  Dtype* top_data = (*top)[0]->mutable_gpu_data();
+  Dtype* col_data = col_buffer_.mutable_gpu_data();
+  const Dtype* weight = this->blobs_[0].gpu_data();
+  int weight_offset = M_ * K_;
+  int col_offset = K_ * N_;
+  int top_offset = M_ * N_;
+  for (int n = 0; n < NUM_; ++n) {
+    // First, im2col
+    im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
+        WIDTH_, KSIZE_, STRIDE_, col_data);
+    // Second, innerproduct with groups
+    for (int g = 0; g < GROUP_; ++g) {
+      caffeine_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+        (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
+        (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
+    }
+    // third, add bias
+    if (biasterm_) {
+      caffeine_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
+          N_, 1, (Dtype)1., this->blobs_[1].gpu_data(),
+          (Dtype*)bias_multiplier_->gpu_data(), (Dtype)1.,
+          top_data + (*top)[0]->offset(n));
+    }
+  }
+}
+
+template <typename Dtype>
+Dtype ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  const Dtype* top_diff = top[0]->cpu_diff();
+  const Dtype* weight = this->blobs_[0].cpu_data();
+  Dtype* weight_diff = this->blobs_[0].mutable_cpu_diff();
+  const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+  Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+  Dtype* col_data = col_buffer_.mutable_cpu_data();
+  Dtype* col_diff = col_buffer_.mutable_cpu_diff();
+  // bias gradient if necessary
+  Dtype* bias_diff = NULL;
+
+  if (biasterm_) {
+    bias_diff = this->blobs_[1].mutable_cpu_diff();
+    memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1].count());
+    for (int n = 0; n < NUM_; ++n) {
+      caffeine_cpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
+        1., top_diff + top[0]->offset(n),
+        (Dtype*)bias_multiplier_->cpu_data(), 1., bias_diff);
+    }
+  }
+
+  int weight_offset = M_ * K_;
+  int col_offset = K_ * N_;
+  int top_offset = M_ * N_;
+  memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0].count());
+  for (int n = 0; n < NUM_; ++n) {
+    // Since we saved memory in the forward pass by not storing all of the
+    // col data, we have to recompute it for each image here.
+    im2col_cpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
+        WIDTH_, KSIZE_, STRIDE_, col_data);
+    // gradient w.r.t. weight. Note that we will accumulate diffs.
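+    // weight_diff (M_ x K_) += top_diff (M_ x N_) * col_data^T (N_ x K_),
+    // per group, accumulated over all images in the batch.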
+    for (int g = 0; g < GROUP_; ++g) {
+      caffeine_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+        (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
+        col_data + col_offset * g, (Dtype)1.,
+        weight_diff + weight_offset * g);
+    }
+    // gradient w.r.t. bottom data, if necessary
+    if (propagate_down) {
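+      // col_diff (K_ x N_) = weight^T (K_ x M_) * top_diff (M_ x N_) per
+      // group; col2im below scatters the columns back into image layout.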
+      for (int g = 0; g < GROUP_; ++g) {
+        caffeine_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+          (Dtype)1., weight + weight_offset * g,
+          top_diff + top[0]->offset(n) + top_offset * g,
+          (Dtype)0., col_diff + col_offset * g);
+      }
+      // col2im back to the data
+      col2im_cpu(col_diff, CHANNELS_, HEIGHT_,
+        WIDTH_, KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
+    }
+  }
+  return Dtype(0.);
+}
+
+template <typename Dtype>
+Dtype ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+  const Dtype* top_diff = top[0]->gpu_diff();
+  const Dtype* weight = this->blobs_[0].gpu_data();
+  Dtype* weight_diff = this->blobs_[0].mutable_gpu_diff();
+  const Dtype* bottom_data = (*bottom)[0]->gpu_data();
+  Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+  Dtype* col_data = col_buffer_.mutable_gpu_data();
+  Dtype* col_diff = col_buffer_.mutable_gpu_diff();
+  // bias gradient if necessary
+  Dtype* bias_diff = NULL;
+
+  if (biasterm_) {
+    bias_diff = this->blobs_[1].mutable_gpu_diff();
+    CUDA_CHECK(cudaMemset(bias_diff, 0,
+        sizeof(Dtype) * this->blobs_[1].count()));
+    for (int n = 0; n < NUM_; ++n) {
+      caffeine_gpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
+        1., top_diff + top[0]->offset(n),
+        (Dtype*)bias_multiplier_->gpu_data(), 1., bias_diff);
+    }
+  }
+
+  int weight_offset = M_ * K_;
+  int col_offset = K_ * N_;
+  int top_offset = M_ * N_;
+  CUDA_CHECK(cudaMemset(weight_diff, 0,
+      sizeof(Dtype) * this->blobs_[0].count()));
+  for (int n = 0; n < NUM_; ++n) {
+    // Since we saved memory in the forward pass by not storing all of the
+    // col data, we have to recompute it for each image here.
+    im2col_gpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
+        WIDTH_, KSIZE_, STRIDE_, col_data);
+    // gradient w.r.t. weight. Note that we will accumulate diffs.
+    for (int g = 0; g < GROUP_; ++g) {
+      caffeine_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+        (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
+        col_data + col_offset * g, (Dtype)1.,
+        weight_diff + weight_offset * g);
+    }
+    // gradient w.r.t. bottom data, if necessary
+    if (propagate_down) {
+      for (int g = 0; g < GROUP_; ++g) {
+        caffeine_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+          (Dtype)1., weight + weight_offset * g,
+          top_diff + top[0]->offset(n) + top_offset * g,
+          (Dtype)0., col_diff + col_offset * g);
+      }
+      // col2im back to the data
+      col2im_gpu(col_diff, CHANNELS_, HEIGHT_,
+        WIDTH_, KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
+    }
+  }
+  return Dtype(0.);
+}
+
+INSTANTIATE_CLASS(ConvolutionLayer);
+
+}  // namespace caffeine
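
For reference, not part of this patch: the convolution layer above depends on
im2col_cpu / im2col_gpu and col2im_cpu / col2im_gpu from
caffeine/util/im2col.hpp, which this commit does not show. A minimal CPU
sketch of what im2col_cpu is assumed to compute, inferred from the call sites
and the col_buffer_ shape (CHANNELS_ * KSIZE_ * KSIZE_ rows by
height_out * width_out columns), would be:

template <typename Dtype>
void im2col_cpu(const Dtype* data_im, const int channels, const int height,
    const int width, const int ksize, const int stride, Dtype* data_col) {
  // Each output row corresponds to one (channel, kernel-row, kernel-col)
  // triple; each output column corresponds to one spatial output location.
  int height_col = (height - ksize) / stride + 1;
  int width_col = (width - ksize) / stride + 1;
  int channels_col = channels * ksize * ksize;
  for (int c = 0; c < channels_col; ++c) {
    int w_offset = c % ksize;
    int h_offset = (c / ksize) % ksize;
    int c_im = c / ksize / ksize;
    for (int h = 0; h < height_col; ++h) {
      for (int w = 0; w < width_col; ++w) {
        data_col[(c * height_col + h) * width_col + w] =
            data_im[(c_im * height + h * stride + h_offset) * width
                + w * stride + w_offset];
      }
    }
  }
}

col2im is assumed to be the adjoint operation: it adds each column entry back
into its source image location, which is why the backward pass can turn
col_diff into bottom_diff with a single col2im call.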
diff --git a/src/caffeine/layers/inner_product_layer.cpp b/src/caffeine/layers/inner_product_layer.cpp
index a16439c2d5d4c5ba3f5a998c5f2de318e9e24562..2f7af89fa1e38aed6ee2f82c4732dd9d17c5a2ed 100644 (file)
@@ -28,7 +28,7 @@ void InnerProductLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
     this->blobs_.resize(1);
   }
   // Intialize the weight
-  this->blobs_[0].Reshape(1, 1, K_, N_);
+  this->blobs_[0].Reshape(1, 1, N_, K_);
   // fill the weights
   shared_ptr<Filler<Dtype> > weight_filler(
       GetFiller<Dtype>(this->layer_param_.weight_filler()));
diff --git a/src/caffeine/test/test_convolution_layer.cpp b/src/caffeine/test/test_convolution_layer.cpp
new file mode 100644 (file)
index 0000000..15fcfa2
--- /dev/null
@@ -0,0 +1,163 @@
+#include <cstring>
+#include <cuda_runtime.h>
+
+#include "gtest/gtest.h"
+#include "caffeine/blob.hpp"
+#include "caffeine/common.hpp"
+#include "caffeine/filler.hpp"
+#include "caffeine/vision_layers.hpp"
+#include "caffeine/test/test_gradient_check_util.hpp"
+
+#include "caffeine/test/test_caffeine_main.hpp"
+
+namespace caffeine {
+extern cudaDeviceProp CAFFEINE_TEST_CUDA_PROP;
+
+template <typename Dtype>
+class ConvolutionLayerTest : public ::testing::Test {
+ protected:
+  ConvolutionLayerTest()
+      : blob_bottom_(new Blob<Dtype>()),
+        blob_top_(new Blob<Dtype>()) {}
+  virtual void SetUp() {
+    blob_bottom_->Reshape(2, 3, 6, 5);
+    // fill the values
+    FillerParameter filler_param;
+    filler_param.set_value(1.);
+    GaussianFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_top_vec_.push_back(blob_top_);
+  }
+
+  virtual ~ConvolutionLayerTest() { delete blob_bottom_; delete blob_top_; }
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_top_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+typedef ::testing::Types<float, double> Dtypes;
+TYPED_TEST_CASE(ConvolutionLayerTest, Dtypes);
+
+TYPED_TEST(ConvolutionLayerTest, TestSetup) {
+  LayerParameter layer_param;
+  layer_param.set_kernelsize(3);
+  layer_param.set_stride(2);
+  layer_param.set_num_output(4);
+  shared_ptr<Layer<TypeParam> > layer(
+      new ConvolutionLayer<TypeParam>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  EXPECT_EQ(this->blob_top_->num(), 2);
+  EXPECT_EQ(this->blob_top_->channels(), 4);
+  EXPECT_EQ(this->blob_top_->height(), 2);
+  EXPECT_EQ(this->blob_top_->width(), 2);
+  // setting group should not change the shape
+  layer_param.set_num_output(3);
+  layer_param.set_group(3);
+  layer.reset(new ConvolutionLayer<TypeParam>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  EXPECT_EQ(this->blob_top_->num(), 2);
+  EXPECT_EQ(this->blob_top_->channels(), 3);
+  EXPECT_EQ(this->blob_top_->height(), 2);
+  EXPECT_EQ(this->blob_top_->width(), 2);
+}
+
+TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolution) {
+  // Check that the convolution layer produces the expected constant
+  // output for constant inputs, weights, and bias.
+  FillerParameter filler_param;
+  filler_param.set_value(1.);
+  ConstantFiller<TypeParam> filler(filler_param);
+  filler.Fill(this->blob_bottom_);
+  LayerParameter layer_param;
+  layer_param.set_kernelsize(3);
+  layer_param.set_stride(2);
+  layer_param.set_num_output(4);
+  layer_param.mutable_weight_filler()->set_type("constant");
+  layer_param.mutable_weight_filler()->set_value(1);
+  layer_param.mutable_bias_filler()->set_type("constant");
+  layer_param.mutable_bias_filler()->set_value(0.1);
+  shared_ptr<Layer<TypeParam> > layer(
+      new ConvolutionLayer<TypeParam>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  Caffeine::set_mode(Caffeine::CPU);
+  layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // After the convolution, every output value should be 27.1.
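+  // (A 3x3 kernel over 3 input channels of constant 1, with constant
+  // weights of 1 and a bias of 0.1, gives 3*3*3*1*1 + 0.1 = 27.1.)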
+  const TypeParam* top_data = this->blob_top_->cpu_data();
+  for (int i = 0; i < this->blob_top_->count(); ++i) {
+    EXPECT_GE(top_data[i], 27.1 - 1e-4);
+    EXPECT_LE(top_data[i], 27.1 + 1e-4);
+  }
+  // Test GPU
+  Caffeine::set_mode(Caffeine::GPU);
+  layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // After the convolution, every output value should be 27.1.
+  top_data = this->blob_top_->cpu_data();
+  for (int i = 0; i < this->blob_top_->count(); ++i) {
+    EXPECT_GE(top_data[i], 27.1 - 1e-4);
+    EXPECT_LE(top_data[i], 27.1 + 1e-4);
+  }
+}
+
+TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolutionGroup) {
+  // Same check as above, but with groups: each output channel should only
+  // see its own input channel.
+  FillerParameter filler_param;
+  filler_param.set_value(1.);
+  ConstantFiller<TypeParam> filler(filler_param);
+  filler.Fill(this->blob_bottom_);
+  LayerParameter layer_param;
+  layer_param.set_kernelsize(3);
+  layer_param.set_stride(2);
+  layer_param.set_num_output(3);
+  layer_param.set_group(3);
+  layer_param.mutable_weight_filler()->set_type("constant");
+  layer_param.mutable_weight_filler()->set_value(1);
+  layer_param.mutable_bias_filler()->set_type("constant");
+  layer_param.mutable_bias_filler()->set_value(0.1);
+  shared_ptr<Layer<TypeParam> > layer(
+      new ConvolutionLayer<TypeParam>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  Caffeine::set_mode(Caffeine::CPU);
+  layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // After the convolution, every output value should be 9.1.
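+  // (With group 3, each output channel sees only one input channel, so a
+  // 3x3 kernel of ones over inputs of 1 plus a bias of 0.1 gives 9.1.)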
+  const TypeParam* top_data = this->blob_top_->cpu_data();
+  for (int i = 0; i < this->blob_top_->count(); ++i) {
+    EXPECT_GE(top_data[i], 9.1 - 1e-4);
+    EXPECT_LE(top_data[i], 9.1 + 1e-4);
+  }
+  // Test GPU
+  Caffeine::set_mode(Caffeine::GPU);
+  layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+  // After the convolution, every output value should be 9.1.
+  top_data = this->blob_top_->cpu_data();
+  for (int i = 0; i < this->blob_top_->count(); ++i) {
+    EXPECT_GE(top_data[i], 9.1 - 1e-4);
+    EXPECT_LE(top_data[i], 9.1 + 1e-4);
+  }
+}
+
+
+TYPED_TEST(ConvolutionLayerTest, TestCPUGradient) {
+  LayerParameter layer_param;
+  layer_param.set_kernelsize(3);
+  layer_param.set_stride(2);
+  layer_param.set_num_output(2);
+  Caffeine::set_mode(Caffeine::CPU);
+  ConvolutionLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-2);
+  checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(ConvolutionLayerTest, TestGPUGradient) {
+  LayerParameter layer_param;
+  layer_param.set_kernelsize(3);
+  layer_param.set_stride(2);
+  layer_param.set_num_output(2);
+  Caffeine::set_mode(Caffeine::GPU);
+  ConvolutionLayer<TypeParam> layer(layer_param);
+  GradientChecker<TypeParam> checker(1e-2, 1e-2);
+  checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+}  // namespace caffeine
diff --git a/src/caffeine/test/test_gradient_check_util.hpp b/src/caffeine/test/test_gradient_check_util.hpp
index 6b7733693ac0153c2ca31e37cb4ac6f3b76758fa..334a3ee8c01e13ba302c080c211d0c009b73fbb4 100644 (file)
@@ -71,11 +71,11 @@ void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>& layer,
     blobs_to_check.push_back(bottom[check_bottom]);
   }
   // go through the bottom and parameter blobs
-  //LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
+  LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
   for (int blobid = 0; blobid < blobs_to_check.size(); ++blobid) {
     Blob<Dtype>* current_blob = blobs_to_check[blobid];
-    //LOG(ERROR) << "Blob " << blobid << ": checking " << current_blob->count()
-    //    << " parameters.";
+    LOG(ERROR) << "Blob " << blobid << ": checking " << current_blob->count()
+        << " parameters.";
     // go through the values
     for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
       // First, obtain the original data
diff --git a/src/caffeine/vision_layers.hpp b/src/caffeine/vision_layers.hpp
index f6df6f51eb44badcc633a984278aecca9aa18c6f..d5f939a210d4e4e97ab3237b9558d3ce3f39a7ad 100644 (file)
@@ -169,13 +169,28 @@ class ConvolutionLayer : public Layer<Dtype> {
  protected:
   virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
       vector<Blob<Dtype>*>* top);
-  //virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
-  //    vector<Blob<Dtype>*>* top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      vector<Blob<Dtype>*>* top);
   virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
       const bool propagate_down, vector<Blob<Dtype>*>* bottom);
-  //virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
-  //    const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+  virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const bool propagate_down, vector<Blob<Dtype>*>* bottom);
   Blob<Dtype> col_bob_;
+ protected:
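+  // Convolution hyperparameters and input dimensions cached at SetUp time.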
+  int KSIZE_;
+  int STRIDE_;
+  int NUM_;
+  int CHANNELS_;
+  int HEIGHT_;
+  int WIDTH_;
+  int NUM_OUTPUT_;
+  int GROUP_;
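+  // im2col buffer for a single image, plus an all-ones vector used to
+  // broadcast the bias over the N_ spatial output locations.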
+  Blob<Dtype> col_buffer_;
+  shared_ptr<SyncedMemory> bias_multiplier_;
+  bool biasterm_;
+  int M_;
+  int K_;
+  int N_;
 };
 
 }  // namespace caffeine