author    Yangqing Jia <jiayq84@gmail.com>
          Fri, 20 Sep 2013 23:39:25 +0000 (16:39 -0700)
committer Yangqing Jia <jiayq84@gmail.com>
          Fri, 20 Sep 2013 23:39:25 +0000 (16:39 -0700)
parent    166cea7
src/caffeine/blob.hpp
src/caffeine/layers/conv_layer.cpp             [new file with mode: 0644]
src/caffeine/layers/inner_product_layer.cpp
src/caffeine/test/test_convolution_layer.cpp   [new file with mode: 0644]
src/caffeine/test/test_gradient_check_util.hpp
src/caffeine/vision_layers.hpp
diff --git a/src/caffeine/blob.hpp b/src/caffeine/blob.hpp
index acef4845c3a35d8f88158704897822e55f77248b..1457ee1d9d5d0ec07e2f4ebe37303002492623fd 100644
--- a/src/caffeine/blob.hpp
+++ b/src/caffeine/blob.hpp
const int w) const {
return *(cpu_diff() + offset(n, c, h, w));
}
-
+
const Dtype* cpu_data() const;
const Dtype* gpu_data() const;
const Dtype* cpu_diff() const;
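
Note: the accessors above read the gradient buffer at a flat index computed by Blob::offset. Below is a standalone re-derivation of that index, assuming the usual row-major NCHW layout; the function is a hypothetical stand-in, not the class's actual code:

    #include <cstdio>

    // Flat index of element (n, c, h, w) in a row-major NCHW array,
    // assuming Blob::offset uses the conventional layout.
    int offset(int n, int c, int h, int w,
               int channels, int height, int width) {
      return ((n * channels + c) * height + h) * width + w;
    }

    int main() {
      // For a 2x3x6x5 blob, element (1, 2, 5, 4) is the last of 180 values.
      printf("%d\n", offset(1, 2, 5, 4, 3, 6, 5));  // prints 179
      return 0;
    }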
diff --git a/src/caffeine/layers/conv_layer.cpp b/src/caffeine/layers/conv_layer.cpp
new file mode 100644
--- /dev/null
+++ b/src/caffeine/layers/conv_layer.cpp
@@ -0,0 +1,237 @@
+#include "caffeine/layer.hpp"
+#include "caffeine/vision_layers.hpp"
+#include "caffeine/util/im2col.hpp"
+#include "caffeine/filler.hpp"
+#include "caffeine/util/math_functions.hpp"
+
+namespace caffeine {
+
+template <typename Dtype>
+void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ CHECK_EQ(bottom.size(), 1) << "Conv Layer takes a single blob as input.";
+ CHECK_EQ(top->size(), 1) << "Conv Layer takes a single blob as output.";
+ KSIZE_ = this->layer_param_.kernelsize();
+ STRIDE_ = this->layer_param_.stride();
+ GROUP_ = this->layer_param_.group();
+ NUM_ = bottom[0]->num();
+ CHANNELS_ = bottom[0]->channels();
+ HEIGHT_ = bottom[0]->height();
+ WIDTH_ = bottom[0]->width();
+ NUM_OUTPUT_ = this->layer_param_.num_output();
+ CHECK_EQ(CHANNELS_ % GROUP_, 0);
+ // The im2col result buffer would only hold one image at a time to avoid
+ // overly large memory usage.
+ int height_out = (HEIGHT_ - KSIZE_) / STRIDE_ + 1;
+ int width_out = (WIDTH_ - KSIZE_) / STRIDE_ + 1;
+ col_buffer_.Reshape(1, CHANNELS_ * KSIZE_ * KSIZE_, height_out, width_out);
+ // Set the parameters
+ CHECK_EQ(NUM_OUTPUT_ % GROUP_, 0)
+ << "Number of outputs should be a multiple of the group count.";
+ biasterm_ = this->layer_param_.biasterm();
+ // Figure out the dimensions for individual gemms.
+ M_ = NUM_OUTPUT_ / GROUP_;
+ K_ = CHANNELS_ * KSIZE_ * KSIZE_ / GROUP_;
+ N_ = height_out * width_out;
+ (*top)[0]->Reshape(bottom[0]->num(), NUM_OUTPUT_, height_out, width_out);
+ if (biasterm_) {
+ this->blobs_.resize(2);
+ } else {
+ this->blobs_.resize(1);
+ }
+ // Initialize the weight
+ this->blobs_[0].Reshape(1, 1, NUM_OUTPUT_, K_);
+ // fill the weights
+ shared_ptr<Filler<Dtype> > weight_filler(
+ GetFiller<Dtype>(this->layer_param_.weight_filler()));
+ weight_filler->Fill(&this->blobs_[0]);
+ // If necessary, initialize and fill the bias term
+ if (biasterm_) {
+ this->blobs_[1].Reshape(1, 1, 1, NUM_OUTPUT_);
+ shared_ptr<Filler<Dtype> > bias_filler(
+ GetFiller<Dtype>(this->layer_param_.bias_filler()));
+ bias_filler->Fill(&this->blobs_[1]);
+ bias_multiplier_.reset(new SyncedMemory(N_ * sizeof(Dtype)));
+ Dtype* bias_multiplier_data = (Dtype*)bias_multiplier_->mutable_cpu_data();
+ for (int i = 0; i < N_; ++i) {
+ bias_multiplier_data[i] = 1.;
+ }
+ }
+}
+
+
+template <typename Dtype>
+void ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->cpu_data();
+ Dtype* top_data = (*top)[0]->mutable_cpu_data();
+ Dtype* col_data = col_buffer_.mutable_cpu_data();
+ const Dtype* weight = this->blobs_[0].cpu_data();
+ int weight_offset = M_ * K_;
+ int col_offset = K_ * N_;
+ int top_offset = M_ * N_;
+ for (int n = 0; n < NUM_; ++n) {
+ // First, im2col
+ im2col_cpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
+ WIDTH_, KSIZE_, STRIDE_, col_data);
+ // Second, innerproduct with groups
+ for (int g = 0; g < GROUP_; ++g) {
+ caffeine_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+ (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
+ (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
+ }
+ // Third, add bias
+ if (biasterm_) {
+ caffeine_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
+ N_, 1, (Dtype)1., this->blobs_[1].cpu_data(),
+ (Dtype*)bias_multiplier_->cpu_data(), (Dtype)1.,
+ top_data + (*top)[0]->offset(n));
+ }
+ }
+}
+
+template <typename Dtype>
+void ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top) {
+ const Dtype* bottom_data = bottom[0]->gpu_data();
+ Dtype* top_data = (*top)[0]->mutable_gpu_data();
+ Dtype* col_data = col_buffer_.mutable_gpu_data();
+ const Dtype* weight = this->blobs_[0].gpu_data();
+ int weight_offset = M_ * K_;
+ int col_offset = K_ * N_;
+ int top_offset = M_ * N_;
+ for (int n = 0; n < NUM_; ++n) {
+ // First, im2col
+ im2col_gpu(bottom_data + bottom[0]->offset(n), CHANNELS_, HEIGHT_,
+ WIDTH_, KSIZE_, STRIDE_, col_data);
+ // Second, innerproduct with groups
+ for (int g = 0; g < GROUP_; ++g) {
+ caffeine_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, K_,
+ (Dtype)1., weight + weight_offset * g, col_data + col_offset * g,
+ (Dtype)0., top_data + (*top)[0]->offset(n) + top_offset * g);
+ }
+ // Third, add bias
+ if (biasterm_) {
+ caffeine_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, NUM_OUTPUT_,
+ N_, 1, (Dtype)1., this->blobs_[1].gpu_data(),
+ (Dtype*)bias_multiplier_->gpu_data(), (Dtype)1.,
+ top_data + (*top)[0]->offset(n));
+ }
+ }
+}
+
+template <typename Dtype>
+Dtype ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+ const Dtype* top_diff = top[0]->cpu_diff();
+ const Dtype* weight = this->blobs_[0].cpu_data();
+ Dtype* weight_diff = this->blobs_[0].mutable_cpu_diff();
+ const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
+ Dtype* col_data = col_buffer_.mutable_cpu_data();
+ Dtype* col_diff = col_buffer_.mutable_cpu_diff();
+ // bias gradient if necessary
+ Dtype* bias_diff = NULL;
+
+ if (biasterm_) {
+ bias_diff = this->blobs_[1].mutable_cpu_diff();
+ memset(bias_diff, 0, sizeof(Dtype) * this->blobs_[1].count());
+ for (int n = 0; n < NUM_; ++n) {
+ caffeine_cpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
+ 1., top_diff + top[0]->offset(n),
+ (Dtype*)bias_multiplier_->cpu_data(), 1., bias_diff);
+ }
+ }
+
+ int weight_offset = M_ * K_;
+ int col_offset = K_ * N_;
+ int top_offset = M_ * N_;
+ memset(weight_diff, 0, sizeof(Dtype) * this->blobs_[0].count());
+ for (int n = 0; n < NUM_; ++n) {
+ // Since we saved memory in the forward pass by not storing all the col
+ // data, we need to recompute it here.
+ im2col_cpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
+ WIDTH_, KSIZE_, STRIDE_, col_data);
+ // gradient w.r.t. weight. Note that we will accumulate diffs.
+ for (int g = 0; g < GROUP_; ++g) {
+ caffeine_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+ (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
+ col_data + col_offset * g, (Dtype)1.,
+ weight_diff + weight_offset * g);
+ }
+ // gradient w.r.t. bottom data, if necessary
+ if (propagate_down) {
+ for (int g = 0; g < GROUP_; ++g) {
+ caffeine_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+ (Dtype)1., weight + weight_offset * g,
+ top_diff + top[0]->offset(n) + top_offset * g,
+ (Dtype)0., col_diff + col_offset * g);
+ }
+ // col2im back to the data
+ col2im_cpu(col_diff, CHANNELS_, HEIGHT_,
+ WIDTH_, KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
+ }
+ }
+ return Dtype(0.);
+}
+
+template <typename Dtype>
+Dtype ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
+ const Dtype* top_diff = top[0]->gpu_diff();
+ const Dtype* weight = this->blobs_[0].gpu_data();
+ Dtype* weight_diff = this->blobs_[0].mutable_gpu_diff();
+ const Dtype* bottom_data = (*bottom)[0]->gpu_data();
+ Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
+ Dtype* col_data = col_buffer_.mutable_gpu_data();
+ Dtype* col_diff = col_buffer_.mutable_gpu_diff();
+ // bias gradient if necessary
+ Dtype* bias_diff = NULL;
+
+ if (biasterm_) {
+ bias_diff = this->blobs_[1].mutable_gpu_diff();
+ CUDA_CHECK(cudaMemset(bias_diff, 0,
+ sizeof(Dtype) * this->blobs_[1].count()));
+ for (int n = 0; n < NUM_; ++n) {
+ caffeine_gpu_gemv<Dtype>(CblasNoTrans, NUM_OUTPUT_, N_,
+ 1., top_diff + top[0]->offset(n),
+ (Dtype*)bias_multiplier_->gpu_data(), 1., bias_diff);
+ }
+ }
+
+ int weight_offset = M_ * K_;
+ int col_offset = K_ * N_;
+ int top_offset = M_ * N_;
+ CUDA_CHECK(cudaMemset(weight_diff, 0,
+ sizeof(Dtype) * this->blobs_[0].count()));
+ for (int n = 0; n < NUM_; ++n) {
+ // Since we saved memory in the forward pass by not storing all the col
+ // data, we need to recompute it here.
+ im2col_gpu(bottom_data + (*bottom)[0]->offset(n), CHANNELS_, HEIGHT_,
+ WIDTH_, KSIZE_, STRIDE_, col_data);
+ // gradient w.r.t. weight. Note that we will accumulate diffs.
+ for (int g = 0; g < GROUP_; ++g) {
+ caffeine_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+ (Dtype)1., top_diff + top[0]->offset(n) + top_offset * g,
+ col_data + col_offset * g, (Dtype)1.,
+ weight_diff + weight_offset * g);
+ }
+ // gradient w.r.t. bottom data, if necessary
+ if (propagate_down) {
+ for (int g = 0; g < GROUP_; ++g) {
+ caffeine_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+ (Dtype)1., weight + weight_offset * g,
+ top_diff + top[0]->offset(n) + top_offset * g,
+ (Dtype)0., col_diff + col_offset * g);
+ }
+ // col2im back to the data
+ col2im_gpu(col_diff, CHANNELS_, HEIGHT_,
+ WIDTH_, KSIZE_, STRIDE_, bottom_diff + (*bottom)[0]->offset(n));
+ }
+ }
+ return Dtype(0.);
+}
+
+INSTANTIATE_CLASS(ConvolutionLayer);
+
+} // namespace caffeine
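
Note: a quick way to sanity-check the dimension bookkeeping in SetUp is to trace it by hand. The standalone sketch below (not part of the commit) uses the configuration from the new tests, a 2x3x6x5 bottom blob with kernelsize 3, stride 2, num_output 4 and group 1, and prints the output size and the per-group GEMM shapes:

    #include <cstdio>

    int main() {
      const int CHANNELS = 3, HEIGHT = 6, WIDTH = 5;
      const int KSIZE = 3, STRIDE = 2, NUM_OUTPUT = 4, GROUP = 1;
      // Valid convolution: this version of the layer has no padding.
      const int height_out = (HEIGHT - KSIZE) / STRIDE + 1;  // (6-3)/2+1 = 2
      const int width_out = (WIDTH - KSIZE) / STRIDE + 1;    // (5-3)/2+1 = 2
      // Per-group GEMM: top (M x N) = weight (M x K) * col buffer (K x N).
      const int M = NUM_OUTPUT / GROUP;                // output maps per group
      const int K = CHANNELS * KSIZE * KSIZE / GROUP;  // inputs per output value
      const int N = height_out * width_out;            // spatial positions
      printf("output %dx%d, gemm (%d x %d) * (%d x %d)\n",
             height_out, width_out, M, K, K, N);
      return 0;
    }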
diff --git a/src/caffeine/layers/inner_product_layer.cpp b/src/caffeine/layers/inner_product_layer.cpp
index a16439c2d5d4c5ba3f5a998c5f2de318e9e24562..2f7af89fa1e38aed6ee2f82c4732dd9d17c5a2ed 100644
--- a/src/caffeine/layers/inner_product_layer.cpp
+++ b/src/caffeine/layers/inner_product_layer.cpp
this->blobs_.resize(1);
}
// Initialize the weight
- this->blobs_[0].Reshape(1, 1, K_, N_);
+ this->blobs_[0].Reshape(1, 1, N_, K_);
// fill the weights
shared_ptr<Filler<Dtype> > weight_filler(
GetFiller<Dtype>(this->layer_param_.weight_filler()));
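
Note: the one-line change above flips the inner product weight blob from (K_ x N_) to (N_ x K_), the same (outputs x inputs) layout the new convolution layer uses for its blobs_[0]. With row-major storage that layout is consumed by a GEMM with the second operand transposed; a minimal standalone illustration with hypothetical values, using plain CBLAS rather than the caffeine_cpu_gemm wrapper:

    #include <cblas.h>
    #include <cstdio>

    int main() {
      // top (M x N) = bottom (M x K) * weight^T (K x N), weight stored N x K.
      const int M = 1, N = 2, K = 3;
      float bottom[M * K] = {1.f, 2.f, 3.f};
      float weight[N * K] = {1.f, 0.f, 0.f,   // one row per output
                             0.f, 1.f, 1.f};
      float top[M * N] = {0.f, 0.f};
      cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, M, N, K,
                  1.f, bottom, K, weight, K, 0.f, top, N);
      printf("top = [%g, %g]\n", top[0], top[1]);  // prints [1, 5]
      return 0;
    }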
diff --git a/src/caffeine/test/test_convolution_layer.cpp b/src/caffeine/test/test_convolution_layer.cpp
new file mode 100644
--- /dev/null
+++ b/src/caffeine/test/test_convolution_layer.cpp
@@ -0,0 +1,163 @@
+#include <cstring>
+#include <cuda_runtime.h>
+
+#include "gtest/gtest.h"
+#include "caffeine/blob.hpp"
+#include "caffeine/common.hpp"
+#include "caffeine/filler.hpp"
+#include "caffeine/vision_layers.hpp"
+#include "caffeine/test/test_gradient_check_util.hpp"
+
+#include "caffeine/test/test_caffeine_main.hpp"
+
+namespace caffeine {
+
+extern cudaDeviceProp CAFFEINE_TEST_CUDA_PROP;
+
+template <typename Dtype>
+class ConvolutionLayerTest : public ::testing::Test {
+ protected:
+ ConvolutionLayerTest()
+ : blob_bottom_(new Blob<Dtype>()),
+ blob_top_(new Blob<Dtype>()) {}
+ virtual void SetUp() {
+ blob_bottom_->Reshape(2, 3, 6, 5);
+ // fill the values
+ FillerParameter filler_param;
+ filler_param.set_value(1.);
+ GaussianFiller<Dtype> filler(filler_param);
+ filler.Fill(this->blob_bottom_);
+ blob_bottom_vec_.push_back(blob_bottom_);
+ blob_top_vec_.push_back(blob_top_);
+ }
+
+ virtual ~ConvolutionLayerTest() { delete blob_bottom_; delete blob_top_; }
+ Blob<Dtype>* const blob_bottom_;
+ Blob<Dtype>* const blob_top_;
+ vector<Blob<Dtype>*> blob_bottom_vec_;
+ vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+typedef ::testing::Types<float, double> Dtypes;
+TYPED_TEST_CASE(ConvolutionLayerTest, Dtypes);
+
+TYPED_TEST(ConvolutionLayerTest, TestSetup) {
+ LayerParameter layer_param;
+ layer_param.set_kernelsize(3);
+ layer_param.set_stride(2);
+ layer_param.set_num_output(4);
+ shared_ptr<Layer<TypeParam> > layer(
+ new ConvolutionLayer<TypeParam>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ EXPECT_EQ(this->blob_top_->num(), 2);
+ EXPECT_EQ(this->blob_top_->channels(), 4);
+ EXPECT_EQ(this->blob_top_->height(), 2);
+ EXPECT_EQ(this->blob_top_->width(), 2);
+ // setting group should not change the shape
+ layer_param.set_num_output(3);
+ layer_param.set_group(3);
+ layer.reset(new ConvolutionLayer<TypeParam>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ EXPECT_EQ(this->blob_top_->num(), 2);
+ EXPECT_EQ(this->blob_top_->channels(), 3);
+ EXPECT_EQ(this->blob_top_->height(), 2);
+ EXPECT_EQ(this->blob_top_->width(), 2);
+}
+
+TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolution) {
+ // We will simply check that the convolution layer computes the expected
+ // window sums.
+ FillerParameter filler_param;
+ filler_param.set_value(1.);
+ ConstantFiller<TypeParam> filler(filler_param);
+ filler.Fill(this->blob_bottom_);
+ LayerParameter layer_param;
+ layer_param.set_kernelsize(3);
+ layer_param.set_stride(2);
+ layer_param.set_num_output(4);
+ layer_param.mutable_weight_filler()->set_type("constant");
+ layer_param.mutable_weight_filler()->set_value(1);
+ layer_param.mutable_bias_filler()->set_type("constant");
+ layer_param.mutable_bias_filler()->set_value(0.1);
+ shared_ptr<Layer<TypeParam> > layer(
+ new ConvolutionLayer<TypeParam>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ Caffeine::set_mode(Caffeine::CPU);
+ layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ // After the convolution, all output values should be 27.1
+ const TypeParam* top_data = this->blob_top_->cpu_data();
+ for (int i = 0; i < this->blob_top_->count(); ++i) {
+ EXPECT_GE(top_data[i], 27.1 - 1e-4);
+ EXPECT_LE(top_data[i], 27.1 + 1e-4);
+ }
+ // Test GPU
+ Caffeine::set_mode(Caffeine::GPU);
+ layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ // After the convolution, all output values should be 27.1
+ top_data = this->blob_top_->cpu_data();
+ for (int i = 0; i < this->blob_top_->count(); ++i) {
+ EXPECT_GE(top_data[i], 27.1 - 1e-4);
+ EXPECT_LE(top_data[i], 27.1 + 1e-4);
+ }
+}
+
+TYPED_TEST(ConvolutionLayerTest, TestSimpleConvolutionGroup) {
+ // We will simply check that the grouped convolution computes the expected
+ // per-group window sums.
+ FillerParameter filler_param;
+ filler_param.set_value(1.);
+ ConstantFiller<TypeParam> filler(filler_param);
+ filler.Fill(this->blob_bottom_);
+ LayerParameter layer_param;
+ layer_param.set_kernelsize(3);
+ layer_param.set_stride(2);
+ layer_param.set_num_output(3);
+ layer_param.set_group(3);
+ layer_param.mutable_weight_filler()->set_type("constant");
+ layer_param.mutable_weight_filler()->set_value(1);
+ layer_param.mutable_bias_filler()->set_type("constant");
+ layer_param.mutable_bias_filler()->set_value(0.1);
+ shared_ptr<Layer<TypeParam> > layer(
+ new ConvolutionLayer<TypeParam>(layer_param));
+ layer->SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ Caffeine::set_mode(Caffeine::CPU);
+ layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ // After the convolution, all output values should be 9.1
+ const TypeParam* top_data = this->blob_top_->cpu_data();
+ for (int i = 0; i < this->blob_top_->count(); ++i) {
+ EXPECT_GE(top_data[i], 9.1 - 1e-4);
+ EXPECT_LE(top_data[i], 9.1 + 1e-4);
+ }
+ // Test GPU
+ Caffeine::set_mode(Caffeine::GPU);
+ layer->Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ // After the convolution, all output values should be 9.1
+ top_data = this->blob_top_->cpu_data();
+ for (int i = 0; i < this->blob_top_->count(); ++i) {
+ EXPECT_GE(top_data[i], 9.1 - 1e-4);
+ EXPECT_LE(top_data[i], 9.1 + 1e-4);
+ }
+}
+
+
+TYPED_TEST(ConvolutionLayerTest, TestCPUGradient) {
+ LayerParameter layer_param;
+ layer_param.set_kernelsize(3);
+ layer_param.set_stride(2);
+ layer_param.set_num_output(2);
+ Caffeine::set_mode(Caffeine::CPU);
+ ConvolutionLayer<TypeParam> layer(layer_param);
+ GradientChecker<TypeParam> checker(1e-2, 1e-2);
+ checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+TYPED_TEST(ConvolutionLayerTest, TestGPUGradient) {
+ LayerParameter layer_param;
+ layer_param.set_kernelsize(3);
+ layer_param.set_stride(2);
+ layer_param.set_num_output(2);
+ Caffeine::set_mode(Caffeine::GPU);
+ ConvolutionLayer<TypeParam> layer(layer_param);
+ GradientChecker<TypeParam> checker(1e-2, 1e-2);
+ checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_,
+ this->blob_top_vec_);
+}
+
+} // namespace caffeine
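
Note: the expected constants in the two forward tests fall out of simple counting. With all inputs and weights set to 1 and a constant bias of 0.1, each output value equals the number of inputs in its receptive field plus the bias; a standalone check (not part of the commit):

    #include <cstdio>

    int main() {
      const int ksize = 3, channels = 3, group = 3;
      const float bias = 0.1f;
      // TestSimpleConvolution: each output sees all 3 channels of a 3x3 window.
      printf("%g\n", ksize * ksize * channels + bias);            // 27.1
      // TestSimpleConvolutionGroup: with group == channels, each output sees
      // only one channel's 3x3 window.
      printf("%g\n", ksize * ksize * (channels / group) + bias);  // 9.1
      return 0;
    }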
diff --git a/src/caffeine/test/test_gradient_check_util.hpp b/src/caffeine/test/test_gradient_check_util.hpp
index 6b7733693ac0153c2ca31e37cb4ac6f3b76758fa..334a3ee8c01e13ba302c080c211d0c009b73fbb4 100644
--- a/src/caffeine/test/test_gradient_check_util.hpp
+++ b/src/caffeine/test/test_gradient_check_util.hpp
blobs_to_check.push_back(bottom[check_bottom]);
}
// go through the bottom and parameter blobs
- //LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
+ LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
for (int blobid = 0; blobid < blobs_to_check.size(); ++blobid) {
Blob<Dtype>* current_blob = blobs_to_check[blobid];
- //LOG(ERROR) << "Blob " << blobid << ": checking " << current_blob->count()
- // << " parameters.";
+ LOG(ERROR) << "Blob " << blobid << ": checking " << current_blob->count()
+ << " parameters.";
// go through the values
for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
// First, obtain the original data
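
Note: for context, the checker whose log lines are re-enabled above compares the analytic derivative against a numeric finite difference for every value of every blob it checks. A minimal standalone sketch of that comparison, using a hypothetical stand-in objective and the 1e-2 stepsize/threshold the new tests pass to GradientChecker (the checker's actual bookkeeping differs):

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    double f(double x) { return x * x * x; }              // stand-in objective
    double analytic_grad(double x) { return 3 * x * x; }  // its derivative

    int main() {
      const double x = 0.7, stepsize = 1e-2, threshold = 1e-2;
      // Central difference around the current value.
      const double numeric =
          (f(x + stepsize) - f(x - stepsize)) / (2 * stepsize);
      const double scale = std::max(std::fabs(numeric), 1.0);
      const bool pass =
          std::fabs(numeric - analytic_grad(x)) < threshold * scale;
      printf("analytic %g, numeric %g, pass %d\n",
             analytic_grad(x), numeric, pass);
      return 0;
    }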
diff --git a/src/caffeine/vision_layers.hpp b/src/caffeine/vision_layers.hpp
index f6df6f51eb44badcc633a984278aecca9aa18c6f..d5f939a210d4e4e97ab3237b9558d3ce3f39a7ad 100644
--- a/src/caffeine/vision_layers.hpp
+++ b/src/caffeine/vision_layers.hpp
protected:
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
vector<Blob<Dtype>*>* top);
- //virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
- // vector<Blob<Dtype>*>* top);
+ virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+ vector<Blob<Dtype>*>* top);
virtual Dtype Backward_cpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom);
- //virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
- // const bool propagate_down, vector<Blob<Dtype>*>* bottom);
+ virtual Dtype Backward_gpu(const vector<Blob<Dtype>*>& top,
+ const bool propagate_down, vector<Blob<Dtype>*>* bottom);
Blob<Dtype> col_bob_;
+ protected:
+ int KSIZE_;
+ int STRIDE_;
+ int NUM_;
+ int CHANNELS_;
+ int HEIGHT_;
+ int WIDTH_;
+ int NUM_OUTPUT_;
+ int GROUP_;
+ Blob<Dtype> col_buffer_;
+ shared_ptr<SyncedMemory> bias_multiplier_;
+ bool biasterm_;
+ int M_;
+ int K_;
+ int N_;
};
} // namespace caffeine
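
Note: putting the pieces together, a minimal usage sketch for the layer declared above, mirroring the unit tests (assumes the LayerParameter accessors the tests use; not code from the commit):

    #include <vector>
    #include "caffeine/blob.hpp"
    #include "caffeine/vision_layers.hpp"

    using namespace caffeine;

    int main() {
      Blob<float> bottom, top;
      bottom.Reshape(2, 3, 6, 5);  // 2 images, 3 channels, 6x5, as in the tests
      std::vector<Blob<float>*> bottom_vec(1, &bottom), top_vec(1, &top);
      LayerParameter param;
      param.set_kernelsize(3);
      param.set_stride(2);
      param.set_num_output(4);
      param.mutable_weight_filler()->set_type("constant");
      param.mutable_weight_filler()->set_value(1);
      param.mutable_bias_filler()->set_type("constant");
      param.mutable_bias_filler()->set_value(0.1);
      ConvolutionLayer<float> layer(param);
      layer.SetUp(bottom_vec, &top_vec);    // reshapes top to 2 x 4 x 2 x 2
      layer.Forward(bottom_vec, &top_vec);  // CPU or GPU path per Caffeine mode
      return 0;
    }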