src/caffeine/test/test_gradient_check_util.hpp

   1 #ifndef CAFFEINE_TEST_GRADIENT_CHECK_UTIL_H_
   2 #define CAFFEINE_TEST_GRADIENT_CHECK_UTIL_H_
   3
   4 #include <algorithm>
   5 #include <cmath>
   6 #include <glog/logging.h>
   7 #include <gtest/gtest.h>
   8 #include "caffeine/layer.hpp"
   9
  10 using std::max;
  11
  12 namespace caffeine {
  13
  14 // The gradient checker adds a L2 normalization loss function on top of the
  15 // top blobs, and checks the gradient.
  16 template <typename Dtype>
  17 class GradientChecker {
  18  public:
  19   GradientChecker(const Dtype stepsize, const Dtype threshold,
  20       const unsigned int seed = 1701, const Dtype kink = 0.,
  21       const Dtype kink_range = -1)
  22       : stepsize_(stepsize), threshold_(threshold), seed_(seed),
  23         kink_(kink), kink_range_(kink_range) {};
  24   // Checks the gradient of a layer, with provided bottom layers and top
  25   // layers. The gradient checker will check the gradient with respect to
  26   // the parameters of the layer, as well as the input blobs if check_through
  27   // is set True.
  28   // Note that after the gradient check, we do not guarantee that the data
  29   // stored in the layer parameters and the blobs.
  30   void CheckGradient(Layer<Dtype>& layer, vector<Blob<Dtype>*>& bottom,
  31       vector<Blob<Dtype>*>& top, int check_bottom = -1) {
  32       layer.SetUp(bottom, &top);
  33       CheckGradientSingle(layer, bottom, top, check_bottom, -1, -1);
  34   }
  35   void CheckGradientExhaustive(Layer<Dtype>& layer,
  36       vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>& top,
  37       int check_bottom = -1);
  38
  39   void CheckGradientSingle(Layer<Dtype>& layer, vector<Blob<Dtype>*>& bottom,
  40       vector<Blob<Dtype>*>& top, int check_bottom, int top_id,
  41       int top_data_id);
  42  protected:
  43   Dtype GetObjAndGradient(vector<Blob<Dtype>*>& top, int top_id = -1,
  44       int top_data_id = -1);
  45   Dtype stepsize_;
  46   Dtype threshold_;
  47   unsigned int seed_;
  48   Dtype kink_;
  49   Dtype kink_range_;
  50 };
  51
  52
  53 // Detailed implementations are as follows.
  54
  55
  56 template <typename Dtype>
  57 void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>& layer,
  58     vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>& top,
  59     int check_bottom, int top_id, int top_data_id) {
  60   // First, figure out what blobs we need to check against.
  61   vector<Blob<Dtype>*> blobs_to_check;
  62   for (int i = 0; i < layer.params().size(); ++i) {
  63     blobs_to_check.push_back(&layer.params()[i]);
  64   }
  65   if (check_bottom < 0) {
  66     for (int i = 0; i < bottom.size(); ++i) {
  67       blobs_to_check.push_back(bottom[i]);
  68     }
  69   } else {
  70     CHECK(check_bottom < bottom.size());
  71     blobs_to_check.push_back(bottom[check_bottom]);
  72   }
  73   // go through the bottom and parameter blobs
  74   //LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
  75   for (int blobid = 0; blobid < blobs_to_check.size(); ++blobid) {
  76     Blob<Dtype>* current_blob = blobs_to_check[blobid];
  77     //LOG(ERROR) << "Blob " << blobid << ": checking " << current_blob->count()
  78     //    << " parameters.";
  79     // go through the values
  80     for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
  81       // First, obtain the original data
  82       Caffeine::set_random_seed(seed_);
  83       layer.Forward(bottom, &top);
  84       Dtype computed_objective = GetObjAndGradient(top, top_id, top_data_id);
  85       // Get any additional loss from the layer
  86       computed_objective += layer.Backward(top, true, &bottom);
  87       Dtype computed_gradient = current_blob->cpu_diff()[feat_id];
  88       // compute score by adding stepsize
  89       current_blob->mutable_cpu_data()[feat_id] += stepsize_;
  90       Caffeine::set_random_seed(seed_);
  91       layer.Forward(bottom, &top);
  92       Dtype positive_objective = GetObjAndGradient(top, top_id, top_data_id);
  93       positive_objective += layer.Backward(top, true, &bottom);
  94       // compute score by subtracting stepsize
  95       current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
  96       Caffeine::set_random_seed(seed_);
  97       layer.Forward(bottom, &top);
  98       Dtype negative_objective = GetObjAndGradient(top, top_id, top_data_id);
  99       negative_objective += layer.Backward(top, true, &bottom);
 100       // Recover stepsize
 101       current_blob->mutable_cpu_data()[feat_id] += stepsize_;
 102       Dtype estimated_gradient = (positive_objective - negative_objective) /
 103           stepsize_ / 2.;
 104       Dtype feature = current_blob->cpu_data()[feat_id];
 105       //LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " "
 106       //    << current_blob->cpu_diff()[feat_id];
 107       if (kink_ - kink_range_ > feature || feature > kink_ + kink_range_) {
 108         // We check relative accuracy, but for too small values, we threshold
 109         // the scale factor by 1.
 110         Dtype scale = max(max(fabs(computed_gradient), fabs(estimated_gradient)),
 111             1.);
 112         EXPECT_GT(computed_gradient, estimated_gradient - threshold_ * scale);
 113         EXPECT_LT(computed_gradient, estimated_gradient + threshold_ * scale);
 114       }
 115       //LOG(ERROR) << "Feature: " << current_blob->cpu_data()[feat_id];
 116       //LOG(ERROR) << "computed gradient: " << computed_gradient
 117       //    << " estimated_gradient: " << estimated_gradient;
 118     }
 119   }
 120 }
 121
 122 template <typename Dtype>
 123 void GradientChecker<Dtype>::CheckGradientExhaustive(Layer<Dtype>& layer,
 124     vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>& top, int check_bottom) {
 125   layer.SetUp(bottom, &top);
 126   //LOG(ERROR) << "Exhaustive Mode.";
 127   for (int i = 0; i < top.size(); ++i) {
 128     //LOG(ERROR) << "Exhaustive: blob " << i << " size " << top[i]->count();
 129     for (int j = 0; j < top[i]->count(); ++j) {
 130       //LOG(ERROR) << "Exhaustive: blob " << i << " data " << j;
 131       CheckGradientSingle(layer, bottom, top, check_bottom, i, j);
 132     }
 133   }
 134 }
 135
 136 template <typename Dtype>
 137 Dtype GradientChecker<Dtype>::GetObjAndGradient(vector<Blob<Dtype>*>& top,
 138     int top_id, int top_data_id) {
 139   Dtype loss = 0;
 140   if (top_id < 0) {
 141     // the loss will be half of the sum of squares of all outputs
 142     for (int i = 0; i < top.size(); ++i) {
 143       Blob<Dtype>* top_blob = top[i];
 144       const Dtype* top_blob_data = top_blob->cpu_data();
 145       Dtype* top_blob_diff = top_blob->mutable_cpu_diff();
 146       int count = top_blob->count();
 147       for (int j = 0; j < count; ++j) {
 148         loss += top_blob_data[j] * top_blob_data[j];
 149       }
 150       // set the diff: simply the data.
 151       memcpy(top_blob_diff, top_blob_data, sizeof(Dtype) * top_blob->count());
 152     }
 153     loss /= 2.;
 154   } else {
 155     // the loss will be the top_data_id-th element in the top_id-th blob.
 156     for (int i = 0; i < top.size(); ++i) {
 157       Blob<Dtype>* top_blob = top[i];
 158       Dtype* top_blob_diff = top_blob->mutable_cpu_diff();
 159       memset(top_blob_diff, 0, sizeof(Dtype) * top_blob->count());
 160     }
 161     loss = top[top_id]->cpu_data()[top_data_id];
 162     top[top_id]->mutable_cpu_diff()[top_data_id] = 1.;
 163   }
 164   return loss;
 165 }
 166
 167 }  // namespace caffeine
 168
 169 #endif  // CAFFEINE_TEST_GRADIENT_CHECK_UTIL_H_