src/caffe/test/test_gradient_check_util.hpp

   1 // Copyright 2013 Yangqing Jia
   2
   3 #ifndef CAFFE_TEST_GRADIENT_CHECK_UTIL_H_
   4 #define CAFFE_TEST_GRADIENT_CHECK_UTIL_H_
   5
   6 #include <glog/logging.h>
   7 #include <gtest/gtest.h>
   8
   9 #include <algorithm>
  10 #include <cmath>
  11 #include <vector>
  12
  13 #include "caffe/layer.hpp"
  14
  15 using std::max;
  16
  17 namespace caffe {
  18
  19 // The gradient checker adds a L2 normalization loss function on top of the
  20 // top blobs, and checks the gradient.
  21 template <typename Dtype>
  22 class GradientChecker {
  23  public:
  24   GradientChecker(const Dtype stepsize, const Dtype threshold,
  25       const unsigned int seed = 1701, const Dtype kink = 0.,
  26       const Dtype kink_range = -1)
  27       : stepsize_(stepsize), threshold_(threshold), seed_(seed),
  28         kink_(kink), kink_range_(kink_range) {}
  29   // Checks the gradient of a layer, with provided bottom layers and top
  30   // layers. The gradient checker will check the gradient with respect to
  31   // the parameters of the layer, as well as the input blobs if check_through
  32   // is set True.
  33   // Note that after the gradient check, we do not guarantee that the data
  34   // stored in the layer parameters and the blobs.
  35   void CheckGradient(Layer<Dtype>& layer, vector<Blob<Dtype>*>& bottom,
  36       vector<Blob<Dtype>*>& top, int check_bottom = -1) {
  37       layer.SetUp(bottom, &top);
  38       CheckGradientSingle(layer, bottom, top, check_bottom, -1, -1);
  39   }
  40   void CheckGradientExhaustive(Layer<Dtype>& layer,
  41       vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>& top,
  42       int check_bottom = -1);
  43
  44   void CheckGradientSingle(Layer<Dtype>& layer, vector<Blob<Dtype>*>& bottom,
  45       vector<Blob<Dtype>*>& top, int check_bottom, int top_id,
  46       int top_data_id);
  47
  48  protected:
  49   Dtype GetObjAndGradient(vector<Blob<Dtype>*>& top, int top_id = -1,
  50       int top_data_id = -1);
  51   Dtype stepsize_;
  52   Dtype threshold_;
  53   unsigned int seed_;
  54   Dtype kink_;
  55   Dtype kink_range_;
  56 };
  57
  58
  59 // Detailed implementations are as follows.
  60
  61
  62 template <typename Dtype>
  63 void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>& layer,
  64     vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>& top,
  65     int check_bottom, int top_id, int top_data_id) {
  66   // First, figure out what blobs we need to check against.
  67   vector<Blob<Dtype>*> blobs_to_check;
  68   for (int i = 0; i < layer.params().size(); ++i) {
  69     blobs_to_check.push_back(layer.params()[i].get());
  70   }
  71   if (check_bottom < 0) {
  72     for (int i = 0; i < bottom.size(); ++i) {
  73       blobs_to_check.push_back(bottom[i]);
  74     }
  75   } else {
  76     CHECK(check_bottom < bottom.size());
  77     blobs_to_check.push_back(bottom[check_bottom]);
  78   }
  79   // go through the bottom and parameter blobs
  80   // LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
  81   for (int blobid = 0; blobid < blobs_to_check.size(); ++blobid) {
  82     Blob<Dtype>* current_blob = blobs_to_check[blobid];
  83     // LOG(ERROR) << "Blob " << blobid << ": checking " << current_blob->count()
  84     //     << " parameters.";
  85     // go through the values
  86     for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
  87       // First, obtain the original data
  88       Caffe::set_random_seed(seed_);
  89       layer.Forward(bottom, &top);
  90       Dtype computed_objective = GetObjAndGradient(top, top_id, top_data_id);
  91       // Get any additional loss from the layer
  92       computed_objective += layer.Backward(top, true, &bottom);
  93       Dtype computed_gradient = current_blob->cpu_diff()[feat_id];
  94       // compute score by adding stepsize
  95       current_blob->mutable_cpu_data()[feat_id] += stepsize_;
  96       Caffe::set_random_seed(seed_);
  97       layer.Forward(bottom, &top);
  98       Dtype positive_objective = GetObjAndGradient(top, top_id, top_data_id);
  99       positive_objective += layer.Backward(top, true, &bottom);
 100       // compute score by subtracting stepsize
 101       current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
 102       Caffe::set_random_seed(seed_);
 103       layer.Forward(bottom, &top);
 104       Dtype negative_objective = GetObjAndGradient(top, top_id, top_data_id);
 105       negative_objective += layer.Backward(top, true, &bottom);
 106       // Recover stepsize
 107       current_blob->mutable_cpu_data()[feat_id] += stepsize_;
 108       Dtype estimated_gradient = (positive_objective - negative_objective) /
 109           stepsize_ / 2.;
 110       Dtype feature = current_blob->cpu_data()[feat_id];
 111       // LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " "
 112       //     << current_blob->cpu_diff()[feat_id];
 113       if (kink_ - kink_range_ > feature || feature > kink_ + kink_range_) {
 114         // We check relative accuracy, but for too small values, we threshold
 115         // the scale factor by 1.
 116         Dtype scale = max(
 117             max(fabs(computed_gradient), fabs(estimated_gradient)), 1.);
 118         EXPECT_GT(computed_gradient, estimated_gradient - threshold_ * scale)
 119           << "debug: (top_id, top_data_id, blob_id, feat_id)="
 120           << top_id << "," << top_data_id << "," << blobid << "," << feat_id;
 121         EXPECT_LT(computed_gradient, estimated_gradient + threshold_ * scale)
 122           << "debug: (top_id, top_data_id, blob_id, feat_id)="
 123           << top_id << "," << top_data_id << "," << blobid << "," << feat_id;
 124       }
 125       // LOG(ERROR) << "Feature: " << current_blob->cpu_data()[feat_id];
 126       // LOG(ERROR) << "computed gradient: " << computed_gradient
 127       //     << " estimated_gradient: " << estimated_gradient;
 128     }
 129   }
 130 }
 131
 132 template <typename Dtype>
 133 void GradientChecker<Dtype>::CheckGradientExhaustive(Layer<Dtype>& layer,
 134     vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>& top,
 135     int check_bottom) {
 136   layer.SetUp(bottom, &top);
 137   // LOG(ERROR) << "Exhaustive Mode.";
 138   for (int i = 0; i < top.size(); ++i) {
 139     // LOG(ERROR) << "Exhaustive: blob " << i << " size " << top[i]->count();
 140     for (int j = 0; j < top[i]->count(); ++j) {
 141       // LOG(ERROR) << "Exhaustive: blob " << i << " data " << j;
 142       CheckGradientSingle(layer, bottom, top, check_bottom, i, j);
 143     }
 144   }
 145 }
 146
 147 template <typename Dtype>
 148 Dtype GradientChecker<Dtype>::GetObjAndGradient(vector<Blob<Dtype>*>& top,
 149     int top_id, int top_data_id) {
 150   Dtype loss = 0;
 151   if (top_id < 0) {
 152     // the loss will be half of the sum of squares of all outputs
 153     for (int i = 0; i < top.size(); ++i) {
 154       Blob<Dtype>* top_blob = top[i];
 155       const Dtype* top_blob_data = top_blob->cpu_data();
 156       Dtype* top_blob_diff = top_blob->mutable_cpu_diff();
 157       int count = top_blob->count();
 158       for (int j = 0; j < count; ++j) {
 159         loss += top_blob_data[j] * top_blob_data[j];
 160       }
 161       // set the diff: simply the data.
 162       memcpy(top_blob_diff, top_blob_data, sizeof(Dtype) * top_blob->count());
 163     }
 164     loss /= 2.;
 165   } else {
 166     // the loss will be the top_data_id-th element in the top_id-th blob.
 167     for (int i = 0; i < top.size(); ++i) {
 168       Blob<Dtype>* top_blob = top[i];
 169       Dtype* top_blob_diff = top_blob->mutable_cpu_diff();
 170       memset(top_blob_diff, 0, sizeof(Dtype) * top_blob->count());
 171     }
 172     loss = top[top_id]->cpu_data()[top_data_id];
 173     top[top_id]->mutable_cpu_diff()[top_data_id] = 1.;
 174   }
 175   return loss;
 176 }
 177
 178 }  // namespace caffe
 179
 180 #endif  // CAFFE_TEST_GRADIENT_CHECK_UTIL_H_