// Copyright 2013 Yangqing Jia

#ifndef CAFFE_TEST_GRADIENT_CHECK_UTIL_H_
#define CAFFE_TEST_GRADIENT_CHECK_UTIL_H_

#include <glog/logging.h>
#include <gtest/gtest.h>

#include <algorithm>
#include <cmath>
#include <cstring>  // for memcpy/memset used in GetObjAndGradient
#include <vector>

#include "caffe/layer.hpp"
#include "caffe/net.hpp"

using std::max;

namespace caffe {
// The gradient checker adds an L2 loss function (half the sum of squares of
// the top blob values) on top of the top blobs, and checks the gradient.
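// The kink and kink_range constructor arguments define an interval
// [kink - kink_range, kink + kink_range]; feature values falling inside it
// are skipped by CheckGradientSingle, e.g. to step around points where the
// layer function is not differentiable, such as the kink of ReLU at 0.
// (With the defaults kink = 0, kink_range = -1 the interval is empty and
// every value is checked.)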
template <typename Dtype>
class GradientChecker {
 public:
  GradientChecker(const Dtype stepsize, const Dtype threshold,
      const unsigned int seed = 1701, const Dtype kink = 0.,
      const Dtype kink_range = -1)
      : stepsize_(stepsize), threshold_(threshold), seed_(seed),
        kink_(kink), kink_range_(kink_range) {}
  // Checks the gradient of a layer, with the provided bottom and top blobs.
  // Note that after the gradient check, the data stored in the layer
  // parameters and the blobs is not guaranteed to be unchanged.
  void CheckGradient(Layer<Dtype>& layer, vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>& top, int check_bottom = -1) {
    layer.SetUp(bottom, &top);
    CheckGradientSingle(layer, bottom, top, check_bottom, -1, -1);
  }
  void CheckGradientExhaustive(Layer<Dtype>& layer,
      vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>& top,
      int check_bottom = -1);
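  // Checks the gradient for a single objective: if top_id and top_data_id
  // are non-negative, the objective is the single output value
  // top[top_id]->cpu_data()[top_data_id]; if they are negative, it is the
  // L2 loss over all top blobs (see GetObjAndGradient below).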
  void CheckGradientSingle(Layer<Dtype>& layer, vector<Blob<Dtype>*>& bottom,
      vector<Blob<Dtype>*>& top, int check_bottom, int top_id,
      int top_data_id);
  // Checks the gradient of a network. This network should not have any data
  // layers or loss layers, since the function does not explicitly deal with
  // such cases yet. All input blobs and parameter blobs are checked
  // layer-by-layer so that numerical problems do not accumulate.
  void CheckGradientNet(Net<Dtype>& net, vector<Blob<Dtype>*>& input);
 protected:
  Dtype GetObjAndGradient(vector<Blob<Dtype>*>& top, int top_id = -1,
      int top_data_id = -1);
  Dtype stepsize_;
  Dtype threshold_;
  unsigned int seed_;
  Dtype kink_;
  Dtype kink_range_;
};
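
// A minimal usage sketch (hypothetical layer type and shapes, not part of
// this header): a typical gradient-check test builds bottom/top blob
// vectors, fills the bottom with random data, and runs the exhaustive check:
//
//   Blob<float>* bottom_blob = new Blob<float>(2, 3, 4, 5);
//   Blob<float>* top_blob = new Blob<float>();
//   vector<Blob<float>*> bottom, top;
//   bottom.push_back(bottom_blob);
//   top.push_back(top_blob);
//   FillerParameter filler_param;
//   GaussianFiller<float> filler(filler_param);  // from caffe/filler.hpp
//   filler.Fill(bottom_blob);
//   LayerParameter layer_param;
//   SomeLayer<float> layer(layer_param);  // hypothetical layer type
//   GradientChecker<float> checker(1e-2, 1e-3);
//   checker.CheckGradientExhaustive(layer, bottom, top);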
// Detailed implementations are as follows.

template <typename Dtype>
void GradientChecker<Dtype>::CheckGradientSingle(Layer<Dtype>& layer,
    vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>& top,
    int check_bottom, int top_id, int top_data_id) {
  // First, figure out what blobs we need to check against.
  vector<Blob<Dtype>*> blobs_to_check;
  for (int i = 0; i < layer.blobs().size(); ++i) {
    blobs_to_check.push_back(layer.blobs()[i].get());
  }
  if (check_bottom < 0) {
    for (int i = 0; i < bottom.size(); ++i) {
      blobs_to_check.push_back(bottom[i]);
    }
  } else {
    CHECK(check_bottom < bottom.size());
    blobs_to_check.push_back(bottom[check_bottom]);
  }
  // go through the bottom and parameter blobs
  // LOG(ERROR) << "Checking " << blobs_to_check.size() << " blobs.";
  for (int blobid = 0; blobid < blobs_to_check.size(); ++blobid) {
    Blob<Dtype>* current_blob = blobs_to_check[blobid];
    // LOG(ERROR) << "Blob " << blobid << ": checking "
    //     << current_blob->count() << " parameters.";
    // go through the values
    for (int feat_id = 0; feat_id < current_blob->count(); ++feat_id) {
      // First, obtain the original data
      Caffe::set_random_seed(seed_);
      layer.Forward(bottom, &top);
      Dtype computed_objective = GetObjAndGradient(top, top_id, top_data_id);
      // Get any additional loss from the layer
      computed_objective += layer.Backward(top, true, &bottom);
      Dtype computed_gradient = current_blob->cpu_diff()[feat_id];
      // compute score by adding stepsize
      current_blob->mutable_cpu_data()[feat_id] += stepsize_;
      Caffe::set_random_seed(seed_);
      layer.Forward(bottom, &top);
      Dtype positive_objective = GetObjAndGradient(top, top_id, top_data_id);
      positive_objective += layer.Backward(top, true, &bottom);
      // compute score by subtracting stepsize
      current_blob->mutable_cpu_data()[feat_id] -= stepsize_ * 2;
      Caffe::set_random_seed(seed_);
      layer.Forward(bottom, &top);
      Dtype negative_objective = GetObjAndGradient(top, top_id, top_data_id);
      negative_objective += layer.Backward(top, true, &bottom);
      // Recover the original data value.
      current_blob->mutable_cpu_data()[feat_id] += stepsize_;
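      // Central-difference estimate: with objective f and step h = stepsize_,
      // estimated_gradient = (f(x + h) - f(x - h)) / (2 * h).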
      Dtype estimated_gradient = (positive_objective - negative_objective) /
          stepsize_ / 2.;
      Dtype feature = current_blob->cpu_data()[feat_id];
      // LOG(ERROR) << "debug: " << current_blob->cpu_data()[feat_id] << " "
      //     << current_blob->cpu_diff()[feat_id];
      if (kink_ - kink_range_ > feature || feature > kink_ + kink_range_) {
        // We check relative accuracy, but for too small values, we threshold
        // the scale factor by 1.
        Dtype scale = max(
            max(fabs(computed_gradient), fabs(estimated_gradient)), 1.);
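        // i.e. require |computed_gradient - estimated_gradient|
        // < threshold_ * scale.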
        EXPECT_GT(computed_gradient, estimated_gradient - threshold_ * scale)
            << "debug: (top_id, top_data_id, blob_id, feat_id)="
            << top_id << "," << top_data_id << "," << blobid << "," << feat_id;
        EXPECT_LT(computed_gradient, estimated_gradient + threshold_ * scale)
            << "debug: (top_id, top_data_id, blob_id, feat_id)="
            << top_id << "," << top_data_id << "," << blobid << "," << feat_id;
      }
      // LOG(ERROR) << "Feature: " << current_blob->cpu_data()[feat_id];
      // LOG(ERROR) << "computed gradient: " << computed_gradient
      //     << " estimated_gradient: " << estimated_gradient;
    }
  }
}
template <typename Dtype>
void GradientChecker<Dtype>::CheckGradientExhaustive(Layer<Dtype>& layer,
    vector<Blob<Dtype>*>& bottom, vector<Blob<Dtype>*>& top,
    int check_bottom) {
  layer.SetUp(bottom, &top);
  // LOG(ERROR) << "Exhaustive Mode.";
  for (int i = 0; i < top.size(); ++i) {
    // LOG(ERROR) << "Exhaustive: blob " << i << " size " << top[i]->count();
    for (int j = 0; j < top[i]->count(); ++j) {
      // LOG(ERROR) << "Exhaustive: blob " << i << " data " << j;
      CheckGradientSingle(layer, bottom, top, check_bottom, i, j);
    }
  }
}
template <typename Dtype>
void GradientChecker<Dtype>::CheckGradientNet(
    Net<Dtype>& net, vector<Blob<Dtype>*>& input) {
  const vector<shared_ptr<Layer<Dtype> > >& layers = net.layers();
  vector<vector<Blob<Dtype>*> >& bottom_vecs = net.bottom_vecs();
  vector<vector<Blob<Dtype>*> >& top_vecs = net.top_vecs();
  for (int i = 0; i < layers.size(); ++i) {
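    // Re-run the forward pass before checking each layer so that its bottom
    // blobs hold fresh data (the check does not guarantee that data is left
    // unchanged).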
    net.Forward(input);
    LOG(ERROR) << "Checking gradient for " << layers[i]->layer_param().name();
    CheckGradientExhaustive(*(layers[i].get()), bottom_vecs[i], top_vecs[i]);
  }
}
template <typename Dtype>
Dtype GradientChecker<Dtype>::GetObjAndGradient(vector<Blob<Dtype>*>& top,
    int top_id, int top_data_id) {
  Dtype loss = 0;
  if (top_id < 0) {
    // the loss will be half of the sum of squares of all outputs
    for (int i = 0; i < top.size(); ++i) {
      Blob<Dtype>* top_blob = top[i];
      const Dtype* top_blob_data = top_blob->cpu_data();
      Dtype* top_blob_diff = top_blob->mutable_cpu_diff();
      int count = top_blob->count();
      for (int j = 0; j < count; ++j) {
        loss += top_blob_data[j] * top_blob_data[j];
      }
      // set the diff: d(0.5 * sum(x^2))/dx = x, i.e. simply the data.
      memcpy(top_blob_diff, top_blob_data, sizeof(Dtype) * top_blob->count());
    }
    loss /= 2.;
  } else {
    // the loss will be the top_data_id-th element in the top_id-th blob.
    for (int i = 0; i < top.size(); ++i) {
      Blob<Dtype>* top_blob = top[i];
      Dtype* top_blob_diff = top_blob->mutable_cpu_diff();
      memset(top_blob_diff, 0, sizeof(Dtype) * top_blob->count());
    }
    loss = top[top_id]->cpu_data()[top_data_id];
    top[top_id]->mutable_cpu_diff()[top_data_id] = 1.;
  }
  return loss;
}
}  // namespace caffe

#endif  // CAFFE_TEST_GRADIENT_CHECK_UTIL_H_