1 // Copyright Yangqing Jia 2013
3 #include <map>
4 #include <set>
5 #include <string>
6 #include <vector>
8 #include "caffe/proto/caffe.pb.h"
9 #include "caffe/layer.hpp"
10 #include "caffe/net.hpp"
12 using std::pair;
13 using std::map;
14 using std::set;
16 namespace caffe {
18 template <typename Dtype>
19 Net<Dtype>::Net(const NetParameter& param,
20 const vector<Blob<Dtype>* >& bottom) {
21 // Basically, build all the layers and set up its connections.
22 name_ = param.name();
23 map<string, int> blob_name_to_idx;
24 set<string> available_blobs;
25 int num_layers = param.layers_size();
26 CHECK_EQ(bottom.size(), param.input_size())
27 << "Incorrect bottom blob size.";
28 // set the input blobs
29 for (int i = 0; i < param.input_size(); ++i) {
30 const string& blob_name = param.input(i);
31 CHECK_GT(bottom[i]->count(), 0);
32 shared_ptr<Blob<Dtype> > blob_pointer(
33 new Blob<Dtype>(bottom[i]->num(), bottom[i]->channels(),
34 bottom[i]->height(), bottom[i]->width()));
35 blobs_.push_back(blob_pointer);
36 blob_names_.push_back(blob_name);
37 net_input_blob_indices_.push_back(i);
38 blob_name_to_idx[blob_name] = i;
39 available_blobs.insert(blob_name);
40 }
41 // For each layer, set up their input and output
42 bottom_vecs_.resize(param.layers_size());
43 top_vecs_.resize(param.layers_size());
44 bottom_id_vecs_.resize(param.layers_size());
45 top_id_vecs_.resize(param.layers_size());
46 for (int i = 0; i < param.layers_size(); ++i) {
47 const LayerConnection& layer_connection = param.layers(i);
48 const LayerParameter& layer_param = layer_connection.layer();
49 layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
50 layer_names_.push_back(layer_param.name());
51 LOG(INFO) << "Creating Layer " << layer_param.name();
52 // Figure out this layer's input and output
53 for (int j = 0; j < layer_connection.bottom_size(); ++j) {
54 const string& blob_name = layer_connection.bottom(j);
55 if (available_blobs.find(blob_name) == available_blobs.end()) {
56 LOG(FATAL) << "Unknown blob input " << blob_name <<
57 " to layer" << j;
58 }
59 LOG(INFO) << layer_param.name() << " <- " << blob_name;
60 bottom_vecs_[i].push_back(
61 blobs_[blob_name_to_idx[blob_name]].get());
62 bottom_id_vecs_[i].push_back(blob_name_to_idx[blob_name]);
63 available_blobs.erase(blob_name);
64 }
65 for (int j = 0; j < layer_connection.top_size(); ++j) {
66 const string& blob_name = layer_connection.top(j);
67 // Check if we are doing in-place computation
68 if (layer_connection.bottom_size() > j &&
69 blob_name == layer_connection.bottom(j)) {
70 // In-place computation
71 LOG(INFO) << layer_param.name() << " -> " << blob_name << " (in-place)";
72 available_blobs.insert(blob_name);
73 top_vecs_[i].push_back(
74 blobs_[blob_name_to_idx[blob_name]].get());
75 top_id_vecs_[i].push_back(blob_name_to_idx[blob_name]);
76 } else if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) {
77 // If we are not doing in-place computation but has duplicated blobs,
78 // raise an error.
79 LOG(FATAL) << "Duplicate blobs produced by multiple sources.";
80 } else {
81 // Normal output.
82 LOG(INFO) << layer_param.name() << " -> " << blob_name;
83 shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
84 blobs_.push_back(blob_pointer);
85 blob_names_.push_back(blob_name);
86 blob_name_to_idx[blob_name] = blob_names_.size() - 1;
87 available_blobs.insert(blob_name);
88 top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get());
89 top_id_vecs_[i].push_back(blob_names_.size() - 1);
90 }
91 }
92 }
93 // In the end, all remaining blobs are considered output blobs.
94 for (set<string>::iterator it = available_blobs.begin();
95 it != available_blobs.end(); ++it) {
96 LOG(ERROR) << "This network produces output " << *it;
97 net_output_blob_indices_.push_back(blob_name_to_idx[*it]);
98 net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
99 }
101 LOG(INFO) << "Setting up the layers.";
102 for (int i = 0; i < layers_.size(); ++i) {
103 LOG(INFO) << "Setting up " << layer_names_[i];
104 layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]);
105 vector<shared_ptr<Blob<Dtype> > >& layer_blobs = layers_[i]->blobs();
106 for (int j = 0; j < layer_blobs.size(); ++j) {
107 params_.push_back(layer_blobs[j]);
108 }
109 // push the learning rate mutlipliers
110 if (layers_[i]->layer_param().blobs_lr_size()) {
111 CHECK_EQ(layers_[i]->layer_param().blobs_lr_size(), layer_blobs.size());
112 for (int j = 0; j < layer_blobs.size(); ++j) {
113 float local_lr = layers_[i]->layer_param().blobs_lr(j);
114 CHECK_GT(local_lr, 0.);
115 params_lr_.push_back(local_lr);
116 }
117 } else {
118 for (int j = 0; j < layer_blobs.size(); ++j) {
119 params_lr_.push_back(1.);
120 }
121 }
122 // push the weight decay multipliers
123 if (layers_[i]->layer_param().weight_decay_size()) {
124 CHECK_EQ(layers_[i]->layer_param().weight_decay_size(),
125 layer_blobs.size());
126 for (int j = 0; j < layer_blobs.size(); ++j) {
127 float local_decay = layers_[i]->layer_param().weight_decay(j);
128 CHECK_GT(local_decay, 0.);
129 params_weight_decay_.push_back(local_decay);
130 }
131 } else {
132 for (int j = 0; j < layer_blobs.size(); ++j) {
133 params_weight_decay_.push_back(1.);
134 }
135 }
136 for (int topid = 0; topid < top_vecs_[i].size(); ++topid) {
137 LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->channels() << " "
138 << top_vecs_[i][topid]->height() << " "
139 << top_vecs_[i][topid]->width();
140 }
141 }
142 LOG(INFO) << "Network initialization done.";
143 }
145 template <typename Dtype>
146 const vector<Blob<Dtype>*>& Net<Dtype>::Forward(
147 const vector<Blob<Dtype>*> & bottom) {
148 // Copy bottom to internal bottom
149 for (int i = 0; i < bottom.size(); ++i) {
150 blobs_[net_input_blob_indices_[i]]->CopyFrom(*bottom[i]);
151 }
152 for (int i = 0; i < layers_.size(); ++i) {
153 // LOG(ERROR) << "Forwarding " << layer_names_[i];
154 layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]);
155 }
156 return net_output_blobs_;
157 }
159 template <typename Dtype>
160 Dtype Net<Dtype>::Backward() {
161 Dtype loss = 0;
162 // TODO(Yangqing): figure out those layers that do not need backward.
163 for (int i = layers_.size() - 1; i >= 0; --i) {
164 Dtype layer_loss = layers_[i]->Backward(
165 top_vecs_[i], true, &bottom_vecs_[i]);
166 loss += layer_loss;
167 }
168 return loss;
169 }
171 template <typename Dtype>
172 void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
173 int num_source_layers = param.layers_size();
174 for (int i = 0; i < num_source_layers; ++i) {
175 const LayerParameter& source_layer = param.layers(i).layer();
176 const string& source_layer_name = source_layer.name();
177 int target_layer_id = 0;
178 while (target_layer_id != layer_names_.size() &&
179 layer_names_[target_layer_id] != source_layer_name) {
180 ++target_layer_id;
181 }
182 if (target_layer_id == layer_names_.size()) {
183 LOG(INFO) << "Ignoring source layer " << source_layer_name;
184 continue;
185 }
186 LOG(INFO) << "Loading source layer " << source_layer_name;
187 vector<shared_ptr<Blob<Dtype> > >& target_blobs =
188 layers_[target_layer_id]->blobs();
189 CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
190 << "Incompatible number of blobs for layer " << source_layer_name;
191 for (int j = 0; j < target_blobs.size(); ++j) {
192 CHECK_EQ(target_blobs[j]->num(), source_layer.blobs(j).num());
193 CHECK_EQ(target_blobs[j]->channels(), source_layer.blobs(j).channels());
194 CHECK_EQ(target_blobs[j]->height(), source_layer.blobs(j).height());
195 CHECK_EQ(target_blobs[j]->width(), source_layer.blobs(j).width());
196 target_blobs[j]->FromProto(source_layer.blobs(j));
197 }
198 }
199 }
201 template <typename Dtype>
202 void Net<Dtype>::ToProto(NetParameter* param, bool write_diff) {
203 param->Clear();
204 param->set_name(name_);
205 // Add bottom and top
206 for (int i = 0; i < net_input_blob_indices_.size(); ++i) {
207 param->add_input(blob_names_[net_input_blob_indices_[i]]);
208 }
209 LOG(INFO) << "Serializing " << layers_.size() << " layers";
210 for (int i = 0; i < layers_.size(); ++i) {
211 LayerConnection* layer_connection = param->add_layers();
212 for (int j = 0; j < bottom_id_vecs_[i].size(); ++j) {
213 layer_connection->add_bottom(blob_names_[bottom_id_vecs_[i][j]]);
214 }
215 for (int j = 0; j < top_id_vecs_[i].size(); ++j) {
216 layer_connection->add_top(blob_names_[top_id_vecs_[i][j]]);
217 }
218 LayerParameter* layer_parameter = layer_connection->mutable_layer();
219 layers_[i]->ToProto(layer_parameter, write_diff);
220 }
221 }
223 template <typename Dtype>
224 void Net<Dtype>::Update() {
225 for (int i = 0; i < params_.size(); ++i) {
226 params_[i]->Update();
227 }
228 }
// NOTE(review): INSTANTIATE_CLASS is defined outside this file; presumably it
// explicitly instantiates Net for the supported Dtype values so that the
// template definitions above are emitted in this translation unit — confirm
// against the macro definition.
230 INSTANTIATE_CLASS(Net);
232 } // namespace caffe