src/caffe/net.cpp

   1 // Copyright Yangqing Jia 2013
   2
   3 #include <map>
   4 #include <set>
   5 #include <string>
   6 #include <vector>
   7
   8 #include "caffe/proto/caffe.pb.h"
   9 #include "caffe/layer_factory.hpp"
  10 #include "caffe/net.hpp"
  11
  12 using std::pair;
  13 using std::map;
  14 using std::set;
  15
  16 namespace caffe {
  17
  18 template <typename Dtype>
  19 Net<Dtype>::Net(const NetParameter& param,
  20     const vector<Blob<Dtype>* >& bottom) {
  21   // Basically, build all the layers and set up its connections.
  22   name_ = param.name();
  23   map<string, int> blob_name_to_idx;
  24   set<string> available_blobs;
  25   int num_layers = param.layers_size();
  26   CHECK_EQ(bottom.size(), param.input_size())
  27       << "Incorrect bottom blob size.";
  28   // set the input blobs
  29   for (int i = 0; i < param.input_size(); ++i) {
  30     const string& blob_name = param.input(i);
  31     CHECK_GT(bottom[i]->count(), 0);
  32     shared_ptr<Blob<Dtype> > blob_pointer(
  33         new Blob<Dtype>(bottom[i]->num(), bottom[i]->channels(),
  34             bottom[i]->height(), bottom[i]->width()));
  35     blobs_.push_back(blob_pointer);
  36     blob_names_.push_back(blob_name);
  37     net_input_blob_indices_.push_back(i);
  38     blob_name_to_idx[blob_name] = i;
  39     available_blobs.insert(blob_name);
  40   }
  41   // For each layer, set up their input and output
  42   bottom_vecs_.resize(param.layers_size());
  43   top_vecs_.resize(param.layers_size());
  44   bottom_id_vecs_.resize(param.layers_size());
  45   top_id_vecs_.resize(param.layers_size());
  46   for (int i = 0; i < param.layers_size(); ++i) {
  47     const LayerConnection& layer_connection = param.layers(i);
  48     const LayerParameter& layer_param = layer_connection.layer();
  49     layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
  50     layer_names_.push_back(layer_param.name());
  51     LOG(INFO) << "Creating Layer " << layer_param.name();
  52     // Figure out this layer's input and output
  53     for (int j = 0; j < layer_connection.bottom_size(); ++j) {
  54       const string& blob_name = layer_connection.bottom(j);
  55       if (available_blobs.find(blob_name) == available_blobs.end()) {
  56         LOG(FATAL) << "Unknown blob input " << blob_name <<
  57             " to layer" << j;
  58       }
  59       LOG(INFO) << layer_param.name() << " <- " << blob_name;
  60       bottom_vecs_[i].push_back(
  61           blobs_[blob_name_to_idx[blob_name]].get());
  62       bottom_id_vecs_[i].push_back(blob_name_to_idx[blob_name]);
  63       available_blobs.erase(blob_name);
  64     }
  65     for (int j = 0; j < layer_connection.top_size(); ++j) {
  66       const string& blob_name = layer_connection.top(j);
  67       if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) {
  68         LOG(FATAL) << "Duplicate blobs produced by multiple sources.";
  69       }
  70       LOG(INFO) << layer_param.name() << " -> " << blob_name;
  71       shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
  72       blobs_.push_back(blob_pointer);
  73       blob_names_.push_back(blob_name);
  74       blob_name_to_idx[blob_name] = blob_names_.size() - 1;
  75       available_blobs.insert(blob_name);
  76       top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get());
  77       top_id_vecs_[i].push_back(blob_names_.size() - 1);
  78     }
  79   }
  80   // In the end, all remaining blobs are considered output blobs.
  81   for (set<string>::iterator it = available_blobs.begin();
  82       it != available_blobs.end(); ++it) {
  83     LOG(ERROR) << "This network produces output " << *it;
  84     net_output_blob_indices_.push_back(blob_name_to_idx[*it]);
  85     net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
  86   }
  87
  88   LOG(ERROR) << "Setting up the layers.";
  89   for (int i = 0; i < layers_.size(); ++i) {
  90     LOG(INFO) << "Setting up " << layer_names_[i];
  91     layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]);
  92     vector<shared_ptr<Blob<Dtype> > >& layer_params = layers_[i]->params();
  93     for (int j = 0; j < layer_params.size(); ++j) {
  94       params_.push_back(layer_params[j]);
  95     }
  96   }
  97
  98   LOG(ERROR) << "Network initialization done.";
  99 }
 100
 101 template <typename Dtype>
 102 const vector<Blob<Dtype>*>& Net<Dtype>::Forward(
 103     const vector<Blob<Dtype>*> & bottom) {
 104   // Copy bottom to internal bottom
 105   for (int i = 0; i < bottom.size(); ++i) {
 106     blobs_[net_input_blob_indices_[i]]->CopyFrom(*bottom[i]);
 107   }
 108   for (int i = 0; i < layers_.size(); ++i) {
 109     layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]);
 110   }
 111   return net_output_blobs_;
 112 }
 113
 114 template <typename Dtype>
 115 Dtype Net<Dtype>::Backward() {
 116   Dtype loss = 0;
 117   // TODO(Yangqing): figure out those layers that do not need backward.
 118   for (int i = layers_.size() - 1; i >= 0; --i) {
 119     Dtype layer_loss = layers_[i]->Backward(
 120         top_vecs_[i], true, &bottom_vecs_[i]);
 121     loss += layer_loss;
 122   }
 123   return loss;
 124 }
 125
 126 template <typename Dtype>
 127 void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
 128   int num_source_layers = param.layers_size();
 129   for (int i = 0; i < num_source_layers; ++i) {
 130     const LayerParameter& source_layer = param.layers(i).layer();
 131     const string& source_layer_name = source_layer.name();
 132     int target_layer_id = 0;
 133     while (target_layer_id != layer_names_.size() &&
 134         layer_names_[target_layer_id] != source_layer_name) {
 135       ++target_layer_id;
 136     }
 137     if (target_layer_id == layer_names_.size()) {
 138       LOG(INFO) << "Ignoring source layer " << source_layer_name;
 139       continue;
 140     }
 141     LOG(INFO) << "Loading source layer " << source_layer_name;
 142     vector<shared_ptr<Blob<Dtype> > >& target_blobs =
 143         layers_[target_layer_id]->params();
 144     CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
 145         << "Incompatible number of blobs for layer " << source_layer_name;
 146     for (int j = 0; j < target_blobs.size(); ++j) {
 147       target_blobs[j]->FromProto(source_layer.blobs(j));
 148     }
 149   }
 150 }
 151
 152 template <typename Dtype>
 153 void Net<Dtype>::ToProto(NetParameter* param, bool write_diff) {
 154   param->Clear();
 155   param->set_name(name_);
 156   // Add bottom and top
 157   for (int i = 0; i < net_input_blob_indices_.size(); ++i) {
 158     param->add_input(blob_names_[net_input_blob_indices_[i]]);
 159   }
 160   LOG(INFO) << "Serializing " << layers_.size() << " layers";
 161   for (int i = 0; i < layers_.size(); ++i) {
 162     LayerConnection* layer_connection = param->add_layers();
 163     for (int j = 0; j < bottom_id_vecs_[i].size(); ++j) {
 164       layer_connection->add_bottom(blob_names_[bottom_id_vecs_[i][j]]);
 165     }
 166     for (int j = 0; j < top_id_vecs_[i].size(); ++j) {
 167       layer_connection->add_top(blob_names_[top_id_vecs_[i][j]]);
 168     }
 169     LayerParameter* layer_parameter = layer_connection->mutable_layer();
 170     layers_[i]->ToProto(layer_parameter);
 171   }
 172 }
 173
 174 template <typename Dtype>
 175 void Net<Dtype>::Update() {
 176   for (int i = 0; i < params_.size(); ++i) {
 177     params_[i]->Update();
 178   }
 179 }
 180
 181 INSTANTIATE_CLASS(Net);
 182
 183 }  // namespace caffe