src/caffe/net.cpp

   1 // Copyright Yangqing Jia 2013
   2
   3 #include <map>
   4 #include <set>
   5 #include <string>
   6 #include <vector>
   7
   8 #include "caffe/proto/caffe.pb.h"
   9 #include "caffe/layer_factory.hpp"
  10 #include "caffe/net.hpp"
  11
  12 using std::pair;
  13 using std::map;
  14 using std::set;
  15
  16 namespace caffe {
  17
  18 template <typename Dtype>
  19 Net<Dtype>::Net(const NetParameter& param,
  20     const vector<Blob<Dtype>* >& bottom) {
  21   // Basically, build all the layers and set up its connections.
  22   name_ = param.name();
  23   map<string, int> blob_name_to_idx;
  24   set<string> available_blobs;
  25   int num_layers = param.layers_size();
  26   CHECK_EQ(bottom.size(), param.bottom_size())
  27       << "Incorrect bottom blob size.";
  28   // set the input blobs
  29   for (int i = 0; i < param.bottom_size(); ++i) {
  30     const string& blob_name = param.bottom(i);
  31     CHECK_GT(bottom[i]->count(), 0);
  32     shared_ptr<Blob<Dtype> > blob_pointer(
  33         new Blob<Dtype>(bottom[i]->num(), bottom[i]->channels(),
  34             bottom[i]->height(), bottom[i]->width()));
  35     blobs_.push_back(blob_pointer);
  36     blob_names_.push_back(blob_name);
  37     net_input_blob_indices_.push_back(i);
  38     blob_name_to_idx[blob_name] = i;
  39     available_blobs.insert(blob_name);
  40   }
  41   // For each layer, set up their input and output
  42   bottom_vecs_.resize(param.layers_size());
  43   top_vecs_.resize(param.layers_size());
  44   for (int i = 0; i < param.layers_size(); ++i) {
  45     const LayerConnection& layer_connection = param.layers(i);
  46     const LayerParameter& layer_param = layer_connection.layer();
  47     layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
  48     layer_names_.push_back(layer_param.name());
  49     LOG(INFO) << "Creating Layer " << layer_param.name();
  50     // Figure out this layer's input and output
  51     for (int j = 0; j < layer_connection.bottom_size(); ++j) {
  52       const string& blob_name = layer_connection.bottom(j);
  53       if (available_blobs.find(blob_name) == available_blobs.end()) {
  54         LOG(FATAL) << "Unknown blob input " << blob_name <<
  55             " to layer" << j;
  56       }
  57       LOG(INFO) << layer_param.name() << " <- " << blob_name;
  58       bottom_vecs_[i].push_back(
  59           blobs_[blob_name_to_idx[blob_name]].get());
  60       available_blobs.erase(blob_name);
  61     }
  62     for (int j = 0; j < layer_connection.top_size(); ++j) {
  63       const string& blob_name = layer_connection.top(j);
  64       if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) {
  65         LOG(FATAL) << "Duplicate blobs produced by multiple sources.";
  66       }
  67       LOG(INFO) << layer_param.name() << " -> " << blob_name;
  68       shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
  69       blobs_.push_back(blob_pointer);
  70       blob_names_.push_back(blob_name);
  71       blob_name_to_idx[blob_name] = blob_names_.size() - 1;
  72       available_blobs.insert(blob_name);
  73       top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get());
  74     }
  75   }
  76   LOG(INFO) << "Checking top blobs.";
  77   // In the end, check if all remaining available blobs are top blobs.
  78   for (int i = 0; i < param.top_size(); ++i) {
  79     const string& blob_name = param.top(i);
  80     if (blob_name_to_idx.find(blob_name) == blob_name_to_idx.end()) {
  81       LOG(FATAL) << "Unknown blob output " << blob_name;
  82     }
  83     net_output_blob_indices_.push_back(blob_name_to_idx[blob_name]);
  84     available_blobs.erase(blob_name);
  85   }
  86   if (!available_blobs.empty()) {
  87     LOG(WARNING) << "There are some internal blobs not used:";
  88     for (set<string>::iterator it = available_blobs.begin();
  89         it != available_blobs.end(); ++it) {
  90       LOG(WARNING) << "    " << *it;
  91     }
  92   }
  93
  94   LOG(INFO) << "Setting up the layers.";
  95   for (int i = 0; i < layers_.size(); ++i) {
  96     LOG(INFO) << "Setting up " << layer_names_[i];
  97     layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]);
  98     vector<shared_ptr<Blob<Dtype> > >& layer_params = layers_[i].params();
  99     for (int j = 0; j < layer_params.size(); ++j) {
 100       params_.push_back(layer_params[j]);
 101     }
 102   }
 103
 104   LOG(INFO) << "Network initialization done.";
 105 }
 106
 107 template <typename Dtype>
 108 void Net<Dtype>::Forward(const vector<Blob<Dtype>*> & bottom,
 109     vector<Blob<Dtype>*>* top) {
 110   // Copy bottom to internal bottom
 111   for (int i = 0; i < bottom.size(); ++i) {
 112     memcpy(blobs_[net_input_blob_indices_[i]]->mutable_cpu_data(),
 113         bottom[i]->cpu_data(), sizeof(Dtype) * bottom[i]->count());
 114   }
 115   for (int i = 0; i < layers_.size(); ++i) {
 116     layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]);
 117   }
 118   // Copy internal top to top
 119   for (int i = 0; i < (*top).size(); ++i) {
 120     NOT_IMPLEMENTED;
 121   }
 122 }
 123
 124 template <typename Dtype>
 125 Dtype Net<Dtype>::Backward() {
 126   Dtype loss = 0;
 127   // TODO(Yangqing): figure out those layers that do not need backward.
 128   for (int i = layers_.size() - 1; i >= 0; --i) {
 129     Dtype layer_loss = layers_[i]->Backward(
 130         top_vecs_[i], true, &bottom_vecs_[i]);
 131     loss += layer_loss;
 132   }
 133   return loss;
 134 }
 135
 136 template <typename Dtype>
 137 void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
 138   int num_source_layers = param.layers_size();
 139   for (int i = 0; i < num_source_layers; ++i) {
 140     const LayerParameter& source_layer = param.layers(i).layer();
 141     const string& source_layer_name = source_layer.name();
 142     int target_layer_id = 0;
 143     while (target_layer_id != layer_names_.size() &&
 144         layer_names_[target_layer_id] != source_layer_name) {
 145       ++target_layer_id;
 146     }
 147     if (target_layer_id == layer_names_.size()) {
 148       LOG(INFO) << "Ignoring source layer " << source_layer_name;
 149       continue;
 150     }
 151     LOG(INFO) << "Loading source layer " << source_layer_name;
 152     vector<shared_ptr<Blob<Dtype> > >& target_blobs =
 153         layers_[target_layer_id]->params();
 154     CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
 155         << "Incompatible number of blobs for layer " << source_layer_name;
 156     for (int j = 0; j < target_blobs.size(); ++j) {
 157       target_blobs[j]->FromProto(source_layer.blobs(j));
 158     }
 159   }
 160 }
 161
 162 template <typename Dtype>
 163 void Net<Dtype>::ToProto(NetParameter* param, bool write_diff) {
 164   param->Clear();
 165   param->set_name(name_);
 166   // Add bottom and top
 167   for (int i = 0; i < net_input_blob_indices_.size(); ++i) {
 168     param->add_bottom(blob_names_[net_input_blob_indices_[i]]);
 169   }
 170   for (int i = 0; i < net_input_blob_indices_.size(); ++i) {
 171     param->add_bottom(blob_names_[net_input_blob_indices_[i]]);
 172   }
 173   for (int i = 0; i < layers_.size(); ++i) {
 174     LayerConnection* layer_connection = param->add_layers();
 175   }
 176 }
 177
 178 INSTANTIATE_CLASS(Net);
 179
 180 }  // namespace caffe