src/caffe/net.cpp

   1 // Copyright Yangqing Jia 2013
   2
   3 #include <map>
   4 #include <set>
   5 #include <string>
   6 #include <vector>
   7
   8 #include "caffe/proto/caffe.pb.h"
   9 #include "caffe/layer.hpp"
  10 #include "caffe/net.hpp"
  11
  12 using std::pair;
  13 using std::map;
  14 using std::set;
  15
  16 namespace caffe {
  17
  18 template <typename Dtype>
  19 Net<Dtype>::Net(const NetParameter& param,
  20     const vector<Blob<Dtype>* >& bottom) {
  21   // Basically, build all the layers and set up its connections.
  22   name_ = param.name();
  23   map<string, int> blob_name_to_idx;
  24   set<string> available_blobs;
  25   int num_layers = param.layers_size();
  26   CHECK_EQ(bottom.size(), param.input_size())
  27       << "Incorrect bottom blob size.";
  28   // set the input blobs
  29   for (int i = 0; i < param.input_size(); ++i) {
  30     const string& blob_name = param.input(i);
  31     CHECK_GT(bottom[i]->count(), 0);
  32     shared_ptr<Blob<Dtype> > blob_pointer(
  33         new Blob<Dtype>(bottom[i]->num(), bottom[i]->channels(),
  34             bottom[i]->height(), bottom[i]->width()));
  35     blobs_.push_back(blob_pointer);
  36     blob_names_.push_back(blob_name);
  37     net_input_blob_indices_.push_back(i);
  38     blob_name_to_idx[blob_name] = i;
  39     available_blobs.insert(blob_name);
  40   }
  41   // For each layer, set up their input and output
  42   bottom_vecs_.resize(param.layers_size());
  43   top_vecs_.resize(param.layers_size());
  44   bottom_id_vecs_.resize(param.layers_size());
  45   top_id_vecs_.resize(param.layers_size());
  46   for (int i = 0; i < param.layers_size(); ++i) {
  47     const LayerConnection& layer_connection = param.layers(i);
  48     const LayerParameter& layer_param = layer_connection.layer();
  49     layers_.push_back(shared_ptr<Layer<Dtype> >(GetLayer<Dtype>(layer_param)));
  50     layer_names_.push_back(layer_param.name());
  51     LOG(INFO) << "Creating Layer " << layer_param.name();
  52     // Figure out this layer's input and output
  53     for (int j = 0; j < layer_connection.bottom_size(); ++j) {
  54       const string& blob_name = layer_connection.bottom(j);
  55       if (available_blobs.find(blob_name) == available_blobs.end()) {
  56         LOG(FATAL) << "Unknown blob input " << blob_name <<
  57             " to layer" << j;
  58       }
  59       LOG(INFO) << layer_param.name() << " <- " << blob_name;
  60       bottom_vecs_[i].push_back(
  61           blobs_[blob_name_to_idx[blob_name]].get());
  62       bottom_id_vecs_[i].push_back(blob_name_to_idx[blob_name]);
  63       available_blobs.erase(blob_name);
  64     }
  65     for (int j = 0; j < layer_connection.top_size(); ++j) {
  66       const string& blob_name = layer_connection.top(j);
  67       // Check if we are doing in-place computation
  68       if (layer_connection.bottom_size() > j &&
  69           blob_name == layer_connection.bottom(j)) {
  70         // In-place computation
  71         LOG(INFO) << layer_param.name() << " -> " << blob_name << " (in-place)";
  72         available_blobs.insert(blob_name);
  73         top_vecs_[i].push_back(
  74             blobs_[blob_name_to_idx[blob_name]].get());
  75         top_id_vecs_[i].push_back(blob_name_to_idx[blob_name]);
  76       } else if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) {
  77         // If we are not doing in-place computation but has duplicated blobs,
  78         // raise an error.
  79         LOG(FATAL) << "Duplicate blobs produced by multiple sources.";
  80       } else {
  81         // Normal output.
  82         LOG(INFO) << layer_param.name() << " -> " << blob_name;
  83         shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
  84         blobs_.push_back(blob_pointer);
  85         blob_names_.push_back(blob_name);
  86         blob_name_to_idx[blob_name] = blob_names_.size() - 1;
  87         available_blobs.insert(blob_name);
  88         top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get());
  89         top_id_vecs_[i].push_back(blob_names_.size() - 1);
  90       }
  91     }
  92   }
  93   // In the end, all remaining blobs are considered output blobs.
  94   for (set<string>::iterator it = available_blobs.begin();
  95       it != available_blobs.end(); ++it) {
  96     LOG(ERROR) << "This network produces output " << *it;
  97     net_output_blob_indices_.push_back(blob_name_to_idx[*it]);
  98     net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
  99   }
 100
 101   LOG(INFO) << "Setting up the layers.";
 102   for (int i = 0; i < layers_.size(); ++i) {
 103     LOG(INFO) << "Setting up " << layer_names_[i];
 104     layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]);
 105     vector<shared_ptr<Blob<Dtype> > >& layer_blobs = layers_[i]->blobs();
 106     for (int j = 0; j < layer_blobs.size(); ++j) {
 107       params_.push_back(layer_blobs[j]);
 108     }
 109     // push the learning rate mutlipliers
 110     if (layers_[i]->layer_param().blobs_lr_size()) {
 111       CHECK_EQ(layers_[i]->layer_param().blobs_lr_size(), layer_blobs.size());
 112       for (int j = 0; j < layer_blobs.size(); ++j) {
 113         float local_lr = layers_[i]->layer_param().blobs_lr(j);
 114         CHECK_GT(local_lr, 0.);
 115         params_lr_.push_back(local_lr);
 116       }
 117     } else {
 118       for (int j = 0; j < layer_blobs.size(); ++j) {
 119         params_lr_.push_back(1.);
 120       }
 121     }
 122     for (int topid = 0; topid < top_vecs_[i].size(); ++topid) {
 123       LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->channels() << " "
 124           << top_vecs_[i][topid]->height() << " "
 125           << top_vecs_[i][topid]->width();
 126     }
 127   }
 128   LOG(INFO) << "Network initialization done.";
 129 }
 130
 131 template <typename Dtype>
 132 const vector<Blob<Dtype>*>& Net<Dtype>::Forward(
 133     const vector<Blob<Dtype>*> & bottom) {
 134   // Copy bottom to internal bottom
 135   for (int i = 0; i < bottom.size(); ++i) {
 136     blobs_[net_input_blob_indices_[i]]->CopyFrom(*bottom[i]);
 137   }
 138   for (int i = 0; i < layers_.size(); ++i) {
 139     // LOG(ERROR) << "Forwarding " << layer_names_[i];
 140     layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]);
 141   }
 142   return net_output_blobs_;
 143 }
 144
 145 template <typename Dtype>
 146 Dtype Net<Dtype>::Backward() {
 147   Dtype loss = 0;
 148   // TODO(Yangqing): figure out those layers that do not need backward.
 149   for (int i = layers_.size() - 1; i >= 0; --i) {
 150     Dtype layer_loss = layers_[i]->Backward(
 151         top_vecs_[i], true, &bottom_vecs_[i]);
 152     loss += layer_loss;
 153   }
 154   return loss;
 155 }
 156
 157 template <typename Dtype>
 158 void Net<Dtype>::CopyTrainedLayersFrom(const NetParameter& param) {
 159   int num_source_layers = param.layers_size();
 160   for (int i = 0; i < num_source_layers; ++i) {
 161     const LayerParameter& source_layer = param.layers(i).layer();
 162     const string& source_layer_name = source_layer.name();
 163     int target_layer_id = 0;
 164     while (target_layer_id != layer_names_.size() &&
 165         layer_names_[target_layer_id] != source_layer_name) {
 166       ++target_layer_id;
 167     }
 168     if (target_layer_id == layer_names_.size()) {
 169       LOG(INFO) << "Ignoring source layer " << source_layer_name;
 170       continue;
 171     }
 172     LOG(INFO) << "Loading source layer " << source_layer_name;
 173     vector<shared_ptr<Blob<Dtype> > >& target_blobs =
 174         layers_[target_layer_id]->blobs();
 175     CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
 176         << "Incompatible number of blobs for layer " << source_layer_name;
 177     for (int j = 0; j < target_blobs.size(); ++j) {
 178       CHECK_EQ(target_blobs[j]->num(), source_layer.blobs(j).num());
 179       CHECK_EQ(target_blobs[j]->channels(), source_layer.blobs(j).channels());
 180       CHECK_EQ(target_blobs[j]->height(), source_layer.blobs(j).height());
 181       CHECK_EQ(target_blobs[j]->width(), source_layer.blobs(j).width());
 182       target_blobs[j]->FromProto(source_layer.blobs(j));
 183     }
 184   }
 185 }
 186
 187 template <typename Dtype>
 188 void Net<Dtype>::ToProto(NetParameter* param, bool write_diff) {
 189   param->Clear();
 190   param->set_name(name_);
 191   // Add bottom and top
 192   for (int i = 0; i < net_input_blob_indices_.size(); ++i) {
 193     param->add_input(blob_names_[net_input_blob_indices_[i]]);
 194   }
 195   LOG(INFO) << "Serializing " << layers_.size() << " layers";
 196   for (int i = 0; i < layers_.size(); ++i) {
 197     LayerConnection* layer_connection = param->add_layers();
 198     for (int j = 0; j < bottom_id_vecs_[i].size(); ++j) {
 199       layer_connection->add_bottom(blob_names_[bottom_id_vecs_[i][j]]);
 200     }
 201     for (int j = 0; j < top_id_vecs_[i].size(); ++j) {
 202       layer_connection->add_top(blob_names_[top_id_vecs_[i][j]]);
 203     }
 204     LayerParameter* layer_parameter = layer_connection->mutable_layer();
 205     layers_[i]->ToProto(layer_parameter, write_diff);
 206   }
 207 }
 208
 209 template <typename Dtype>
 210 void Net<Dtype>::Update() {
 211   for (int i = 0; i < params_.size(); ++i) {
 212     params_[i]->Update();
 213   }
 214 }
 215
 216 INSTANTIATE_CLASS(Net);
 217
 218 }  // namespace caffe