diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index c6dfce19c038239b841b00b24c18c36bee8af025..165869d4b3353781cf22b4ee546b3f02a0322097 100644 (file)
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
#include <vector>
#include "caffe/proto/caffe.pb.h"
-#include "caffe/layer_factory.hpp"
+#include "caffe/layer.hpp"
#include "caffe/net.hpp"
using std::pair;
map<string, int> blob_name_to_idx;
set<string> available_blobs;
int num_layers = param.layers_size();
- CHECK_EQ(bottom.size(), param.bottom_size())
+ CHECK_EQ(bottom.size(), param.input_size())
<< "Incorrect bottom blob size.";
// set the input blobs
- for (int i = 0; i < param.bottom_size(); ++i) {
- const string& blob_name = param.bottom(i);
+ for (int i = 0; i < param.input_size(); ++i) {
+ const string& blob_name = param.input(i);
CHECK_GT(bottom[i]->count(), 0);
shared_ptr<Blob<Dtype> > blob_pointer(
new Blob<Dtype>(bottom[i]->num(), bottom[i]->channels(),
// For each layer, set up their input and output
bottom_vecs_.resize(param.layers_size());
top_vecs_.resize(param.layers_size());
+ bottom_id_vecs_.resize(param.layers_size());
+ top_id_vecs_.resize(param.layers_size());
for (int i = 0; i < param.layers_size(); ++i) {
const LayerConnection& layer_connection = param.layers(i);
const LayerParameter& layer_param = layer_connection.layer();
LOG(INFO) << layer_param.name() << " <- " << blob_name;
bottom_vecs_[i].push_back(
blobs_[blob_name_to_idx[blob_name]].get());
+ bottom_id_vecs_[i].push_back(blob_name_to_idx[blob_name]);
available_blobs.erase(blob_name);
}
for (int j = 0; j < layer_connection.top_size(); ++j) {
const string& blob_name = layer_connection.top(j);
- if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) {
+ // Check if we are doing in-place computation
+ if (layer_connection.bottom_size() > j &&
+ blob_name == layer_connection.bottom(j)) {
+ // In-place computation
+ LOG(INFO) << layer_param.name() << " -> " << blob_name << " (in-place)";
+ available_blobs.insert(blob_name);
+ top_vecs_[i].push_back(
+ blobs_[blob_name_to_idx[blob_name]].get());
+ top_id_vecs_[i].push_back(blob_name_to_idx[blob_name]);
+ } else if (blob_name_to_idx.find(blob_name) != blob_name_to_idx.end()) {
+ // If we are not doing in-place computation but the blob name is already
+ // registered, raise an error.
LOG(FATAL) << "Duplicate blobs produced by multiple sources.";
+ } else {
+ // Normal output.
+ LOG(INFO) << layer_param.name() << " -> " << blob_name;
+ shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
+ blobs_.push_back(blob_pointer);
+ blob_names_.push_back(blob_name);
+ blob_name_to_idx[blob_name] = blob_names_.size() - 1;
+ available_blobs.insert(blob_name);
+ top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get());
+ top_id_vecs_[i].push_back(blob_names_.size() - 1);
}
- LOG(INFO) << layer_param.name() << " -> " << blob_name;
- shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
- blobs_.push_back(blob_pointer);
- blob_names_.push_back(blob_name);
- blob_name_to_idx[blob_name] = blob_names_.size() - 1;
- available_blobs.insert(blob_name);
- top_vecs_[i].push_back(blobs_[blob_names_.size() - 1].get());
}
}
- LOG(INFO) << "Checking top blobs.";
- // In the end, check if all remaining available blobs are top blobs.
- for (int i = 0; i < param.top_size(); ++i) {
- const string& blob_name = param.top(i);
- if (blob_name_to_idx.find(blob_name) == blob_name_to_idx.end()) {
- LOG(FATAL) << "Unknown blob output " << blob_name;
- }
- net_output_blob_indices_.push_back(blob_name_to_idx[blob_name]);
- available_blobs.erase(blob_name);
- }
- if (!available_blobs.empty()) {
- LOG(WARNING) << "There are some internal blobs not used:";
- for (set<string>::iterator it = available_blobs.begin();
- it != available_blobs.end(); ++it) {
- LOG(WARNING) << " " << *it;
- }
+ // In the end, all remaining blobs are considered output blobs.
+ for (set<string>::iterator it = available_blobs.begin();
+ it != available_blobs.end(); ++it) {
+ LOG(ERROR) << "This network produces output " << *it;
+ net_output_blob_indices_.push_back(blob_name_to_idx[*it]);
+ net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
}
LOG(INFO) << "Setting up the layers.";
for (int i = 0; i < layers_.size(); ++i) {
LOG(INFO) << "Setting up " << layer_names_[i];
layers_[i]->SetUp(bottom_vecs_[i], &top_vecs_[i]);
- vector<shared_ptr<Blob<Dtype> > >& layer_params = layers_[i].params();
- for (int j = 0; j < layer_params.size(); ++j) {
- params_.push_back(layer_params[j]);
+ vector<shared_ptr<Blob<Dtype> > >& layer_blobs = layers_[i]->blobs();
+ for (int j = 0; j < layer_blobs.size(); ++j) {
+ params_.push_back(layer_blobs[j]);
+ }
+ // push the learning rate multipliers
+ if (layers_[i]->layer_param().blobs_lr_size()) {
+ CHECK_EQ(layers_[i]->layer_param().blobs_lr_size(), layer_blobs.size());
+ for (int j = 0; j < layer_blobs.size(); ++j) {
+ float local_lr = layers_[i]->layer_param().blobs_lr(j);
+ CHECK_GT(local_lr, 0.);
+ params_lr_.push_back(local_lr);
+ }
+ } else {
+ for (int j = 0; j < layer_blobs.size(); ++j) {
+ params_lr_.push_back(1.);
+ }
+ }
+ // push the weight decay multipliers
+ if (layers_[i]->layer_param().weight_decay_size()) {
+ CHECK_EQ(layers_[i]->layer_param().weight_decay_size(),
+ layer_blobs.size());
+ for (int j = 0; j < layer_blobs.size(); ++j) {
+ float local_decay = layers_[i]->layer_param().weight_decay(j);
+ CHECK_GT(local_decay, 0.);
+ params_weight_decay_.push_back(local_decay);
+ }
+ } else {
+ for (int j = 0; j < layer_blobs.size(); ++j) {
+ params_weight_decay_.push_back(1.);
+ }
+ }
+ for (int topid = 0; topid < top_vecs_[i].size(); ++topid) {
+ LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->channels() << " "
+ << top_vecs_[i][topid]->height() << " "
+ << top_vecs_[i][topid]->width();
}
}
-
LOG(INFO) << "Network initialization done.";
}
template <typename Dtype>
-void Net<Dtype>::Forward(const vector<Blob<Dtype>*> & bottom,
- vector<Blob<Dtype>*>* top) {
+const vector<Blob<Dtype>*>& Net<Dtype>::Forward(  // NOTE(review): bottom.size() is not validated in this function
+ const vector<Blob<Dtype>*> & bottom) {  // bottom[i] is copied into net input blob i
// Copy bottom to internal bottom
for (int i = 0; i < bottom.size(); ++i) {
- memcpy(blobs_[net_input_blob_indices_[i]]->mutable_cpu_data(),
- bottom[i]->cpu_data(), sizeof(Dtype) * bottom[i]->count());
+ blobs_[net_input_blob_indices_[i]]->CopyFrom(*bottom[i]);  // replaces the raw cpu_data memcpy
}
for (int i = 0; i < layers_.size(); ++i) {
+ // LOG(ERROR) << "Forwarding " << layer_names_[i];
layers_[i]->Forward(bottom_vecs_[i], &top_vecs_[i]);
}
- // Copy internal top to top
- for (int i = 0; i < (*top).size(); ++i) {
- NOT_IMPLEMENTED;
- }
+ return net_output_blobs_;  // the blobs left unconsumed by any layer, collected at setup
}
template <typename Dtype>
}
LOG(INFO) << "Loading source layer " << source_layer_name;
vector<shared_ptr<Blob<Dtype> > >& target_blobs =
- layers_[target_layer_id]->params();
+ layers_[target_layer_id]->blobs();
CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
<< "Incompatible number of blobs for layer " << source_layer_name;
for (int j = 0; j < target_blobs.size(); ++j) {
+ CHECK_EQ(target_blobs[j]->num(), source_layer.blobs(j).num());
+ CHECK_EQ(target_blobs[j]->channels(), source_layer.blobs(j).channels());
+ CHECK_EQ(target_blobs[j]->height(), source_layer.blobs(j).height());
+ CHECK_EQ(target_blobs[j]->width(), source_layer.blobs(j).width());
target_blobs[j]->FromProto(source_layer.blobs(j));
}
}
param->set_name(name_);
// Add bottom and top
for (int i = 0; i < net_input_blob_indices_.size(); ++i) {
- param->add_bottom(blob_names_[net_input_blob_indices_[i]]);
- }
- for (int i = 0; i < net_input_blob_indices_.size(); ++i) {
- param->add_bottom(blob_names_[net_input_blob_indices_[i]]);
+ param->add_input(blob_names_[net_input_blob_indices_[i]]);
}
+ LOG(INFO) << "Serializing " << layers_.size() << " layers";
for (int i = 0; i < layers_.size(); ++i) {
LayerConnection* layer_connection = param->add_layers();
+ for (int j = 0; j < bottom_id_vecs_[i].size(); ++j) {
+ layer_connection->add_bottom(blob_names_[bottom_id_vecs_[i][j]]);
+ }
+ for (int j = 0; j < top_id_vecs_[i].size(); ++j) {
+ layer_connection->add_top(blob_names_[top_id_vecs_[i][j]]);
+ }
+ LayerParameter* layer_parameter = layer_connection->mutable_layer();
+ layers_[i]->ToProto(layer_parameter, write_diff);
+ }
+}
+
+template <typename Dtype>
+void Net<Dtype>::Update() {  // calls Update() on every blob collected in params_
+ for (int i = 0; i < params_.size(); ++i) {
+ params_[i]->Update();  // presumably applies the blob's diff to its data -- TODO confirm in Blob
}
}