author    Yangqing Jia <jiayq84@gmail.com>
          Tue, 22 Oct 2013 20:18:50 +0000 (13:18 -0700)
committer Yangqing Jia <jiayq84@gmail.com>
          Tue, 22 Oct 2013 20:18:50 +0000 (13:18 -0700)
parent    8046bcf
index 9f24cfa81bac3b1388c628739ee6a1517159fd4a..a821f438cf4749e5b5c220ffaf09c7abf91263a8 100644
type: "data"
source: "/home/jiayq/caffe-train-leveldb"
meanfile: "/home/jiayq/ilsvrc2012_mean.binaryproto"
- batchsize: 64
+ batchsize: 256
cropsize: 227
mirror: true
}
}
bias_filler {
type: "constant"
- value: 0.1
+ value: 0.
}
blobs_lr: 1.
blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
}
bottom: "data"
top: "conv1"
}
bias_filler {
type: "constant"
- value: 0.1
+ value: 1.
}
blobs_lr: 1.
blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
}
bottom: "pad2"
top: "conv2"
}
bias_filler {
type: "constant"
- value: 0.1
+ value: 0.
}
blobs_lr: 1.
blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
}
bottom: "pad3"
top: "conv3"
}
bias_filler {
type: "constant"
- value: 0.1
+ value: 1.
}
blobs_lr: 1.
blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
}
bottom: "pad4"
top: "conv4"
}
bias_filler {
type: "constant"
- value: 0.1
+ value: 1.
}
blobs_lr: 1.
blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
}
bottom: "pad5"
top: "conv5"
}
bias_filler {
type: "constant"
- value: 0.1
+ value: 1.
}
blobs_lr: 1.
blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
}
bottom: "pool5"
top: "fc6"
}
bias_filler {
type: "constant"
- value: 0.1
+ value: 1.
}
blobs_lr: 1.
blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
}
bottom: "fc6"
top: "fc7"
}
blobs_lr: 1.
blobs_lr: 2.
+ weight_decay: 1.
+ weight_decay: 0.
}
bottom: "fc7"
top: "fc8"
diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp
index c27442b8243da29d3f2adadf0211d1b7651a6350..f0a5ebb93c018177294ef1cb45a8f9cc989713d4 100644
--- a/include/caffe/net.hpp
+++ b/include/caffe/net.hpp
inline vector<shared_ptr<Blob<Dtype> > >& params() { return params_; }
// returns the parameter learning rate multipliers
inline vector<float>& params_lr() { return params_lr_; }
+ inline vector<float>& params_weight_decay() { return params_weight_decay_; }
// Updates the network
void Update();
vector<shared_ptr<Blob<Dtype> > > params_;
// the learning rate multipliers
vector<float> params_lr_;
+ // the weight decay multipliers
+ vector<float> params_weight_decay_;
DISABLE_COPY_AND_ASSIGN(Net);
};
diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp
index e1442ecb184859614641ef1e1f2d5e59e7a1828e..165869d4b3353781cf22b4ee546b3f02a0322097 100644
--- a/src/caffe/net.cpp
+++ b/src/caffe/net.cpp
params_lr_.push_back(1.);
}
}
+ // push the weight decay multipliers
+ if (layers_[i]->layer_param().weight_decay_size()) {
+ CHECK_EQ(layers_[i]->layer_param().weight_decay_size(),
+ layer_blobs.size());
+ for (int j = 0; j < layer_blobs.size(); ++j) {
+ float local_decay = layers_[i]->layer_param().weight_decay(j);
+ CHECK_GE(local_decay, 0.);
+ params_weight_decay_.push_back(local_decay);
+ }
+ } else {
+ for (int j = 0; j < layer_blobs.size(); ++j) {
+ params_weight_decay_.push_back(1.);
+ }
+ }
for (int topid = 0; topid < top_vecs_[i].size(); ++topid) {
LOG(INFO) << "Top shape: " << top_vecs_[i][topid]->channels() << " "
<< top_vecs_[i][topid]->height() << " "
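
As with blobs_lr, the new field is all-or-nothing: a layer either lists one weight_decay per parameter blob or omits the field, in which case every blob falls back to a multiplier of 1 and the solver's global decay applies unchanged, reproducing the old behavior. A hypothetical standalone sketch of that fall-back:

#include <cassert>
#include <vector>

// An explicit list must match the blob count (the CHECK_EQ above);
// an empty list means "multiplier 1 for every blob".
std::vector<float> DecayMultipliers(const std::vector<float>& specified,
                                    size_t num_blobs) {
  if (specified.empty()) {
    return std::vector<float>(num_blobs, 1.f);
  }
  assert(specified.size() == num_blobs);
  return specified;
}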
diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto
index 0aa90fd2af3c1b3afb0a9d787b882668bfd00009..9d50c36d90970f629ce61277863a9a02bf11b0a2 100644
--- a/src/caffe/proto/caffe.proto
+++ b/src/caffe/proto/caffe.proto
// The ratio that is multiplied on the global learning rate. If you want to set
// the multiplier for one blob, you need to set it for all blobs.
repeated float blobs_lr = 51;
+ // The per-blob multiplier that is applied on top of the global weight decay.
+ // As with blobs_lr, set it either for all blobs or for none.
+ repeated float weight_decay = 52;
}
message LayerConnection {
diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp
index 425bd421e082e4e62fde685d207c339b366f9e5b..6fe2ce91257d7601ce44093cbf0a5312445ed8af 100644
--- a/src/caffe/solver.cpp
+++ b/src/caffe/solver.cpp
void SGDSolver<Dtype>::ComputeUpdateValue() {
vector<shared_ptr<Blob<Dtype> > >& net_params = this->net_->params();
vector<float>& net_params_lr = this->net_->params_lr();
+ vector<float>& net_params_weight_decay = this->net_->params_weight_decay();
// get the learning rate
Dtype rate = GetLearningRate();
if (this->param_.display() && this->iter_ % this->param_.display() == 0) {
}
Dtype momentum = this->param_.momentum();
Dtype weight_decay = this->param_.weight_decay();
- // LOG(ERROR) << "rate:" << rate << " momentum:" << momentum
- // << " weight_decay:" << weight_decay;
switch (Caffe::mode()) {
case Caffe::CPU:
for (int param_id = 0; param_id < net_params.size(); ++param_id) {
// Compute the value to history, and then copy them to the blob's diff.
Dtype local_rate = rate * net_params_lr[param_id];
+ Dtype local_decay = weight_decay * net_params_weight_decay[param_id];
caffe_axpby(net_params[param_id]->count(), local_rate,
net_params[param_id]->cpu_diff(), momentum,
history_[param_id]->mutable_cpu_data());
- if (weight_decay) {
+ if (local_decay) {
// add weight decay
caffe_axpy(net_params[param_id]->count(),
- weight_decay * local_rate,
+ local_decay * local_rate,
net_params[param_id]->cpu_data(),
history_[param_id]->mutable_cpu_data());
}
for (int param_id = 0; param_id < net_params.size(); ++param_id) {
// Compute the value to history, and then copy them to the blob's diff.
Dtype local_rate = rate * net_params_lr[param_id];
+ Dtype local_decay = weight_decay * net_params_weight_decay[param_id];
caffe_gpu_axpby(net_params[param_id]->count(), local_rate,
net_params[param_id]->gpu_diff(), momentum,
history_[param_id]->mutable_gpu_data());
- if (weight_decay) {
+ if (local_decay) {
// add weight decay
caffe_gpu_axpy(net_params[param_id]->count(),
- weight_decay * local_rate,
+ local_decay * local_rate,
net_params[param_id]->gpu_data(),
history_[param_id]->mutable_gpu_data());
}
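
Folding the two BLAS calls together (caffe_axpby computes y = a*x + b*y, caffe_axpy computes y += a*x), the per-blob history update that both the CPU and GPU branches perform is history = momentum * history + local_rate * (diff + local_decay * data). A hypothetical standalone sketch on plain arrays:

void UpdateHistory(int count, float rate, float momentum, float weight_decay,
                   float lr_mult, float decay_mult,
                   const float* diff, const float* data, float* history) {
  const float local_rate = rate * lr_mult;              // per-blob step size
  const float local_decay = weight_decay * decay_mult;  // per-blob decay
  for (int i = 0; i < count; ++i) {
    history[i] = momentum * history[i]
        + local_rate * (diff[i] + local_decay * data[i]);
  }
}

A blob with a weight_decay multiplier of 0 (the bias blobs in the prototxt above) skips the decay term entirely, while its blobs_lr multiplier still scales the gradient step.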