diff --git a/src/caffe/layers/detection_output_layer.cpp b/src/caffe/layers/detection_output_layer.cpp
index 0e9b619346a17b4ee0c3bdabac1cccf1843790b3..8d7ab2f382f952bdffb9fd205ccaa10a22ad691e 100644 (file)
@@ -121,11 +121,16 @@ void DetectionOutputLayer<Ftype, Btype>::LayerSetUp(const vector<Blob*>& bottom,
bbox_permute_.ReshapeLike(*(bottom[0]));
}
conf_permute_.ReshapeLike(*(bottom[1]));
+ num_keypoint_ = detection_output_param.num_keypoint();  // keypoints per box
+ output_size_ = num_keypoint_*2 + 7;  // row width: 7 fields + x,y per keypoint
+ code_size_ = num_keypoint_*2 + 4;  // presumably 4 box coords + x,y per keypoint
}
template <typename Ftype, typename Btype>
void DetectionOutputLayer<Ftype, Btype>::Reshape(const vector<Blob*>& bottom,
const vector<Blob*>& top) {
+ // code_size_ is the member computed in LayerSetUp (num_keypoint_*2 + 4).
+
if (need_save_) {
CHECK_LE(name_count_, names_.size());
if (name_count_ % num_test_image_ == 0) {
conf_permute_.count(1) != bottom[1]->count(1)) {
conf_permute_.ReshapeLike(*(bottom[1]));
}
- num_priors_ = bottom[2]->height() / 4;
- CHECK_EQ(num_priors_ * num_loc_classes_ * 4, bottom[0]->channels())
+ num_priors_ = bottom[2]->height() / code_size_;
+
+ CHECK_EQ(num_priors_ * num_loc_classes_ * code_size_, bottom[0]->channels())
<< "Number of priors must match number of location predictions.";
CHECK_EQ(num_priors_ * num_classes_, bottom[1]->channels())
<< "Number of priors must match number of confidence predictions.";
top_shape.push_back(1);
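- // Each row is a 7 dimension vector, which stores
- // [image_id, label, confidence, xmin, ymin, xmax, ymax]
+ // Each row is an output_size_ (7 + 2 * num_keypoint_) dimension vector:
+ // [image_id, label, confidence, xmin, ymin, xmax, ymax, kp_x, kp_y, ...]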
- top_shape.push_back(7);
+ top_shape.push_back(output_size_);
top[0]->Reshape(top_shape);
}
template <typename Ftype, typename Btype>
void DetectionOutputLayer<Ftype, Btype>::Forward_cpu(
const vector<Blob*>& bottom, const vector<Blob*>& top) {
+
const Ftype* loc_data = bottom[0]->cpu_data<Ftype>();
const Ftype* conf_data = bottom[1]->cpu_data<Ftype>();
const Ftype* prior_data = bottom[2]->cpu_data<Ftype>();
// Retrieve all location predictions.
vector<LabelBBox> all_loc_preds;
GetLocPredictions(loc_data, num, num_priors_, num_loc_classes_,
- share_location_, &all_loc_preds);
-
+ share_location_, &all_loc_preds, num_keypoint_);
// Retrieve all confidences.
vector<map<int, vector<float> > > all_conf_scores;
GetConfidenceScores(conf_data, num, num_priors_, num_classes_,
&all_conf_scores);
-
// Retrieve all prior bboxes. It is same within a batch since we assume all
// images in a batch are of same dimension.
vector<NormalizedBBox> prior_bboxes;
vector<vector<float> > prior_variances;
- GetPriorBBoxes(prior_data, num_priors_, &prior_bboxes, &prior_variances);
+ GetPriorBBoxes(prior_data, num_priors_, &prior_bboxes, &prior_variances, code_size_);
// Decode all loc predictions to bboxes.
vector<LabelBBox> all_decode_bboxes;
vector<int> top_shape(2, 1);
top_shape.push_back(num_kept);
- top_shape.push_back(7);
+ top_shape.push_back(output_size_);
Ftype* top_data;
if (num_kept == 0) {
LOG(INFO) << "Couldn't find any detections";
// Generate fake results per image.
for (int i = 0; i < num; ++i) {
top_data[0] = i;
- top_data += 7;
+ top_data += output_size_;
}
} else {
top[0]->Reshape(top_shape);
}
for (int j = 0; j < indices.size(); ++j) {
int idx = indices[j];
- top_data[count * 7] = i;
- top_data[count * 7 + 1] = label;
- top_data[count * 7 + 2] = scores[idx];
+ top_data[count * output_size_] = i;
+ top_data[count * output_size_ + 1] = label;
+ top_data[count * output_size_ + 2] = scores[idx];
const NormalizedBBox& bbox = bboxes[idx];
- top_data[count * 7 + 3] = bbox.xmin();
- top_data[count * 7 + 4] = bbox.ymin();
- top_data[count * 7 + 5] = bbox.xmax();
- top_data[count * 7 + 6] = bbox.ymax();
+ top_data[count * output_size_ + 3] = bbox.xmin();
+ top_data[count * output_size_ + 4] = bbox.ymin();
+ top_data[count * output_size_ + 5] = bbox.xmax();
+ top_data[count * output_size_ + 6] = bbox.ymax();
+
+ for ( int i = 7; i < output_size_; i+=2){
+ top_data[count * output_size_ + i] = bbox.keypoint_x((int)((i-7)/2));
+ top_data[count * output_size_ + i + 1] = bbox.keypoint_y((int)((i-7)/2));
+ }
+
if (need_save_) {
NormalizedBBox out_bbox;
OutputBBox(bbox, sizes_[name_count_], has_resize_, resize_param_,
&out_bbox);
- float score = top_data[count * 7 + 2];
+ float score = top_data[count * output_size_ + 2];
float xmin = out_bbox.xmin();
float ymin = out_bbox.ymin();
float xmax = out_bbox.xmax();