1 #include <boost/algorithm/string/classification.hpp>
2 #include <boost/algorithm/string/split.hpp>
3 #include <boost/filesystem.hpp>
4 #include <boost/foreach.hpp>
5 #include <boost/property_tree/json_parser.hpp>
6 #include <boost/property_tree/ptree.hpp>
7 #include <boost/property_tree/xml_parser.hpp>
8 #include <fcntl.h>
9 #include <google/protobuf/io/coded_stream.h>
10 #include <google/protobuf/io/zero_copy_stream_impl.h>
11 #include <google/protobuf/text_format.h>
12 #include <opencv2/core/core.hpp>
13 #include <opencv2/highgui/highgui.hpp>
14 #include <opencv2/imgproc/imgproc.hpp>
15 #include <fstream> // NOLINT(readability/streams)
16 #include <map>
17 #include <turbojpeg.h>
19 #include "caffe/blob.hpp"
20 #include "caffe/util/io.hpp"
22 const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte.
24 namespace caffe {
26 using namespace boost::property_tree; // NOLINT(build/namespaces)
27 using google::protobuf::io::FileInputStream;
28 using google::protobuf::io::FileOutputStream;
29 using google::protobuf::io::ZeroCopyInputStream;
30 using google::protobuf::io::CodedInputStream;
31 using google::protobuf::io::ZeroCopyOutputStream;
32 using google::protobuf::io::CodedOutputStream;
33 using google::protobuf::Message;
35 bool ReadProtoFromTextFile(const char* filename, Message* proto) {
36 int fd = open(filename, O_RDONLY);
37 CHECK_NE(fd, -1) << "File not found: " << filename;
38 FileInputStream* input = new FileInputStream(fd);
39 bool success = google::protobuf::TextFormat::Parse(input, proto);
40 delete input;
41 close(fd);
42 return success;
43 }
45 void WriteProtoToTextFile(const Message& proto, const char* filename) {
46 int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644);
47 FileOutputStream* output = new FileOutputStream(fd);
48 CHECK(google::protobuf::TextFormat::Print(proto, output));
49 delete output;
50 close(fd);
51 }
53 bool ReadProtoFromBinaryFile(const char* filename, Message* proto) {
54 int fd = open(filename, O_RDONLY);
55 CHECK_NE(fd, -1) << "File not found: " << filename;
56 ZeroCopyInputStream* raw_input = new FileInputStream(fd);
57 CodedInputStream* coded_input = new CodedInputStream(raw_input);
58 coded_input->SetTotalBytesLimit(kProtoReadBytesLimit, 536870912);
60 bool success = proto->ParseFromCodedStream(coded_input);
62 delete coded_input;
63 delete raw_input;
64 close(fd);
65 return success;
66 }
68 void WriteProtoToBinaryFile(const Message& proto, const char* filename) {
69 fstream output(filename, ios::out | ios::trunc | ios::binary);
70 CHECK(proto.SerializeToOstream(&output)) << "Possible reasons: no disk space, "
71 "no write permissions, the destination folder doesn't exist";
72 }
74 bool ReadFileToDatum(const string& filename, const int label, Datum* datum) {
75 std::streampos size;
77 fstream file(filename.c_str(), ios::in|ios::binary|ios::ate);
78 if (file.is_open()) {
79 size = file.tellg();
80 std::string buffer(size, ' ');
81 file.seekg(0, ios::beg);
82 file.read(&buffer[0], size);
83 file.close();
84 datum->set_data(buffer);
85 datum->set_label(label);
86 datum->set_encoded(true);
87 return true;
88 } else {
89 return false;
90 }
91 }
93 /**
94 * Decode Datum to cv::Mat
95 * @param datum
96 * @param color_mode -1 enforce gray, 0 deduce from datum, +1 enforce color
97 * @param out
98 */
99 vector<int> DecodeDatumToCVMat(const Datum& datum, int color_mode, cv::Mat& cv_img,
100 bool shape_only, bool accurate_jpeg) {
101 CHECK(datum.encoded()) << "Datum not encoded";
102 const std::string& content = datum.data();
103 const size_t content_size = content.size();
104 return Decode(reinterpret_cast<const unsigned char*>(content.data()), content_size,
105 color_mode, &cv_img, nullptr, 0, shape_only, accurate_jpeg);
106 }
108 void DecodeDatumToSignedBuf(const Datum& datum, int color_mode,
109 char* buf, size_t buf_len, bool accurate_jpeg) {
110 CHECK(datum.encoded()) << "Datum not encoded";
111 const std::string& content = datum.data();
112 const size_t content_size = content.size();
113 Decode(reinterpret_cast<const unsigned char*>(content.data()), content_size,
114 color_mode, nullptr, buf, buf_len, false, accurate_jpeg);
115 }
117 // decodes to either cv_img or buf
118 vector<int> Decode(const unsigned char* content, size_t content_size, int color_mode,
119 cv::Mat* cv_img, char* buf, size_t buf_len, bool shape_only, bool accurate_jpeg) {
120 if (content_size > 1
121 && content[0] == 255
122 && content[1] == 216) { // probably jpeg
123 int width, height, subsamp;
124 auto *content_data = const_cast<unsigned char *>(content);
126 tjhandle jpeg_decoder = tjInitDecompress();
127 tjDecompressHeader2(jpeg_decoder, content_data, content_size, &width, &height, &subsamp);
129 int ch = subsamp == TJSAMP_GRAY ? 1 : 3;
130 if (color_mode < 0) {
131 ch = 1;
132 }
133 if (!shape_only) {
134 if (cv_img != nullptr) {
135 cv_img->create(height, width, ch == 3 ? CV_8UC3 : CV_8UC1);
136 } else {
137 CHECK_EQ(ch * height * width, buf_len);
138 }
139 if (0 > tjDecompress2(jpeg_decoder, content_data, content_size,
140 cv_img != nullptr ? cv_img->ptr<unsigned char>()
141 : reinterpret_cast<unsigned char*>(buf),
142 width,
143 0,
144 height,
145 ch == 3 ? TJPF_BGR : TJPF_GRAY, // TODO RGB?
146 (accurate_jpeg ? TJFLAG_ACCURATEDCT : TJFLAG_FASTDCT) |
147 TJFLAG_NOREALLOC)) {
148 return vector<int>{};
149 }
150 }
151 tjDestroy(jpeg_decoder);
153 if(cv_img->channels()<3 && color_mode>0) {
154 cv::cvtColor(*cv_img, *cv_img, cv::COLOR_GRAY2BGR);
155 }
156 return vector<int>{1, ch, height, width};
157 }
158 // probably not jpeg...
159 std::vector<char> vec_data(content, content + content_size);
160 const int flag = color_mode < 0 ? cv::IMREAD_GRAYSCALE :
161 (color_mode > 0 ? cv::IMREAD_COLOR : cv::IMREAD_ANYCOLOR);
162 if (cv_img != nullptr && !shape_only) {
163 *cv_img = cv::imdecode(vec_data, flag);
164 return vector<int>{1, cv_img->channels(), cv_img->rows, cv_img->cols};
165 }
166 cv::Mat img = cv::imdecode(vec_data, flag);
167 if (!shape_only) {
168 CHECK_EQ(img.channels() * img.rows * img.cols, buf_len);
169 std::memcpy(buf, img.data, buf_len); // NOLINT(caffe/alt_fn)
170 }
171 return vector<int>{1, img.channels(), img.rows, img.cols};
172 }
174 cv::Mat ReadImageToCVMat(const string& filename,
175 int height, int width, bool is_color, int short_side) {
176 cv::Mat cv_img_origin;
177 std::ifstream ifs(filename, std::ios::in | std::ios::binary);
178 if (!ifs) {
179 LOG(ERROR) << "Could not open or find file " << filename;
180 return cv_img_origin;
181 }
182 std::string content;
183 ifs.seekg(0, std::ios::end);
184 content.resize(ifs.tellg());
185 ifs.seekg(0, std::ios::beg);
186 ifs.read(&content.front(), content.size());
187 ifs.close();
189 cv::Mat cv_img;
190 vector<int> shape = Decode(reinterpret_cast<const unsigned char*>(content.data()), content.size(),
191 is_color ? 1 : -1, &cv_img_origin, nullptr, 0, false, true);
192 if (shape.size() == 0) {
193 int cv_read_flag = is_color ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE;
194 // Trying this one. It's slow but might decode better
195 cv_img_origin = cv::imread(filename, cv_read_flag);
196 }
197 if (cv_img_origin.data) {
198 if (is_color && cv_img_origin.channels() < 3) {
199 cv::cvtColor(cv_img_origin, cv_img, CV_GRAY2RGB);
200 }
201 if (short_side > 0) {
202 if (cv_img_origin.rows > cv_img_origin.cols) {
203 width = short_side;
204 height = cv_img_origin.rows * short_side / cv_img_origin.cols;
205 } else {
206 height = short_side;
207 width = cv_img_origin.cols * short_side / cv_img_origin.rows;
208 }
209 }
210 if (height <= 0 || width <= 0) {
211 return cv_img.data ? cv_img : cv_img_origin;
212 }
213 cv::Size sz(width, height);
214 if (cv_img.data) {
215 cv::resize(cv_img, cv_img_origin, sz, 0., 0., CV_INTER_LINEAR);
216 return cv_img_origin;
217 }
218 cv::resize(cv_img_origin, cv_img, sz, 0., 0., CV_INTER_LINEAR);
219 } else {
220 LOG(ERROR) << "Could not decode file " << filename;
221 }
222 return cv_img;
223 }
225 cv::Mat ReadImageToCVMat(const string& filename, const int height,
226 const int width, const int min_dim, const int max_dim,
227 const bool is_color) {
228 cv::Mat cv_img;
229 int cv_read_flag = (is_color ? CV_LOAD_IMAGE_COLOR :
230 CV_LOAD_IMAGE_GRAYSCALE);
231 cv::Mat cv_img_origin = cv::imread(filename, cv_read_flag);
232 if (!cv_img_origin.data) {
233 LOG(ERROR) << "Could not open or find file " << filename;
234 return cv_img_origin;
235 }
236 if (min_dim > 0 || max_dim > 0) {
237 int num_rows = cv_img_origin.rows;
238 int num_cols = cv_img_origin.cols;
239 int min_num = std::min(num_rows, num_cols);
240 int max_num = std::max(num_rows, num_cols);
241 float scale_factor = 1;
242 if (min_dim > 0 && min_num < min_dim) {
243 scale_factor = static_cast<float>(min_dim) / min_num;
244 }
245 if (max_dim > 0 && static_cast<int>(scale_factor * max_num) > max_dim) {
246 // Make sure the maximum dimension is less than max_dim.
247 scale_factor = static_cast<float>(max_dim) / max_num;
248 }
249 if (scale_factor == 1) {
250 cv_img = cv_img_origin;
251 } else {
252 cv::resize(cv_img_origin, cv_img, cv::Size(0, 0),
253 scale_factor, scale_factor);
254 }
255 } else if (height > 0 && width > 0) {
256 cv::resize(cv_img_origin, cv_img, cv::Size(width, height));
257 } else {
258 cv_img = cv_img_origin;
259 }
260 return cv_img;
261 }
263 cv::Mat ReadImageToCVMat(const string& filename, const int height,
264 const int width, const int min_dim, const int max_dim) {
265 return ReadImageToCVMat(filename, height, width, min_dim, max_dim, true);
266 }
268 cv::Mat ReadImageToCVMat(const string& filename,
269 const int height, const int width) {
270 return ReadImageToCVMat(filename, height, width, true);
271 }
273 cv::Mat ReadImageToCVMat(const string& filename,
274 const bool is_color) {
275 return ReadImageToCVMat(filename, 0, 0, is_color);
276 }
278 cv::Mat ReadImageToCVMat(const string& filename) {
279 return ReadImageToCVMat(filename, 0, 0, true);
280 }
282 // Do the file extension and encoding match?
283 static bool matchExt(const std::string & fn,
284 std::string en) {
285 size_t p = fn.rfind('.');
286 std::string ext = p != fn.npos ? fn.substr(p) : fn;
287 std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
288 std::transform(en.begin(), en.end(), en.begin(), ::tolower);
289 if ( ext == en )
290 return true;
291 if ( en == "jpg" && ext == "jpeg" )
292 return true;
293 return false;
294 }
296 bool ReadImageToDatum(const string& filename, const int label,
297 const int height, const int width, const bool is_color,
298 const std::string & encoding, Datum* datum) {
299 cv::Mat cv_img = ReadImageToCVMat(filename, height, width, is_color);
300 if (cv_img.data) {
301 if (encoding.size()) {
302 if ( (cv_img.channels() == 3) == is_color && !height && !width &&
303 matchExt(filename, encoding) ) {
304 return ReadFileToDatum(filename, label, datum);
305 }
306 std::vector<uchar> buf;
307 cv::imencode("."+encoding, cv_img, buf);
308 datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]),
309 buf.size()));
310 datum->set_label(label);
311 datum->set_encoded(true);
312 return true;
313 }
314 CVMatToDatum(cv_img, *datum);
315 datum->set_label(label);
316 return true;
317 } else {
318 return false;
319 }
320 }
322 bool ReadImageToDatum(const string& filename, const int label,
323 const int height, const int width, const int min_dim, const int max_dim,
324 const bool is_color, const std::string & encoding, Datum* datum) {
325 cv::Mat cv_img = ReadImageToCVMat(filename, height, width, min_dim, max_dim,
326 is_color);
327 if (cv_img.data) {
328 if (encoding.size()) {
329 if ( (cv_img.channels() == 3) == is_color && !height && !width &&
330 !min_dim && !max_dim && matchExt(filename, encoding) ) {
331 datum->set_channels(cv_img.channels());
332 datum->set_height(cv_img.rows);
333 datum->set_width(cv_img.cols);
334 return ReadFileToDatum(filename, label, datum);
335 }
336 EncodeCVMatToDatum(cv_img, encoding, datum);
337 datum->set_label(label);
338 return true;
339 }
340 CVMatToDatum(cv_img, *datum);
341 datum->set_label(label);
342 return true;
343 } else {
344 return false;
345 }
346 }
348 void GetImageSize(const string& filename, int* height, int* width) {
349 cv::Mat cv_img = cv::imread(filename);
350 if (!cv_img.data) {
351 LOG(ERROR) << "Could not open or find file " << filename;
352 return;
353 }
354 *height = cv_img.rows;
355 *width = cv_img.cols;
356 }
358 bool ReadRichImageToAnnotatedDatum(const string& filename,
359 const string& labelfile, const int height, const int width,
360 const int min_dim, const int max_dim, const bool is_color,
361 const string& encoding, const AnnotatedDatum_AnnotationType type,
362 const string& labeltype, const std::map<string, int>& name_to_label,
363 AnnotatedDatum* anno_datum) {
364 // Read image to datum.
365 bool status = ReadImageToDatum(filename, -1, height, width,
366 min_dim, max_dim, is_color, encoding,
367 anno_datum->mutable_datum());
368 if (status == false) {
369 return status;
370 }
371 anno_datum->clear_annotation_group();
372 if (!boost::filesystem::exists(labelfile)) {
373 return true;
374 }
375 switch (type) {
376 case AnnotatedDatum_AnnotationType_BBOX:
377 int ori_height, ori_width;
378 GetImageSize(filename, &ori_height, &ori_width);
379 if (labeltype == "xml") {
380 return ReadXMLToAnnotatedDatum(labelfile, ori_height, ori_width,
381 name_to_label, anno_datum);
382 } else if (labeltype == "json") {
383 return ReadJSONToAnnotatedDatum(labelfile, ori_height, ori_width,
384 name_to_label, anno_datum);
385 } else if (labeltype == "txt") {
386 return ReadTxtToAnnotatedDatum(labelfile, ori_height, ori_width,
387 anno_datum);
388 } else {
389 LOG(FATAL) << "Unknown label file type.";
390 return false;
391 }
392 break;
393 default:
394 LOG(FATAL) << "Unknown annotation type.";
395 return false;
396 }
397 }
399 //bool ReadFileToDatum(const string& filename, const int label,
400 // Datum* datum) {
401 // std::streampos size;
402 //
403 // fstream file(filename.c_str(), ios::in|ios::binary|ios::ate);
404 // if (file.is_open()) {
405 // size = file.tellg();
406 // std::string buffer(size, ' ');
407 // file.seekg(0, ios::beg);
408 // file.read(&buffer[0], size);
409 // file.close();
410 // datum->set_data(buffer);
411 // datum->set_label(label);
412 // datum->set_encoded(true);
413 // return true;
414 // } else {
415 // return false;
416 // }
417 //}
419 // Parse VOC/ILSVRC detection annotation.
420 bool ReadXMLToAnnotatedDatum(const string& labelfile, const int img_height,
421 const int img_width, const std::map<string, int>& name_to_label,
422 AnnotatedDatum* anno_datum) {
423 ptree pt;
424 read_xml(labelfile, pt);
426 // Parse annotation.
427 int width = 0, height = 0;
428 try {
429 height = pt.get<int>("annotation.size.height");
430 width = pt.get<int>("annotation.size.width");
431 } catch (const ptree_error &e) {
432 LOG(WARNING) << "When parsing " << labelfile << ": " << e.what();
433 height = img_height;
434 width = img_width;
435 }
436 LOG_IF(WARNING, height != img_height) << labelfile <<
437 " inconsistent image height.";
438 LOG_IF(WARNING, width != img_width) << labelfile <<
439 " inconsistent image width.";
440 CHECK(width != 0 && height != 0) << labelfile <<
441 " no valid image width/height.";
442 int instance_id = 0;
443 BOOST_FOREACH(ptree::value_type &v1, pt.get_child("annotation")) {
444 ptree pt1 = v1.second;
445 if (v1.first == "object") {
446 Annotation* anno = NULL;
447 bool difficult = false;
448 ptree object = v1.second;
449 BOOST_FOREACH(ptree::value_type &v2, object.get_child("")) {
450 ptree pt2 = v2.second;
451 if (v2.first == "name") {
452 string name = pt2.data();
453 if (name_to_label.find(name) == name_to_label.end()) {
454 LOG(FATAL) << "Unknown name: " << name;
455 }
456 int label = name_to_label.find(name)->second;
457 bool found_group = false;
458 for (int g = 0; g < anno_datum->annotation_group_size(); ++g) {
459 AnnotationGroup* anno_group =
460 anno_datum->mutable_annotation_group(g);
461 if (label == anno_group->group_label()) {
462 if (anno_group->annotation_size() == 0) {
463 instance_id = 0;
464 } else {
465 instance_id = anno_group->annotation(
466 anno_group->annotation_size() - 1).instance_id() + 1;
467 }
468 anno = anno_group->add_annotation();
469 found_group = true;
470 }
471 }
472 if (!found_group) {
473 // If there is no such annotation_group, create a new one.
474 AnnotationGroup* anno_group = anno_datum->add_annotation_group();
475 anno_group->set_group_label(label);
476 anno = anno_group->add_annotation();
477 instance_id = 0;
478 }
479 anno->set_instance_id(instance_id++);
480 } else if (v2.first == "difficult") {
481 difficult = pt2.data() == "1";
482 NormalizedBBox* bbox = anno->mutable_bbox();
483 bbox->set_difficult(difficult);
484 } else if (v2.first == "bndbox") {
485 int xmin = pt2.get("xmin", 0);
486 int ymin = pt2.get("ymin", 0);
487 int xmax = pt2.get("xmax", 0);
488 int ymax = pt2.get("ymax", 0);
489 CHECK_NOTNULL(anno);
490 LOG_IF(WARNING, xmin > width) << labelfile <<
491 " bounding box exceeds image boundary.";
492 LOG_IF(WARNING, ymin > height) << labelfile <<
493 " bounding box exceeds image boundary.";
494 LOG_IF(WARNING, xmax > width) << labelfile <<
495 " bounding box exceeds image boundary.";
496 LOG_IF(WARNING, ymax > height) << labelfile <<
497 " bounding box exceeds image boundary.";
498 LOG_IF(WARNING, xmin < 0) << labelfile <<
499 " bounding box exceeds image boundary.";
500 LOG_IF(WARNING, ymin < 0) << labelfile <<
501 " bounding box exceeds image boundary.";
502 LOG_IF(WARNING, xmax < 0) << labelfile <<
503 " bounding box exceeds image boundary.";
504 LOG_IF(WARNING, ymax < 0) << labelfile <<
505 " bounding box exceeds image boundary.";
506 LOG_IF(WARNING, xmin > xmax) << labelfile <<
507 " bounding box irregular.";
508 LOG_IF(WARNING, ymin > ymax) << labelfile <<
509 " bounding box irregular.";
510 // Store the normalized bounding box.
511 NormalizedBBox* bbox = anno->mutable_bbox();
512 bbox->set_xmin(static_cast<float>(xmin) / width);
513 bbox->set_ymin(static_cast<float>(ymin) / height);
514 bbox->set_xmax(static_cast<float>(xmax) / width);
515 bbox->set_ymax(static_cast<float>(ymax) / height);
516 bbox->set_difficult(difficult);
517 }
518 }
519 }
520 }
521 return true;
522 }
524 // Parse MSCOCO detection annotation.
525 bool ReadJSONToAnnotatedDatum(const string& labelfile, const int img_height,
526 const int img_width, const std::map<string, int>& name_to_label,
527 AnnotatedDatum* anno_datum) {
528 ptree pt;
529 read_json(labelfile, pt);
531 // Get image info.
532 int width = 0, height = 0;
533 try {
534 height = pt.get<int>("image.height");
535 width = pt.get<int>("image.width");
536 } catch (const ptree_error &e) {
537 LOG(WARNING) << "When parsing " << labelfile << ": " << e.what();
538 height = img_height;
539 width = img_width;
540 }
541 LOG_IF(WARNING, height != img_height) << labelfile <<
542 " inconsistent image height.";
543 LOG_IF(WARNING, width != img_width) << labelfile <<
544 " inconsistent image width.";
545 CHECK(width != 0 && height != 0) << labelfile <<
546 " no valid image width/height.";
548 // Get annotation info.
549 int instance_id = 0;
550 BOOST_FOREACH(ptree::value_type& v1, pt.get_child("annotation")) {
551 Annotation* anno = NULL;
552 bool iscrowd = false;
553 ptree object = v1.second;
554 // Get category_id.
555 string name = object.get<string>("category_id");
556 if (name_to_label.find(name) == name_to_label.end()) {
557 LOG(FATAL) << "Unknown name: " << name;
558 }
559 int label = name_to_label.find(name)->second;
560 bool found_group = false;
561 for (int g = 0; g < anno_datum->annotation_group_size(); ++g) {
562 AnnotationGroup* anno_group =
563 anno_datum->mutable_annotation_group(g);
564 if (label == anno_group->group_label()) {
565 if (anno_group->annotation_size() == 0) {
566 instance_id = 0;
567 } else {
568 instance_id = anno_group->annotation(
569 anno_group->annotation_size() - 1).instance_id() + 1;
570 }
571 anno = anno_group->add_annotation();
572 found_group = true;
573 }
574 }
575 if (!found_group) {
576 // If there is no such annotation_group, create a new one.
577 AnnotationGroup* anno_group = anno_datum->add_annotation_group();
578 anno_group->set_group_label(label);
579 anno = anno_group->add_annotation();
580 instance_id = 0;
581 }
582 anno->set_instance_id(instance_id++);
584 // Get iscrowd.
585 iscrowd = object.get<int>("iscrowd", 0);
587 // Get bbox.
588 vector<float> bbox_items;
589 BOOST_FOREACH(ptree::value_type& v2, object.get_child("bbox")) {
590 bbox_items.push_back(v2.second.get_value<float>());
591 }
592 CHECK_EQ(bbox_items.size(), 4);
593 float xmin = bbox_items[0];
594 float ymin = bbox_items[1];
595 float xmax = bbox_items[0] + bbox_items[2];
596 float ymax = bbox_items[1] + bbox_items[3];
597 CHECK_NOTNULL(anno);
598 LOG_IF(WARNING, xmin > width) << labelfile <<
599 " bounding box exceeds image boundary.";
600 LOG_IF(WARNING, ymin > height) << labelfile <<
601 " bounding box exceeds image boundary.";
602 LOG_IF(WARNING, xmax > width) << labelfile <<
603 " bounding box exceeds image boundary.";
604 LOG_IF(WARNING, ymax > height) << labelfile <<
605 " bounding box exceeds image boundary.";
606 LOG_IF(WARNING, xmin < 0) << labelfile <<
607 " bounding box exceeds image boundary.";
608 LOG_IF(WARNING, ymin < 0) << labelfile <<
609 " bounding box exceeds image boundary.";
610 LOG_IF(WARNING, xmax < 0) << labelfile <<
611 " bounding box exceeds image boundary.";
612 LOG_IF(WARNING, ymax < 0) << labelfile <<
613 " bounding box exceeds image boundary.";
614 LOG_IF(WARNING, xmin > xmax) << labelfile <<
615 " bounding box irregular.";
616 LOG_IF(WARNING, ymin > ymax) << labelfile <<
617 " bounding box irregular.";
618 // Store the normalized bounding box.
619 NormalizedBBox* bbox = anno->mutable_bbox();
620 bbox->set_xmin(xmin / width);
621 bbox->set_ymin(ymin / height);
622 bbox->set_xmax(xmax / width);
623 bbox->set_ymax(ymax / height);
624 bbox->set_difficult(iscrowd);
625 }
626 return true;
627 }
629 // Parse plain txt detection annotation: label_id, xmin, ymin, xmax, ymax.
630 bool ReadTxtToAnnotatedDatum(const string& labelfile, const int height,
631 const int width, AnnotatedDatum* anno_datum) {
632 std::ifstream infile(labelfile.c_str());
633 if (!infile.good()) {
634 LOG(INFO) << "Cannot open " << labelfile;
635 return false;
636 }
637 int label;
638 float xmin, ymin, xmax, ymax;
639 while (infile >> label >> xmin >> ymin >> xmax >> ymax) {
640 Annotation* anno = NULL;
641 int instance_id = 0;
642 bool found_group = false;
643 for (int g = 0; g < anno_datum->annotation_group_size(); ++g) {
644 AnnotationGroup* anno_group = anno_datum->mutable_annotation_group(g);
645 if (label == anno_group->group_label()) {
646 if (anno_group->annotation_size() == 0) {
647 instance_id = 0;
648 } else {
649 instance_id = anno_group->annotation(
650 anno_group->annotation_size() - 1).instance_id() + 1;
651 }
652 anno = anno_group->add_annotation();
653 found_group = true;
654 }
655 }
656 if (!found_group) {
657 // If there is no such annotation_group, create a new one.
658 AnnotationGroup* anno_group = anno_datum->add_annotation_group();
659 anno_group->set_group_label(label);
660 anno = anno_group->add_annotation();
661 instance_id = 0;
662 }
663 anno->set_instance_id(instance_id++);
664 LOG_IF(WARNING, xmin > width) << labelfile <<
665 " bounding box exceeds image boundary.";
666 LOG_IF(WARNING, ymin > height) << labelfile <<
667 " bounding box exceeds image boundary.";
668 LOG_IF(WARNING, xmax > width) << labelfile <<
669 " bounding box exceeds image boundary.";
670 LOG_IF(WARNING, ymax > height) << labelfile <<
671 " bounding box exceeds image boundary.";
672 LOG_IF(WARNING, xmin < 0) << labelfile <<
673 " bounding box exceeds image boundary.";
674 LOG_IF(WARNING, ymin < 0) << labelfile <<
675 " bounding box exceeds image boundary.";
676 LOG_IF(WARNING, xmax < 0) << labelfile <<
677 " bounding box exceeds image boundary.";
678 LOG_IF(WARNING, ymax < 0) << labelfile <<
679 " bounding box exceeds image boundary.";
680 LOG_IF(WARNING, xmin > xmax) << labelfile <<
681 " bounding box irregular.";
682 LOG_IF(WARNING, ymin > ymax) << labelfile <<
683 " bounding box irregular.";
684 // Store the normalized bounding box.
685 NormalizedBBox* bbox = anno->mutable_bbox();
686 bbox->set_xmin(xmin / width);
687 bbox->set_ymin(ymin / height);
688 bbox->set_xmax(xmax / width);
689 bbox->set_ymax(ymax / height);
690 bbox->set_difficult(false);
691 }
692 return true;
693 }
695 bool ReadLabelFileToLabelMap(const string& filename, bool include_background,
696 const string& delimiter, LabelMap* map) {
697 // cleanup
698 map->Clear();
700 std::ifstream file(filename.c_str());
701 string line;
702 // Every line can have [1, 3] number of fields.
703 // The delimiter between fields can be one of " :;".
704 // The order of the fields are:
705 // name [label] [display_name]
706 // ...
707 int field_size = -1;
708 int label = 0;
709 LabelMapItem* map_item;
710 // Add background (none_of_the_above) class.
711 if (include_background) {
712 map_item = map->add_item();
713 map_item->set_name("none_of_the_above");
714 map_item->set_label(label++);
715 map_item->set_display_name("background");
716 }
717 while (std::getline(file, line)) {
718 vector<string> fields;
719 fields.clear();
720 boost::split(fields, line, boost::is_any_of(delimiter));
721 if (field_size == -1) {
722 field_size = fields.size();
723 } else {
724 CHECK_EQ(field_size, fields.size())
725 << "Inconsistent number of fields per line.";
726 }
727 map_item = map->add_item();
728 map_item->set_name(fields[0]);
729 switch (field_size) {
730 case 1:
731 map_item->set_label(label++);
732 map_item->set_display_name(fields[0]);
733 break;
734 case 2:
735 label = std::atoi(fields[1].c_str());
736 map_item->set_label(label);
737 map_item->set_display_name(fields[0]);
738 break;
739 case 3:
740 label = std::atoi(fields[1].c_str());
741 map_item->set_label(label);
742 map_item->set_display_name(fields[2]);
743 break;
744 default:
745 LOG(FATAL) << "The number of fields should be [1, 3].";
746 break;
747 }
748 }
749 return true;
750 }
752 bool MapNameToLabel(const LabelMap& map, const bool strict_check,
753 std::map<string, int>* name_to_label) {
754 // cleanup
755 name_to_label->clear();
757 for (int i = 0; i < map.item_size(); ++i) {
758 const string& name = map.item(i).name();
759 const int label = map.item(i).label();
760 if (strict_check) {
761 if (!name_to_label->insert(std::make_pair(name, label)).second) {
762 LOG(FATAL) << "There are many duplicates of name: " << name;
763 return false;
764 }
765 } else {
766 (*name_to_label)[name] = label;
767 }
768 }
769 return true;
770 }
772 bool MapLabelToName(const LabelMap& map, const bool strict_check,
773 std::map<int, string>* label_to_name) {
774 // cleanup
775 label_to_name->clear();
777 for (int i = 0; i < map.item_size(); ++i) {
778 const string& name = map.item(i).name();
779 const int label = map.item(i).label();
780 if (strict_check) {
781 if (!label_to_name->insert(std::make_pair(label, name)).second) {
782 LOG(FATAL) << "There are many duplicates of label: " << label;
783 return false;
784 }
785 } else {
786 (*label_to_name)[label] = name;
787 }
788 }
789 return true;
790 }
792 bool MapLabelToDisplayName(const LabelMap& map, const bool strict_check,
793 std::map<int, string>* label_to_display_name) {
794 // cleanup
795 label_to_display_name->clear();
797 for (int i = 0; i < map.item_size(); ++i) {
798 const string& display_name = map.item(i).display_name();
799 const int label = map.item(i).label();
800 if (strict_check) {
801 if (!label_to_display_name->insert(
802 std::make_pair(label, display_name)).second) {
803 LOG(FATAL) << "There are many duplicates of label: " << label;
804 return false;
805 }
806 } else {
807 (*label_to_display_name)[label] = display_name;
808 }
809 }
810 return true;
811 }
812 cv::Mat DecodeDatumToCVMatNative(const Datum& datum) {
813 cv::Mat cv_img;
814 DecodeDatumToCVMat(datum, 0, cv_img, false);
815 return cv_img;
816 }
818 // tests only, TODO: clean
819 cv::Mat DecodeDatumToCVMat(const Datum& datum, bool is_color) {
820 cv::Mat cv_img;
821 DecodeDatumToCVMat(datum, is_color ? 1 : -1, cv_img, false);
822 return cv_img;
823 }
825 // If Datum is encoded will decoded using DecodeDatumToCVMat and CVMatToDatum
826 // If Datum is not encoded will do nothing
827 bool DecodeDatumNative(Datum* datum) {
828 if (datum->encoded()) {
829 cv::Mat cv_img;
830 DecodeDatumToCVMat(*datum, 0, cv_img, false);
831 CVMatToDatum(cv_img, *datum);
832 return true;
833 } else {
834 return false;
835 }
836 }
838 bool DecodeDatum(Datum* datum, bool is_color) {
839 if (datum->encoded()) {
840 cv::Mat cv_img;
841 DecodeDatumToCVMat(*datum, is_color ? 1 : 0, cv_img, false);
842 CVMatToDatum(cv_img, *datum);
843 return true;
844 } else {
845 return false;
846 }
847 }
849 void EncodeCVMatToDatum(const cv::Mat& cv_img, const string& encoding,
850 Datum* datum) {
851 std::vector<uchar> buf;
852 cv::imencode("."+encoding, cv_img, buf);
853 datum->set_data(std::string(reinterpret_cast<char*>(&buf[0]),
854 buf.size()));
855 datum->set_channels(cv_img.channels());
856 datum->set_height(cv_img.rows);
857 datum->set_width(cv_img.cols);
858 datum->set_encoded(true);
859 }
861 vector<int> DatumToCVMat(const Datum& datum, cv::Mat& img, bool shape_only) {
862 if (datum.encoded()) {
863 LOG(FATAL) << "Datum encoded";
864 }
865 const int datum_channels = datum.channels();
866 const int datum_height = datum.height();
867 const int datum_width = datum.width();
868 if (shape_only) {
869 return vector<int>{1, datum_channels, datum_height, datum_width};
870 }
871 const int datum_size = datum_channels * datum_height * datum_width;
872 CHECK_GT(datum_channels, 0);
873 CHECK_GT(datum_height, 0);
874 CHECK_GT(datum_width, 0);
875 img.create(datum_height, datum_width, CVFC<float>(datum_channels));
876 CHECK_EQ(img.channels(), datum_channels);
877 CHECK_EQ(img.rows, datum_height);
878 CHECK_EQ(img.cols, datum_width);
879 const std::string& datum_buf = datum.data();
880 CHECK_EQ(datum_buf.size(), datum_size);
881 // CHW -> HWC
882 chw2hwc(datum_channels, datum_width, datum_height,
883 reinterpret_cast<const unsigned char*>(&datum_buf.front()), img.ptr<float>(0));
884 return vector<int>{1, datum_channels, datum_height, datum_width};
885 }
888 void CVMatToDatum(const cv::Mat& cv_img, Datum& datum) {
889 const unsigned int img_channels = cv_img.channels();
890 const unsigned int img_height = cv_img.rows;
891 const unsigned int img_width = cv_img.cols;
892 const unsigned int img_size = img_channels * img_height * img_width;
893 CHECK_GT(img_channels, 0);
894 CHECK_GT(img_height, 0);
895 CHECK_GT(img_width, 0);
896 string* buf = datum.release_data();
897 if (buf == nullptr || buf->size() != img_size) {
898 delete buf;
899 buf = new string(img_size, 0);
900 }
901 unsigned char* buf_front = reinterpret_cast<unsigned char*>(&buf->front());
902 // HWC -> CHW
903 if (cv_img.depth() == CV_8U) {
904 hwc2chw(img_channels, img_width, img_height, cv_img.ptr<unsigned char>(0), buf_front);
905 } else if (cv_img.depth() == CV_32F) {
906 hwc2chw(img_channels, img_width, img_height, cv_img.ptr<float>(0), buf_front);
907 } else if (cv_img.depth() == CV_64F) {
908 hwc2chw(img_channels, img_width, img_height, cv_img.ptr<double>(0), buf_front);
909 }
910 datum.set_allocated_data(buf);
911 datum.set_channels(img_channels);
912 datum.set_height(img_height);
913 datum.set_width(img_width);
914 datum.set_encoded(false);
915 }
917 } // namespace caffe