summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 4c2c197)
raw | patch | inline | side by side (parent: 4c2c197)
author | Yangqing Jia <jiayq84@gmail.com> | |
Sat, 26 Oct 2013 16:42:42 +0000 (09:42 -0700) | ||
committer | Yangqing Jia <jiayq84@gmail.com> | |
Sat, 26 Oct 2013 16:45:34 +0000 (09:45 -0700) |
Makefile | patch | blob | history | |
examples/convert_imageset.cpp | [new file with mode: 0644] | patch | blob |
include/caffe/util/io.hpp | patch | blob | history | |
src/caffe/util/io.cpp | patch | blob | history |
diff --git a/Makefile b/Makefile
index 7b28bfba0e67b6583e26d5f2135e47baa1a53b69..6bdab19f1baa8ecb46806b90c2a76dff1fac2510 100644 (file)
--- a/Makefile
+++ b/Makefile
# We put src here just for gtest
INCLUDE_DIRS := ./src ./include /usr/local/include $(CUDA_INCLUDE_DIR) $(MKL_INCLUDE_DIR)
LIBRARY_DIRS := /usr/lib /usr/local/lib $(CUDA_LIB_DIR) $(MKL_LIB_DIR)
-LIBRARIES := cuda cudart cublas protobuf glog mkl_rt mkl_intel_thread curand \
- leveldb snappy pthread
+LIBRARIES := cuda cudart cublas curand protobuf opencv_core opencv_highgui \
+ glog mkl_rt mkl_intel_thread leveldb snappy pthread
WARNINGS := -Wall
COMMON_FLAGS := $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir))
diff --git a/examples/convert_imageset.cpp b/examples/convert_imageset.cpp
--- /dev/null
@@ -0,0 +1,81 @@
+// Copyright 2013 Yangqing Jia
+// This program converts a set of images to a leveldb by storing them as Datum
+// proto buffers.
+// Usage:
+// convert_imageset ROOTFOLDER LISTFILE DB_NAME [0/1]
+// where ROOTFOLDER is the root folder that holds all the images (it is
+// prepended verbatim to each listed path, so include the trailing slash), and
+// LISTFILE should be a list of files as well as their labels, in the format
+// subfolder1/file1.JPEG 7
+// ....
+// If the last argument is 1, the list is randomly shuffled before the files
+// are processed. Otherwise the entries are written in list order, and you are
+// responsible for shuffling the files yourself.
+
+#include <glog/logging.h>
+#include <leveldb/db.h>
+#include <leveldb/write_batch.h>
+
+#include <algorithm>
+#include <cstdio>
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "caffe/proto/caffe.pb.h"
+#include "caffe/util/io.hpp"
+
+using namespace caffe;
+using std::pair;
+using std::string;
+using std::stringstream;
+
+// Converts the images named in LISTFILE into serialized Datum records stored
+// in a newly created leveldb.
+//   argv[1]: ROOTFOLDER, prepended verbatim to every listed path.
+//   argv[2]: LISTFILE, whitespace-separated "relative/path label" entries.
+//   argv[3]: DB_NAME, the leveldb to create; opening fails if it exists.
+//   argv[4]: optional; when it starts with '1' the list is shuffled first.
+// Returns 0 on success and 1 on a usage error. Any other failure (unreadable
+// list file, leveldb open/write error, serialization error) aborts via CHECK.
+int main(int argc, char** argv) {
+  ::google::InitGoogleLogging(argv[0]);
+  // argv[1..3] are dereferenced below, so refuse to run without them.
+  if (argc < 4) {
+    std::cerr << "Usage: " << argv[0]
+              << " ROOTFOLDER LISTFILE DB_NAME [0/1]" << std::endl;
+    return 1;
+  }
+
+  std::ifstream infile(argv[2]);
+  CHECK(infile.is_open()) << "Failed to open list file " << argv[2];
+  vector<pair<string, int> > lines;
+  string filename;
+  int label;
+  while (infile >> filename >> label) {
+    lines.push_back(std::make_pair(filename, label));
+  }
+  if (argc == 5 && argv[4][0] == '1') {
+    // randomly shuffle data
+    LOG(INFO) << "Shuffling data";
+    std::random_shuffle(lines.begin(), lines.end());
+  }
+  LOG(INFO) << "A total of " << lines.size() << " images.";
+
+  leveldb::DB* db;
+  leveldb::Options options;
+  options.error_if_exists = true;
+  options.create_if_missing = true;
+  options.write_buffer_size = 268435456;
+  LOG(INFO) << "Opening leveldb " << argv[3];
+  leveldb::Status status = leveldb::DB::Open(
+      options, argv[3], &db);
+  CHECK(status.ok()) << "Failed to open leveldb " << argv[3];
+
+  // Number of records accumulated before each write to the database.
+  const int kBatchSize = 1000;
+  const string root_folder(argv[1]);
+  Datum datum;
+  int count = 0;
+  // Holds only the "%08d_" prefix (at most 12 characters plus the NUL), so the
+  // length of the image path can never overflow it.
+  char key_prefix[16];
+  // A single stack-allocated batch is reused and cleared after every write.
+  leveldb::WriteBatch batch;
+  const int num_lines = static_cast<int>(lines.size());
+  for (int line_id = 0; line_id < num_lines; ++line_id) {
+    const string& rel_path = lines[line_id].first;
+    ReadImageToDatum(root_folder + rel_path, lines[line_id].second, &datum);
+    // Keys start with the zero-padded line index, followed by the listed path.
+    snprintf(key_prefix, sizeof(key_prefix), "%08d_", line_id);
+    string value;
+    CHECK(datum.SerializeToString(&value))
+        << "Failed to serialize datum for " << rel_path;
+    batch.Put(string(key_prefix) + rel_path, value);
+    if (++count % kBatchSize == 0) {
+      status = db->Write(leveldb::WriteOptions(), &batch);
+      CHECK(status.ok()) << "Failed to write batch to leveldb " << argv[3];
+      // Progress is reported at ERROR severity, as in the original code
+      // (presumably so it reaches stderr under glog's default settings).
+      LOG(ERROR) << "Processed " << count << " files.";
+      batch.Clear();
+    }
+  }
+  // Flush the records left over when the total is not a multiple of the batch
+  // size; without this the tail of the list would never reach the database.
+  if (count % kBatchSize != 0) {
+    status = db->Write(leveldb::WriteOptions(), &batch);
+    CHECK(status.ok()) << "Failed to write batch to leveldb " << argv[3];
+    LOG(ERROR) << "Processed " << count << " files.";
+  }
+
+  delete db;
+  return 0;
+}
index 0dce4e7e83e136ef44ad7df8aab4143cc0485d77..03df4b2e84a146e4ddf9524402bf637bd78ac4f1 100644 (file)
WriteProtoToBinaryFile(proto, filename.c_str());
}
+void ReadImageToDatum(const string& filename, const int label, Datum* datum);  // Loads `filename` as a 3-channel color image and fills `datum` with its size, `label` and pixel bytes.
} // namespace caffe
diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp
index 5a88b9d215317c4e406b945a769b880cb1843784..5e5510f55d31055d2d4346cd82b267f119955069 100644 (file)
--- a/src/caffe/util/io.cpp
+++ b/src/caffe/util/io.cpp
#include <google/protobuf/text_format.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/io/coded_stream.h>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
#include <algorithm>
#include <string>
CHECK(proto.SerializeToOstream(&output));
}
+
+// Loads the image at `filename` as a 3-channel color image and stores it in
+// `datum`.
+//   filename: path handed to cv::imread.
+//   label:    value stored in the datum's label field.
+//   datum:    output; channels/height/width/label are set, and data is
+//             replaced by the pixel bytes laid out channel by channel, each
+//             channel in row-major order. float_data is cleared.
+// Channels keep the order cv::imread produces (OpenCV documents BGR for color
+// loads). Aborts via CHECK when the image cannot be read.
+void ReadImageToDatum(const string& filename, const int label, Datum* datum) {
+  const cv::Mat cv_img = cv::imread(filename, CV_LOAD_IMAGE_COLOR);
+  // Name the offending file so a failure in a bulk conversion can be traced.
+  CHECK(cv_img.data) << "Could not open or find the image: " << filename;
+  const int kNumChannels = 3;
+  datum->set_channels(kNumChannels);
+  datum->set_height(cv_img.rows);
+  datum->set_width(cv_img.cols);
+  datum->set_label(label);
+  datum->clear_data();
+  datum->clear_float_data();
+  string* datum_string = datum->mutable_data();
+  // The final size is known up front; reserving avoids repeated regrowth.
+  datum_string->reserve(static_cast<size_t>(kNumChannels) * cv_img.rows *
+                        cv_img.cols);
+  for (int c = 0; c < kNumChannels; ++c) {
+    for (int h = 0; h < cv_img.rows; ++h) {
+      // One row lookup per scanline instead of one per pixel.
+      const cv::Vec3b* row = cv_img.ptr<cv::Vec3b>(h);
+      for (int w = 0; w < cv_img.cols; ++w) {
+        datum_string->push_back(static_cast<char>(row[w][c]));
+      }
+    }
+  }
+}
+
} // namespace caffe