1 // Copyright 2013 Yangqing Jia
2 // This program converts a set of images to a leveldb by storing them as Datum
3 // proto buffers.
4 // Usage:
//    convert_imageset ROOTFOLDER LISTFILE DB_NAME [0/1]
6 // where ROOTFOLDER is the root folder that holds all the images, and LISTFILE
7 // should be a list of files as well as their labels, in the format as
8 // subfolder1/file1.JPEG 7
9 // ....
// If the last argument is 1, the file lines are randomly shuffled before they
// are processed. Otherwise the lines are processed in the order listed, and
// you are responsible for shuffling the files yourself.
#include <glog/logging.h>
#include <leveldb/db.h>
#include <leveldb/write_batch.h>

#include <algorithm>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include "caffe/proto/caffe.pb.h"
#include "caffe/util/io.hpp"
26 using namespace caffe;
27 using std::pair;
28 using std::string;
29 using std::stringstream;
31 int main(int argc, char** argv) {
32 ::google::InitGoogleLogging(argv[0]);
33 if (argc < 4) {
34 LOG(ERROR) << "Usage: convert_imageset ROOTFOLDER LISTFILE DB_NAME [0/1]";
35 return 0;
36 }
37 std::ifstream infile(argv[2]);
38 std::vector<std::pair<string, int> > lines;
39 string filename;
40 int label;
41 while (infile >> filename >> label) {
42 lines.push_back(std::make_pair(filename, label));
43 }
44 if (argc == 5 && argv[4][0] == '1') {
45 // randomly shuffle data
46 LOG(INFO) << "Shuffling data";
47 std::random_shuffle(lines.begin(), lines.end());
48 }
49 LOG(INFO) << "A total of " << lines.size() << " images.";
51 leveldb::DB* db;
52 leveldb::Options options;
53 options.error_if_exists = true;
54 options.create_if_missing = true;
55 options.write_buffer_size = 268435456;
56 LOG(INFO) << "Opening leveldb " << argv[3];
57 leveldb::Status status = leveldb::DB::Open(
58 options, argv[3], &db);
59 CHECK(status.ok()) << "Failed to open leveldb " << argv[3];
61 string root_folder(argv[1]);
62 Datum datum;
63 int count = 0;
64 char key_cstr[100];
65 leveldb::WriteBatch* batch = new leveldb::WriteBatch();
66 for (int line_id = 0; line_id < lines.size(); ++line_id) {
67 if (!ReadImageToDatum(root_folder + lines[line_id].first, lines[line_id].second,
68 &datum)) {
69 continue;
70 };
71 // sequential
72 sprintf(key_cstr, "%08d_%s", line_id, lines[line_id].first.c_str());
73 string value;
74 // get the value
75 datum.SerializeToString(&value);
76 batch->Put(string(key_cstr), value);
77 if (++count % 1000 == 0) {
78 db->Write(leveldb::WriteOptions(), batch);
79 LOG(ERROR) << "Processed " << count << " files.";
80 delete batch;
81 batch = new leveldb::WriteBatch();
82 }
83 }
85 delete db;
86 return 0;
87 }