From: Yuan Zhao Date: Tue, 27 Nov 2018 15:58:46 +0000 (-0600) Subject: Add MNIST LeNet network model and test input X-Git-Tag: v01.02.00^2~4 X-Git-Url: https://git.ti.com/gitweb?p=tidl%2Ftidl-api.git;a=commitdiff_plain;h=782c7f753e775ffd56b067cb75ec95c931d8e11a Add MNIST LeNet network model and test input - Constrained to EVE only for now. - Add documentation for mnist example. - MCT-1083 --- diff --git a/docs/source/example.rst b/docs/source/example.rst index 608c728..6c7a3ac 100644 --- a/docs/source/example.rst +++ b/docs/source/example.rst @@ -37,6 +37,13 @@ Examples - Object detection - EVE and C66x (network is split across both EVE and C66x) - OpenCV used to read input image from file or capture from camera. + * - mnist + - handwritten digits recognition (MNIST). This example illustrates + low TIDL API overhead (~1.8%) for small networks with low compute + requirements (<5ms). + - EVE + - Pre-processed white-on-black images read from file, with or without + MNIST database file headers. * - classification - Classification example, called from the Matrix GUI. - EVE or C66x @@ -251,6 +258,68 @@ versus ExecutionObject level. - 1630 ms - 1408 ms +MNIST +----- + +The MNIST example takes a pre-processed 28x28 white-on-black frame from +a file as input and predicts the hand-written digit in the frame. +For example, the example will predict 0 for the following frame. + +.. code-block:: none + + root@am57xx-evm:~/tidl/examples/mnist# hexdump -v -e '28/1 "%2x" "\n"' -n 784 ../test/testvecs/input/digits10_images_28x28.y + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 314 8 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0319bdfeec1671b 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 01ed5ffd2a4e4ec89 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 1bcffee2a 031e6e225 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 05ff7ffbf 2 0 078ffa1 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0b2f2f34e 0 0 015e0d8 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0148deab2 0 0 0 0 0bdec 2 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 084f845 0 0 0 0 0a4f222 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0c4d3 5 0 0 0 0 096f21c 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 052f695 0 0 0 0 0 0a7ed 8 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 09af329 0 0 0 0 0 0d1cf 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 2d4c8 0 0 0 0 0 01ae9a2 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 038fa9a 0 0 0 0 0 062ff76 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 07afe5d 0 0 0 0 0 0a9e215 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0bdec1d 0 0 0 0 017e7aa 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 1e7d6 0 0 0 0 0 096f85a 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 01df2bf 0 0 0 0 015e1ca 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 061fc95 0 0 0 0 084f767 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 06eff8b 0 0 0 033e8ca 4 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 060fc9e 0 0 0 092d63e 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 01bf1da 6 0 019b656 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0c3fb8e a613e7b 5 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 049f1fcf5f696 9 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 04ca0b872 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + +The file can contain multiple frames. If an optional label file is also +given, the example will compare predicted result against pre-determined +label for accuracy. The input files may or may not have `MNIST dataset +file headers `_. If using headers, +input filenames must end with idx3-ubyte or idx1-ubyte. + +The MNIST example also illustrates low overhead of TIDL API for small +networks with low compute requirements (<5ms). The network runs about 3ms +on EVE for a single frame. As shown in the following table, when running +over 1000 frames, the overhead is about 1.8%. + +.. list-table:: Loop overall time over 1000 frames + :header-rows: 1 + + * - Device(s) + - Device Processing Time + - Host Processing Time + - API Overhead + * - 1 EVE + - 3091 ms + - 3146 ms + - 1.78% + Running Examples ---------------- @@ -295,6 +364,24 @@ The following listing illustrates how to build and run the examples. Loop total time (including read/write/opencv/print/etc): 320.2ms ssd_multibox PASSED + root@am57xx-evm:~/tidl/examples/mnist# ./mnist + Input images: ../test/testvecs/input/digits10_images_28x28.y + Input labels: ../test/testvecs/input/digits10_labels_10x1.y + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + Device total time: 31.02ms + Loop total time (including read/write/print/etc): 32.49ms + Accuracy: 100% + mnist PASSED + Image input ^^^^^^^^^^^ diff --git a/examples/mnist/main.cpp b/examples/mnist/main.cpp index 7794c51..d933ccc 100644 --- a/examples/mnist/main.cpp +++ b/examples/mnist/main.cpp @@ -47,31 +47,25 @@ #include "imgutil.h" #include "../common/video_utils.h" -#include "opencv2/core.hpp" -#include "opencv2/imgproc.hpp" -#include "opencv2/highgui.hpp" -#include "opencv2/videoio.hpp" - using namespace std; using namespace tidl; -using namespace cv; -#define NUM_VIDEO_FRAMES 300 -#define DEFAULT_CONFIG "mnist" -#define NUM_DEFAULT_INPUTS 1 -#define DEFAULT_INPUT_FRAMES 1 -const char *default_inputs[NUM_DEFAULT_INPUTS] = -{ - "../test/testvecs/input/digit_28x28.y" -}; +#define DEFAULT_CONFIG "mnist_lenet" +#define DEFAULT_INPUT_IMAGES "../test/testvecs/input/digits10_images_28x28.y" +#define DEFAULT_INPUT_LABELS "../test/testvecs/input/digits10_labels_10x1.y" + +uint32_t images_file_offset = 0; +uint32_t labels_file_offset = 0; +uint32_t num_frames_file = 0; +uint32_t num_wrong_results = 0; Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c); bool RunConfiguration(cmdline_opts_t& opts); bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx, const Configuration& c, - const cmdline_opts_t& opts, VideoCapture &cap); -bool WriteFrameOutput(const ExecutionObjectPipeline &eop); + const cmdline_opts_t& opts, ifstream &ifs); +bool WriteFrameOutput(const ExecutionObjectPipeline &eop, ifstream &ifs_labels); void DisplayHelp(); @@ -101,13 +95,31 @@ int main(int argc, char *argv[]) exit(EXIT_SUCCESS); } assert(opts.num_dsps != 0 || opts.num_eves != 0); - if (opts.num_frames == 0) - opts.num_frames = (opts.is_camera_input || opts.is_video_input) ? - NUM_VIDEO_FRAMES : 1; + if (opts.num_dsps != 0) + { + cout << "MNIST network not supported on DSP yet." << endl; + exit(EXIT_SUCCESS); + } + if (opts.input_file.empty()) - cout << "Input: " << default_inputs[0] << endl; - else - cout << "Input: " << opts.input_file << endl; + { + opts.input_file = DEFAULT_INPUT_IMAGES; + opts.object_classes_list_file = DEFAULT_INPUT_LABELS; + } + + // if inputs are MNIST data set: skip MNIST header + string& s_images = opts.input_file; + if (s_images.size() > 10 && + s_images.compare(s_images.size() - 10, 10, "idx3-ubyte") == 0) + images_file_offset = 16; + string& s_labels = opts.object_classes_list_file; + if (s_labels.size() > 10 && + s_labels.compare(s_labels.size() - 10, 10, "idx1-ubyte") == 0) + labels_file_offset = 8; + + cout << "Input images: " << opts.input_file << endl; + if (! opts.object_classes_list_file.empty()) + cout << "Input labels: " << opts.object_classes_list_file << endl; // Run network bool status = RunConfiguration(opts); @@ -126,7 +138,7 @@ bool RunConfiguration(cmdline_opts_t& opts) // Read the TI DL configuration file Configuration c; string config_file = "../test/testvecs/config/infer/tidl_config_" - + opts.config + ".txt"; + + opts.config + ".txt"; bool status = c.ReadFromFile(config_file); if (!status) { @@ -135,9 +147,27 @@ bool RunConfiguration(cmdline_opts_t& opts) } c.enableApiTrace = opts.verbose; - // setup camera/video input/output - VideoCapture cap; - if (! SetVideoInputOutput(cap, opts, "MNIST")) return false; + // setup images/labels input/output + ifstream ifs, ifs_labels; + ifs.open(opts.input_file, ios::binary | ios::ate); + if (! ifs.good()) + { + cerr << "Cannot open " << opts.input_file << endl; + return false; + } + num_frames_file = (((int) ifs.tellg()) - images_file_offset) / + (c.inWidth * c.inHeight); + if (opts.num_frames == 0) + opts.num_frames = num_frames_file; + if (! opts.object_classes_list_file.empty()) + { + ifs_labels.open(opts.object_classes_list_file, ios::binary); + if (! ifs_labels.good()) + { + cerr << "Cannot open " << opts.object_classes_list_file << endl; + return false; + } + } try { @@ -198,11 +228,11 @@ bool RunConfiguration(cmdline_opts_t& opts) { device_time += eos[frame_idx % num_eos]->GetProcessTimeInMilliSeconds(); - WriteFrameOutput(*eop); + WriteFrameOutput(*eop, ifs_labels); } // Read a frame and start processing it with current eop - if (ReadFrame(*eop, frame_idx, c, opts, cap)) + if (ReadFrame(*eop, frame_idx, c, opts, ifs)) eop->ProcessFrameStartAsync(); } @@ -210,9 +240,17 @@ bool RunConfiguration(cmdline_opts_t& opts) chrono::duration elapsed = tloop1 - tloop0; cout << "Device total time: " << setw(6) << setprecision(4) << device_time << "ms" << endl; - cout << "Loop total time (including read/write/opencv/print/etc): " + cout << "Loop total time (including read/write/print/etc): " << setw(6) << setprecision(4) << (elapsed.count() * 1000) << "ms" << endl; + if (opts.num_frames > 0 && ifs_labels.is_open()) + { + cout << "Accuracy: " << setw(6) << setprecision(4) + << (opts.num_frames-num_wrong_results)*100.f / opts.num_frames + << "%" << endl; + if (opts.input_file == DEFAULT_INPUT_IMAGES && num_wrong_results >0) + status = false; + } FreeMemory(eops); for (auto eop : eops) delete eop; @@ -242,7 +280,7 @@ Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c) bool ReadFrame(ExecutionObjectPipeline &eop, uint32_t frame_idx, const Configuration& c, - const cmdline_opts_t& opts, VideoCapture &cap) + const cmdline_opts_t& opts, ifstream &ifs) { if (frame_idx >= opts.num_frames) return false; @@ -253,63 +291,14 @@ bool ReadFrame(ExecutionObjectPipeline &eop, assert (frame_buffer != nullptr); int channel_size = c.inWidth * c.inHeight; - Mat image; - if (! opts.is_camera_input && ! opts.is_video_input) - { - if (opts.input_file.empty()) - { - ifstream ifs(default_inputs[frame_idx % NUM_DEFAULT_INPUTS], - ios::binary); - ifs.seekg((frame_idx % DEFAULT_INPUT_FRAMES) * channel_size); - ifs.read(frame_buffer, channel_size); - memcpy(frame_buffer+channel_size, frame_buffer, channel_size); - bool ifs_status = ifs.good(); - ifs.close(); - return ifs_status; // already PreProc-ed - } - else - { - image = cv::imread(opts.input_file, CV_LOAD_IMAGE_COLOR); - if (image.empty()) - { - cerr << "Unable to read input image" << endl; - return false; - } - } - } - else - { - Mat v_image; - if (! cap.grab()) return false; - if (! cap.retrieve(v_image)) return false; - #define DISPLAY_SIZE 112 - int orig_width = v_image.cols; - int orig_height = v_image.rows; - // Crop camera/video input to center DISPLAY_SIZE x DISPLAY_SIZE input - if (orig_width > DISPLAY_SIZE && orig_height > DISPLAY_SIZE) - { - image = Mat(v_image, Rect((orig_width-DISPLAY_SIZE)/2, - (orig_height-DISPLAY_SIZE)/2, - DISPLAY_SIZE, DISPLAY_SIZE)); - } - else - image = v_image; - cv::imshow("MNIST", image); - waitKey(2); - } - - // Convert to Gray image, resize to 28x28, copy into frame_buffer - Mat s_image, bgr_frames[3]; - cv::resize(image, s_image, Size(c.inWidth, c.inHeight), - 0, 0, cv::INTER_AREA); - cv::split(s_image, bgr_frames); - memcpy(frame_buffer, bgr_frames[0].ptr(), channel_size); - memcpy(frame_buffer+1*channel_size, bgr_frames[1].ptr(), channel_size); - return true; + // already PreProc-ed white-on-black 28x28 frames + ifs.seekg(images_file_offset + (frame_idx%num_frames_file) * channel_size); + ifs.read(frame_buffer, channel_size); + return ifs.good(); } // Display top 5 classified imagenet classes with probabilities -bool WriteFrameOutput(const ExecutionObjectPipeline &eop) +bool WriteFrameOutput(const ExecutionObjectPipeline &eop, ifstream &ifs_labels) { unsigned char *out = (unsigned char *) eop.GetOutputBufferPtr(); int out_size = eop.GetOutputBufferSizeInBytes(); @@ -318,7 +307,7 @@ bool WriteFrameOutput(const ExecutionObjectPipeline &eop) int maxloc = -1; for (int i = 0; i < out_size; i++) { - // cout << (int) out[i] << " "; + // cout << (int) out[i] << " "; // 10 probability outputs if (out[i] > maxval) { maxval = out[i]; @@ -327,25 +316,32 @@ bool WriteFrameOutput(const ExecutionObjectPipeline &eop) } cout << maxloc << endl; + // check inference result against pre-determined label + if (ifs_labels.is_open()) + { + int frame_index = eop.GetFrameIndex(); + char label = -1; + ifs_labels.seekg(labels_file_offset + (frame_index % num_frames_file)); + ifs_labels.read(&label, 1); + if (maxloc != (int) label) + num_wrong_results += 1; + } + return true; } void DisplayHelp() { cout << - "Usage: imagenet\n" - " Will run imagenet network to predict top 5 object" - " classes for the input.\n Use -c to run a" - " different imagenet network. Default is j11_v2.\n" + "Usage: mnist\n" + " Will run MNIST LeNet to predict the digit in a 28x28" + " white-on-black image.\n Use -c to run a" + " different MNIST network. Default is mnist_lenet.\n" "Optional arguments:\n" - " -c Valid configs: j11_bn, j11_prelu, j11_v2\n" - " -d Number of dsp cores to use\n" + " -c Valid configs: mnist_lenet\n" " -e Number of eve cores to use\n" - " -i Path to the image file as input\n" - " -i camera Use camera as input\n" - " video input port: /dev/video\n" - " -i .{mp4,mov,avi} Use video file as input\n" - " -l Path to the object classes list file\n" + " -i Path to the MNIST white-on-black images file\n" + " -l Path to the MNIST labels file\n" " -f Number of frames to process\n" " -v Verbose output during execution\n" " -h Help\n"; diff --git a/examples/test/testvecs/config/infer/tidl_config_mnist.txt b/examples/test/testvecs/config/infer/tidl_config_mnist_lenet.txt similarity index 71% rename from examples/test/testvecs/config/infer/tidl_config_mnist.txt rename to examples/test/testvecs/config/infer/tidl_config_mnist_lenet.txt index b62b859..f4b0b7f 100755 --- a/examples/test/testvecs/config/infer/tidl_config_mnist.txt +++ b/examples/test/testvecs/config/infer/tidl_config_mnist_lenet.txt @@ -1,9 +1,9 @@ numFrames = 1 preProcType = 0 -inData = ../test/testvecs/input/digit_28x28.y +inData = ../test/testvecs/input/digits10_images_28x28.y outData = "stats_tool_out.bin" -netBinFile = "../test/testvecs/config/tidl_models/tidl_net_mnist_cnn.bin" -paramsBinFile = "../test/testvecs/config/tidl_models/tidl_param_mnist_cnn.bin" +netBinFile = "../test/testvecs/config/tidl_models/tidl_net_mnist_lenet.bin" +paramsBinFile = "../test/testvecs/config/tidl_models/tidl_param_mnist_lenet.bin" inWidth = 28 inHeight = 28 inNumChannels = 2 diff --git a/examples/test/testvecs/config/tidl_models/tidl_net_mnist_lenet.bin b/examples/test/testvecs/config/tidl_models/tidl_net_mnist_lenet.bin new file mode 100644 index 0000000..ea9ac6c Binary files /dev/null and b/examples/test/testvecs/config/tidl_models/tidl_net_mnist_lenet.bin differ diff --git a/examples/test/testvecs/config/tidl_models/tidl_param_mnist_lenet.bin b/examples/test/testvecs/config/tidl_models/tidl_param_mnist_lenet.bin new file mode 100644 index 0000000..66cc8a6 Binary files /dev/null and b/examples/test/testvecs/config/tidl_models/tidl_param_mnist_lenet.bin differ diff --git a/examples/test/testvecs/input/digits10_images_28x28.y b/examples/test/testvecs/input/digits10_images_28x28.y new file mode 100644 index 0000000..8799ea3 Binary files /dev/null and b/examples/test/testvecs/input/digits10_images_28x28.y differ diff --git a/examples/test/testvecs/input/digits10_labels_10x1.y b/examples/test/testvecs/input/digits10_labels_10x1.y new file mode 100644 index 0000000..df93f5f Binary files /dev/null and b/examples/test/testvecs/input/digits10_labels_10x1.y differ