1 /******************************************************************************
2 * Copyright (c) 2018, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
#include <signal.h>
#include <time.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdio>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <memory>
#include <queue>
#include <string>
#include <vector>

#include "executor.h"
#include "execution_object.h"
#include "execution_object_pipeline.h"
#include "configuration.h"
#include "../common/object_classes.h"
#include "../common/utils.h"
#include "../common/video_utils.h"
53 using namespace std;
54 using namespace tidl;
55 using namespace cv;
58 #define NUM_VIDEO_FRAMES 100
59 #define DEFAULT_CONFIG "jdetnet_voc"
60 #define DEFAULT_INPUT "../test/testvecs/input/horse_768x320.y"
61 #define DEFAULT_INPUT_FRAMES (1)
62 #define DEFAULT_OBJECT_CLASSES_LIST_FILE "./jdetnet_voc_objects.json"
63 #define DEFAULT_OUTPUT_PROB_THRESHOLD 25
65 /* Enable this macro to record individual output files and */
66 /* resized, cropped network input files */
67 //#define DEBUG_FILES
69 std::unique_ptr<ObjectClasses> object_classes;
70 uint32_t orig_width;
71 uint32_t orig_height;
72 uint32_t num_frames_file;
74 bool RunConfiguration(const cmdline_opts_t& opts);
75 Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
76 int layers_group_id);
77 bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx,
78 const Configuration& c, const cmdline_opts_t& opts,
79 VideoCapture &cap, ifstream &ifs);
80 bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
81 const Configuration& c, const cmdline_opts_t& opts,
82 float confidence_value);
83 static void DisplayHelp();
85 /***************************************************************/
86 /* Slider to control detection confidence level */
87 /***************************************************************/
// OpenCV trackbar callback registered for the confidence slider.
// @param slider_id  current slider position (unused)
// @param inst       user data pointer from createTrackbar (unused)
static void on_trackbar( int slider_id, void *inst )
{
//This function is invoked on every slider move.
//No action required, since prob_slider is automatically updated by OpenCV
//through the pointer passed to createTrackbar().
//But, for any additional operation on slider move, this is the place to insert code.
}
96 int main(int argc, char *argv[])
97 {
98 // Catch ctrl-c to ensure a clean exit
99 signal(SIGABRT, exit);
100 signal(SIGTERM, exit);
102 // If there are no devices capable of offloading TIDL on the SoC, exit
103 uint32_t num_eves = Executor::GetNumDevices(DeviceType::EVE);
104 uint32_t num_dsps = Executor::GetNumDevices(DeviceType::DSP);
105 if (num_eves == 0 || num_dsps == 0)
106 {
107 cout << "ssd_multibox requires both EVE and DSP for execution." << endl;
108 return EXIT_SUCCESS;
109 }
111 // Process arguments
112 cmdline_opts_t opts;
113 opts.config = DEFAULT_CONFIG;
114 opts.object_classes_list_file = DEFAULT_OBJECT_CLASSES_LIST_FILE;
115 opts.num_eves = 1;
116 opts.num_dsps = 1;
117 opts.input_file = DEFAULT_INPUT;
118 opts.output_prob_threshold = DEFAULT_OUTPUT_PROB_THRESHOLD;
119 if (! ProcessArgs(argc, argv, opts))
120 {
121 DisplayHelp();
122 exit(EXIT_SUCCESS);
123 }
124 assert(opts.num_dsps != 0 && opts.num_eves != 0);
125 if (opts.num_frames == 0)
126 opts.num_frames = (opts.is_camera_input || opts.is_video_input) ?
127 NUM_VIDEO_FRAMES :
128 ((opts.input_file == DEFAULT_INPUT) ?
129 DEFAULT_INPUT_FRAMES : 1);
130 cout << "Input: " << opts.input_file << endl;
132 // Get object classes list
133 object_classes = std::unique_ptr<ObjectClasses>(
134 new ObjectClasses(opts.object_classes_list_file));
135 if (object_classes->GetNumClasses() == 0)
136 {
137 cout << "No object classes defined for this config." << endl;
138 return EXIT_FAILURE;
139 }
141 // Run network
142 bool status = RunConfiguration(opts);
143 if (!status)
144 {
145 cout << "ssd_multibox FAILED" << endl;
146 return EXIT_FAILURE;
147 }
149 cout << "ssd_multibox PASSED" << endl;
150 return EXIT_SUCCESS;
151 }
153 bool RunConfiguration(const cmdline_opts_t& opts)
154 {
155 int prob_slider = opts.output_prob_threshold;
156 // Read the TI DL configuration file
157 Configuration c;
158 std::string config_file = "../test/testvecs/config/infer/tidl_config_"
159 + opts.config + ".txt";
160 bool status = c.ReadFromFile(config_file);
161 if (!status)
162 {
163 cerr << "Error in configuration file: " << config_file << endl;
164 return false;
165 }
166 c.enableApiTrace = opts.verbose;
167 // setup camera/video input
168 VideoCapture cap;
169 if (! SetVideoInputOutput(cap, opts, "SSD_Multibox")) return false;
171 if (opts.is_camera_input || opts.is_video_input)
172 {
173 std::string TrackbarName("Confidence(%):");
174 createTrackbar( TrackbarName.c_str(), "SSD_Multibox",
175 &prob_slider, 100, on_trackbar );
176 std::cout << TrackbarName << std::endl;
177 }
179 // setup preprocessed input
180 ifstream ifs;
181 if (opts.is_preprocessed_input)
182 {
183 ifs.open(opts.input_file, ios::binary | ios::ate);
184 if (! ifs.good())
185 {
186 cerr << "Cannot open " << opts.input_file << endl;
187 return false;
188 }
189 num_frames_file = ((int) ifs.tellg()) /
190 (c.inWidth * c.inHeight * c.inNumChannels);
191 }
193 try
194 {
195 // Create Executors with the approriate core type, number of cores
196 // and configuration specified
197 // EVE will run layersGroupId 1 in the network, while
198 // DSP will run layersGroupId 2 in the network
199 Executor* e_eve = CreateExecutor(DeviceType::EVE, opts.num_eves, c, 1);
200 Executor* e_dsp = CreateExecutor(DeviceType::DSP, opts.num_dsps, c, 2);
202 // Construct ExecutionObjectPipeline that utilizes multiple
203 // ExecutionObjects to process a single frame, each ExecutionObject
204 // processes one layerGroup of the network
205 //
206 // Pipeline depth can enable more optimized pipeline execution:
207 // Given one EVE and one DSP as an example, with different
208 // pipeline_depth, we have different execution behavior:
209 // If pipeline_depth is set to 1,
210 // we create one EOP: eop0 (eve0, dsp0)
211 // pipeline execution of multiple frames over time is as follows:
212 // --------------------- time ------------------->
213 // eop0: [eve0...][dsp0]
214 // eop0: [eve0...][dsp0]
215 // eop0: [eve0...][dsp0]
216 // eop0: [eve0...][dsp0]
217 // If pipeline_depth is set to 2,
218 // we create two EOPs: eop0 (eve0, dsp0), eop1(eve0, dsp0)
219 // pipeline execution of multiple frames over time is as follows:
220 // --------------------- time ------------------->
221 // eop0: [eve0...][dsp0]
222 // eop1: [eve0...][dsp0]
223 // eop0: [eve0...][dsp0]
224 // eop1: [eve0...][dsp0]
225 // Additional benefit of setting pipeline_depth to 2 is that
226 // it can also overlap host ReadFrame() with device processing:
227 // --------------------- time ------------------->
228 // eop0: [RF][eve0...][dsp0]
229 // eop1: [RF] [eve0...][dsp0]
230 // eop0: [RF][eve0...][dsp0]
231 // eop1: [RF][eve0...][dsp0]
232 vector<ExecutionObjectPipeline *> eops;
233 uint32_t pipeline_depth = 2; // 2 EOs in EOP -> depth 2
234 for (uint32_t j = 0; j < pipeline_depth; j++)
235 for (uint32_t i = 0; i < max(opts.num_eves, opts.num_dsps); i++)
236 eops.push_back(new ExecutionObjectPipeline(
237 {(*e_eve)[i%opts.num_eves], (*e_dsp)[i%opts.num_dsps]}));
238 uint32_t num_eops = eops.size();
240 // Allocate input/output memory for each EOP
241 AllocateMemory(eops);
243 chrono::time_point<chrono::steady_clock> tloop0, tloop1;
244 tloop0 = chrono::steady_clock::now();
246 // Process frames with available eops in a pipelined manner
247 // additional num_eops iterations to flush pipeline (epilogue)
248 for (uint32_t frame_idx = 0;
249 frame_idx < opts.num_frames + num_eops; frame_idx++)
250 {
251 ExecutionObjectPipeline* eop = eops[frame_idx % num_eops];
253 // Wait for previous frame on the same eop to finish processing
254 if (eop->ProcessFrameWait())
255 {
256 WriteFrameOutput(*eop, c, opts, (float)prob_slider);
257 }
259 // Read a frame and start processing it with current eo
260 if (ReadFrame(*eop, frame_idx, c, opts, cap, ifs))
261 eop->ProcessFrameStartAsync();
262 }
264 tloop1 = chrono::steady_clock::now();
265 chrono::duration<float> elapsed = tloop1 - tloop0;
266 cout << "Loop total time (including read/write/opencv/print/etc): "
267 << setw(6) << setprecision(4)
268 << (elapsed.count() * 1000) << "ms" << endl;
270 FreeMemory(eops);
271 for (auto eop : eops) delete eop;
272 delete e_eve;
273 delete e_dsp;
274 }
275 catch (tidl::Exception &e)
276 {
277 cerr << e.what() << endl;
278 status = false;
279 }
281 return status;
282 }
284 // Create an Executor with the specified type and number of EOs
285 Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
286 int layers_group_id)
287 {
288 if (num == 0) return nullptr;
290 DeviceIds ids;
291 for (uint32_t i = 0; i < num; i++)
292 ids.insert(static_cast<DeviceId>(i));
294 return new Executor(dt, ids, c, layers_group_id);
295 }
// Fill eop's input buffer with one frame in planar BGR order, scaled to the
// network input size (c.inWidth x c.inHeight). Sources, in priority order:
// preprocessed raw file (ifs), still image file, camera, or video clip (cap).
// Side effects: updates globals orig_width/orig_height with the source frame
// dimensions. Returns false when frame_idx is past opts.num_frames (pipeline
// epilogue) or on read failure.
bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx,
               const Configuration& c, const cmdline_opts_t& opts,
               VideoCapture &cap, ifstream &ifs)
{
    // Past the requested frame count: caller uses this to flush the pipeline
    // without starting new frames.
    if ((uint32_t)frame_idx >= opts.num_frames)
        return false;

    eop.SetFrameIndex(frame_idx);

    char* frame_buffer = eop.GetInputBufferPtr();
    assert (frame_buffer != nullptr);
    int channel_size = c.inWidth * c.inHeight;
    int frame_size = channel_size * c.inNumChannels;

    Mat image;
    if (!opts.is_camera_input && !opts.is_video_input)
    {
        if (opts.is_preprocessed_input)
        {
            // Raw planar input is already network-sized: copy directly and
            // wrap around the file when frame_idx exceeds the frames stored.
            orig_width = c.inWidth;
            orig_height = c.inHeight;
            ifs.seekg((frame_idx % num_frames_file) * frame_size);
            ifs.read(frame_buffer, frame_size);
            return ifs.good();
        }
        else
        {
            // NOTE(review): CV_LOAD_IMAGE_COLOR is the legacy OpenCV 2 name
            // for cv::IMREAD_COLOR — confirm against the OpenCV version in use.
            image = cv::imread(opts.input_file, CV_LOAD_IMAGE_COLOR);
            if (image.empty())
            {
                cerr << "Unable to read from: " << opts.input_file << endl;
                return false;
            }
        }
    }
    else
    {
        if(opts.is_camera_input)
        {
            if (! cap.grab()) return false;
            if (! cap.retrieve(image)) return false;
        }
        else
        { // Video clip
            if (cap.grab())
            {
                if (! cap.retrieve(image)) return false;
            } else {
                // End of clip: rewind to frame 0 and continue looping
                std::cout << "Video clip rewinded!" << std::endl;
                cap.set(CAP_PROP_POS_FRAMES, 0);
                if (! cap.grab()) return false;
                if (! cap.retrieve(image)) return false;
            }
        }
    }

    // Scale to network input size:
    Mat s_image, bgr_frames[3];
    orig_width = image.cols;
    orig_height = image.rows;
    if (!opts.is_camera_input && !opts.is_video_input)
    {
        // Still image: direct resize, aspect ratio not preserved
        cv::resize(image, s_image, Size(c.inWidth, c.inHeight),
                   0, 0, cv::INTER_AREA);
    }
    else
    {
        // Preserve aspect ratio, by doing central cropping.
        // Choose vertical or horizontal central cropping
        // based on dimension reduction.
        if(orig_width > orig_height)
        {
            float change_width = (float)c.inWidth / (float)orig_width;
            float change_height = (float)c.inHeight / (float)orig_height;
            if(change_width < change_height)
            {
                // Width shrinks more: crop left/right margins.
                // E.g. for 1920x1080->512x512, we first crop central part
                // roi(420, 0, 1080, 1080), then resize to (512x512)
                int offset_x = (int)round(0.5 * ((float)orig_width -
                    ((float)orig_height * (float)c.inWidth / (float)c.inHeight)));
                cv::resize(image(Rect(offset_x, 0, orig_width - 2 * offset_x,
                                      orig_height)), s_image,
                           Size(c.inWidth, c.inHeight), 0, 0, cv::INTER_AREA);
            } else {
                // Height shrinks more: crop top/bottom margins.
                // E.g. for 1920x1080->768x320, we first crop central part
                // roi(0, 140, 1920, 800), then resize to (768x320)
                int offset_y = (int)round(0.5 * ((float)orig_height -
                    ((float)orig_width * (float)c.inHeight / (float)c.inWidth)));
                cv::resize(image(Rect(0, offset_y, orig_width,
                                      orig_height - 2 * offset_y)), s_image,
                           Size(c.inWidth, c.inHeight), 0, 0, cv::INTER_AREA);
            }
        } else {
            // Portrait (or square) source: crop top/bottom margins.
            // E.g. for 540x960->512x512, we first crop central part
            // roi(0, 210, 540, 540), then resize to (512x512)
            // E.g. for 540x960->768x320, we first crop central part
            // roi(0, 367, 540, 225), then resize to (768x320)
            int offset_y = (int)round(0.5 * ((float)orig_height -
                ((float)orig_width * (float)c.inHeight / (float)c.inWidth)));
            cv::resize(image(Rect(0, offset_y, orig_width, orig_height -
                                  2 * offset_y)), s_image,
                       Size(c.inWidth, c.inHeight), 0, 0, cv::INTER_AREA);
        }
    }

#ifdef DEBUG_FILES
    {
        // Image files can be converted into video using, example script
        // (on desktop Ubuntu, with ffmpeg installed):
        // ffmpeg -i netin_%04d.png -vf "scale=(iw*sar)*max(768/(iw*sar)\,320/ih):ih*max(768/(iw*sar)\,320/ih), crop=768:320" -b:v 4000k out.mp4
        // Update width 768, height 320, if necessary
        char netin_name[80];
        sprintf(netin_name, "netin_%04d.png", frame_idx);
        cv::imwrite(netin_name, s_image);
        std::cout << "Video input, width:" << orig_width << " height:"
                  << orig_height << " Network width:" << c.inWidth
                  << " height:" << c.inHeight << std::endl;
    }
#endif

    // Split interleaved BGR into planes and copy into the network input
    // buffer in planar B, G, R order.
    cv::split(s_image, bgr_frames);
    memcpy(frame_buffer, bgr_frames[0].ptr(), channel_size);
    memcpy(frame_buffer+1*channel_size, bgr_frames[1].ptr(), channel_size);
    memcpy(frame_buffer+2*channel_size, bgr_frames[2].ptr(), channel_size);
    return true;
}
425 // Create frame with boxes drawn around classified objects
426 bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
427 const Configuration& c, const cmdline_opts_t& opts,
428 float confidence_value)
429 {
430 // Asseembly original frame
431 int width = c.inWidth;
432 int height = c.inHeight;
433 int channel_size = width * height;
434 Mat frame, bgr[3];
436 unsigned char *in = (unsigned char *) eop.GetInputBufferPtr();
437 bgr[0] = Mat(height, width, CV_8UC(1), in);
438 bgr[1] = Mat(height, width, CV_8UC(1), in + channel_size);
439 bgr[2] = Mat(height, width, CV_8UC(1), in + channel_size*2);
440 cv::merge(bgr, 3, frame);
442 int frame_index = eop.GetFrameIndex();
443 char outfile_name[64];
444 if (opts.is_preprocessed_input)
445 {
446 snprintf(outfile_name, 64, "frame_%d.png", frame_index);
447 cv::imwrite(outfile_name, frame);
448 printf("Saving frame %d to: %s\n", frame_index, outfile_name);
449 }
451 // Draw boxes around classified objects
452 float *out = (float *) eop.GetOutputBufferPtr();
453 int num_floats = eop.GetOutputBufferSizeInBytes() / sizeof(float);
454 for (int i = 0; i < num_floats / 7; i++)
455 {
456 int index = (int) out[i * 7 + 0];
457 if (index < 0) break;
459 float score = out[i * 7 + 2];
460 if (score * 100 < confidence_value) continue;
462 int label = (int) out[i * 7 + 1];
463 int xmin = (int) (out[i * 7 + 3] * width);
464 int ymin = (int) (out[i * 7 + 4] * height);
465 int xmax = (int) (out[i * 7 + 5] * width);
466 int ymax = (int) (out[i * 7 + 6] * height);
468 const ObjectClass& object_class = object_classes->At(label);
470 if(opts.verbose) {
471 printf("%2d: (%d, %d) -> (%d, %d): %s, score=%f\n",
472 i, xmin, ymin, xmax, ymax, object_class.label.c_str(), score);
473 }
475 if (xmin < 0) xmin = 0;
476 if (ymin < 0) ymin = 0;
477 if (xmax > width) xmax = width;
478 if (ymax > height) ymax = height;
479 cv::rectangle(frame, Point(xmin, ymin), Point(xmax, ymax),
480 Scalar(object_class.color.blue,
481 object_class.color.green,
482 object_class.color.red), 2);
483 }
485 if (opts.is_camera_input || opts.is_video_input)
486 {
487 cv::imshow("SSD_Multibox", frame);
488 #ifdef DEBUG_FILES
489 // Image files can be converted into video using, example script
490 // (on desktop Ubuntu, with ffmpeg installed):
491 // ffmpeg -i multibox_%04d.png -vf "scale=(iw*sar)*max(768/(iw*sar)\,320/ih):ih*max(768/(iw*sar)\,320/ih), crop=768:320" -b:v 4000k out.mp4
492 // Update width 768, height 320, if necessary
493 snprintf(outfile_name, 64, "multibox_%04d.png", frame_index);
494 cv::imwrite(outfile_name, r_frame);
495 #endif
496 waitKey(1);
497 }
498 else
499 {
500 // Resize to output width/height, keep aspect ratio
501 Mat r_frame;
502 uint32_t output_width = opts.output_width;
503 if (output_width == 0) output_width = orig_width;
504 uint32_t output_height = (output_width*1.0f) / orig_width * orig_height;
505 cv::resize(frame, r_frame, Size(output_width, output_height));
507 snprintf(outfile_name, 64, "multibox_%d.png", frame_index);
508 cv::imwrite(outfile_name, frame);
509 printf("Saving frame %d with SSD multiboxes to: %s\n",
510 frame_index, outfile_name);
511 }
513 return true;
514 }
// Print command-line usage for the ssd_multibox demo to stdout.
void DisplayHelp()
{
    static const char usage_text[] =
        "Usage: ssd_multibox\n"
        " Will run partitioned ssd_multibox network to perform "
        "multi-objects detection\n"
        " and classification. First part of network "
        "(layersGroupId 1) runs on EVE,\n"
        " second part (layersGroupId 2) runs on DSP.\n"
        " Use -c to run a different segmentation network. Default is jdetnet_voc.\n"
        "Optional arguments:\n"
        " -c <config> Valid configs: jdetnet_voc, jdetnet \n"
        " -d <number> Number of dsp cores to use\n"
        " -e <number> Number of eve cores to use\n"
        " -i <image> Path to the image file as input\n"
        " Default are 9 frames in testvecs\n"
        " -i camera<number> Use camera as input\n"
        " video input port: /dev/video<number>\n"
        " -i <name>.{mp4,mov,avi} Use video file as input\n"
        " -l <objects_list> Path to the object classes list file\n"
        " -f <number> Number of frames to process\n"
        " -w <number> Output image/video width\n"
        " -p <number> Output probability threshold in percentage\n"
        " Default is 25 percent or higher\n"
        " -v Verbose output during execution\n"
        " -h Help\n";
    std::cout << usage_text;
}