1 /******************************************************************************
2 * Copyright (c) 2018, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
#include <signal.h>
#include <time.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <memory>
#include <queue>
#include <string>
#include <utility>
#include <vector>

#include "executor.h"
#include "execution_object.h"
#include "execution_object_pipeline.h"
#include "configuration.h"
#include "../common/object_classes.h"
#include "../common/utils.h"
#include "../common/video_utils.h"
53 using namespace std;
54 using namespace tidl;
55 using namespace cv;
// Number of frames processed when the input is a camera or a video clip and
// no explicit frame count was requested.
#define NUM_VIDEO_FRAMES                 100

// Network configuration used when none is selected on the command line.
#define DEFAULT_CONFIG                   "jdetnet_voc"

// Input file used when none is given on the command line, and the number of
// frames processed from it by default.
#define DEFAULT_INPUT                    "../test/testvecs/input/horse_768x320.y"
#define DEFAULT_INPUT_FRAMES             (1)

// Object class list loaded when none is given on the command line.
#define DEFAULT_OBJECT_CLASSES_LIST_FILE "./jdetnet_voc_objects.json"

// Default detection confidence threshold, in percent.
#define DEFAULT_OUTPUT_PROB_THRESHOLD    25

/* Uncomment DEBUG_FILES to additionally record the individual output   */
/* frames and the resized/cropped network input frames as image files.  */
//#define DEBUG_FILES
69 std::unique_ptr<ObjectClasses> object_classes;
70 uint32_t orig_width;
71 uint32_t orig_height;
72 uint32_t num_frames_file;
74 bool RunConfiguration(const cmdline_opts_t& opts);
75 Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
76 int layers_group_id);
77 bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx,
78 const Configuration& c, const cmdline_opts_t& opts,
79 VideoCapture &cap, ifstream &ifs);
80 bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
81 const Configuration& c, const cmdline_opts_t& opts,
82 float confidence_value);
83 static void DisplayHelp();
/***************************************************************/
/* Callback for the detection confidence slider                */
/***************************************************************/
static void on_trackbar(int /*slider_id*/, void* /*inst*/)
{
    // Called on every slider move. Deliberately empty: the slider value
    // (prob_slider) is updated automatically, so nothing has to be done
    // here. Any extra work to perform when the slider moves belongs in
    // this function.
}
96 int main(int argc, char *argv[])
97 {
98 // Catch ctrl-c to ensure a clean exit
99 signal(SIGABRT, exit);
100 signal(SIGTERM, exit);
102 // If there are no devices capable of offloading TIDL on the SoC, exit
103 uint32_t num_eves = Executor::GetNumDevices(DeviceType::EVE);
104 uint32_t num_dsps = Executor::GetNumDevices(DeviceType::DSP);
105 if (num_eves == 0 && num_dsps == 0)
106 {
107 cout << "ssd_multibox requires EVE or DSP for execution." << endl;
108 return EXIT_SUCCESS;
109 }
111 // Process arguments
112 cmdline_opts_t opts;
113 opts.config = DEFAULT_CONFIG;
114 opts.object_classes_list_file = DEFAULT_OBJECT_CLASSES_LIST_FILE;
115 opts.num_eves = num_eves > 0 ? 1 : 0;
116 opts.num_dsps = num_dsps > 0 ? 1 : 0;
117 opts.input_file = DEFAULT_INPUT;
118 opts.output_prob_threshold = DEFAULT_OUTPUT_PROB_THRESHOLD;
119 if (! ProcessArgs(argc, argv, opts))
120 {
121 DisplayHelp();
122 exit(EXIT_SUCCESS);
123 }
124 assert(opts.num_dsps != 0 || opts.num_eves != 0);
125 if (opts.num_frames == 0)
126 opts.num_frames = (opts.is_camera_input || opts.is_video_input) ?
127 NUM_VIDEO_FRAMES :
128 ((opts.input_file == DEFAULT_INPUT) ?
129 DEFAULT_INPUT_FRAMES : 1);
130 cout << "Input: " << opts.input_file << endl;
132 // Get object classes list
133 object_classes = std::unique_ptr<ObjectClasses>(
134 new ObjectClasses(opts.object_classes_list_file));
135 if (object_classes->GetNumClasses() == 0)
136 {
137 cout << "No object classes defined for this config." << endl;
138 return EXIT_FAILURE;
139 }
141 // Run network
142 bool status = RunConfiguration(opts);
143 if (!status)
144 {
145 cout << "ssd_multibox FAILED" << endl;
146 return EXIT_FAILURE;
147 }
149 cout << "ssd_multibox PASSED" << endl;
150 return EXIT_SUCCESS;
151 }
153 bool RunConfiguration(const cmdline_opts_t& opts)
154 {
155 int prob_slider = opts.output_prob_threshold;
156 // Read the TI DL configuration file
157 Configuration c;
158 std::string config_file = "../test/testvecs/config/infer/tidl_config_"
159 + opts.config + ".txt";
160 bool status = c.ReadFromFile(config_file);
161 if (!status)
162 {
163 cerr << "Error in configuration file: " << config_file << endl;
164 return false;
165 }
166 c.enableApiTrace = opts.verbose;
167 if (opts.num_eves == 0 || opts.num_dsps == 0)
168 c.runFullNet = true;
169 // DSP only execution requires larger NETWORK HEAP size
170 if (opts.num_eves == 0)
171 c.NETWORK_HEAP_SIZE = 75000000;
173 // setup camera/video input
174 VideoCapture cap;
175 if (! SetVideoInputOutput(cap, opts, "SSD_Multibox")) return false;
177 if (opts.is_camera_input || opts.is_video_input)
178 {
179 std::string TrackbarName("Confidence(%):");
180 createTrackbar( TrackbarName.c_str(), "SSD_Multibox",
181 &prob_slider, 100, on_trackbar );
182 std::cout << TrackbarName << std::endl;
183 }
185 // setup preprocessed input
186 ifstream ifs;
187 if (opts.is_preprocessed_input)
188 {
189 ifs.open(opts.input_file, ios::binary | ios::ate);
190 if (! ifs.good())
191 {
192 cerr << "Cannot open " << opts.input_file << endl;
193 return false;
194 }
195 num_frames_file = ((int) ifs.tellg()) /
196 (c.inWidth * c.inHeight * c.inNumChannels);
197 }
199 try
200 {
201 // Create Executors with the approriate core type, number of cores
202 // and configuration specified
203 // EVE will run layersGroupId 1 in the network, while
204 // DSP will run layersGroupId 2 in the network
205 Executor* e_eve = CreateExecutor(DeviceType::EVE, opts.num_eves, c, 1);
206 Executor* e_dsp = CreateExecutor(DeviceType::DSP, opts.num_dsps, c, 2);
207 vector<ExecutionObjectPipeline *> eops;
209 if (e_eve != nullptr && e_dsp != nullptr)
210 {
211 // Construct ExecutionObjectPipeline that utilizes multiple
212 // ExecutionObjects to process a single frame, each ExecutionObject
213 // processes one layerGroup of the network
214 //
215 // Pipeline depth can enable more optimized pipeline execution:
216 // Given one EVE and one DSP as an example, with different
217 // pipeline_depth, we have different execution behavior:
218 // If pipeline_depth is set to 1,
219 // we create one EOP: eop0 (eve0, dsp0)
220 // pipeline execution of multiple frames over time is as follows:
221 // --------------------- time ------------------->
222 // eop0: [eve0...][dsp0]
223 // eop0: [eve0...][dsp0]
224 // eop0: [eve0...][dsp0]
225 // eop0: [eve0...][dsp0]
226 // If pipeline_depth is set to 2,
227 // we create two EOPs: eop0 (eve0, dsp0), eop1(eve0, dsp0)
228 // pipeline execution of multiple frames over time is as follows:
229 // --------------------- time ------------------->
230 // eop0: [eve0...][dsp0]
231 // eop1: [eve0...][dsp0]
232 // eop0: [eve0...][dsp0]
233 // eop1: [eve0...][dsp0]
234 // Additional benefit of setting pipeline_depth to 2 is that
235 // it can also overlap host ReadFrame() with device processing:
236 // --------------------- time ------------------->
237 // eop0: [RF][eve0...][dsp0]
238 // eop1: [RF] [eve0...][dsp0]
239 // eop0: [RF][eve0...][dsp0]
240 // eop1: [RF][eve0...][dsp0]
241 uint32_t pipeline_depth = 2; // 2 EOs in EOP -> depth 2
242 for (uint32_t j = 0; j < pipeline_depth; j++)
243 for (uint32_t i = 0; i < max(opts.num_eves, opts.num_dsps); i++)
244 eops.push_back(new ExecutionObjectPipeline(
245 {(*e_eve)[i%opts.num_eves], (*e_dsp)[i%opts.num_dsps]}));
246 }
247 else
248 {
249 // Construct ExecutionObjectPipeline that utilizes a
250 // ExecutionObject to process a single frame, each ExecutionObject
251 // processes the full network
252 //
253 // Use duplicate EOPs to do double buffering on frame input/output
254 // because each EOP has its own set of input/output buffers,
255 // so that host ReadFrame() can overlap device processing
256 // Use one EO as an example, with different buffer_factor,
257 // we have different execution behavior:
258 // If buffer_factor is set to 1 -> single buffering
259 // we create one EOP: eop0 (eo0)
260 // pipeline execution of multiple frames over time is as follows:
261 // --------------------- time ------------------->
262 // eop0: [RF][eo0.....][WF]
263 // eop0: [RF][eo0.....][WF]
264 // eop0: [RF][eo0.....][WF]
265 // If buffer_factor is set to 2 -> double buffering
266 // we create two EOPs: eop0 (eo0), eop1(eo0)
267 // pipeline execution of multiple frames over time is as follows:
268 // --------------------- time ------------------->
269 // eop0: [RF][eo0.....][WF]
270 // eop1: [RF] [eo0.....][WF]
271 // eop0: [RF] [eo0.....][WF]
272 // eop1: [RF] [eo0.....][WF]
273 uint32_t buffer_factor = 2; // set to 1 for single buffering
274 for (uint32_t j = 0; j < buffer_factor; j++)
275 {
276 for (uint32_t i = 0; i < opts.num_eves; i++)
277 eops.push_back(new ExecutionObjectPipeline({(*e_eve)[i]}));
278 for (uint32_t i = 0; i < opts.num_dsps; i++)
279 eops.push_back(new ExecutionObjectPipeline({(*e_dsp)[i]}));
280 }
281 }
282 uint32_t num_eops = eops.size();
284 // Allocate input/output memory for each EOP
285 AllocateMemory(eops);
287 chrono::time_point<chrono::steady_clock> tloop0, tloop1;
288 tloop0 = chrono::steady_clock::now();
290 // Process frames with available eops in a pipelined manner
291 // additional num_eops iterations to flush pipeline (epilogue)
292 for (uint32_t frame_idx = 0;
293 frame_idx < opts.num_frames + num_eops; frame_idx++)
294 {
295 ExecutionObjectPipeline* eop = eops[frame_idx % num_eops];
297 // Wait for previous frame on the same eop to finish processing
298 if (eop->ProcessFrameWait())
299 WriteFrameOutput(*eop, c, opts, (float)prob_slider);
301 // Read a frame and start processing it with current eo
302 if (ReadFrame(*eop, frame_idx, c, opts, cap, ifs))
303 eop->ProcessFrameStartAsync();
304 }
306 tloop1 = chrono::steady_clock::now();
307 chrono::duration<float> elapsed = tloop1 - tloop0;
308 cout << "Loop total time (including read/write/opencv/print/etc): "
309 << setw(6) << setprecision(4)
310 << (elapsed.count() * 1000) << "ms" << endl;
312 FreeMemory(eops);
313 for (auto eop : eops) delete eop;
314 delete e_eve;
315 delete e_dsp;
316 }
317 catch (tidl::Exception &e)
318 {
319 cerr << e.what() << endl;
320 status = false;
321 }
323 return status;
324 }
326 // Create an Executor with the specified type and number of EOs
327 Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
328 int layers_group_id)
329 {
330 if (num == 0) return nullptr;
332 DeviceIds ids;
333 for (uint32_t i = 0; i < num; i++)
334 ids.insert(static_cast<DeviceId>(i));
336 Executor* e = new Executor(dt, ids, c, layers_group_id);
337 assert(e != nullptr);
338 return e;
339 }
341 bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx,
342 const Configuration& c, const cmdline_opts_t& opts,
343 VideoCapture &cap, ifstream &ifs)
344 {
345 if ((uint32_t)frame_idx >= opts.num_frames)
346 return false;
348 eop.SetFrameIndex(frame_idx);
350 char* frame_buffer = eop.GetInputBufferPtr();
351 assert (frame_buffer != nullptr);
352 int channel_size = c.inWidth * c.inHeight;
353 int frame_size = channel_size * c.inNumChannels;
355 Mat image;
356 if (!opts.is_camera_input && !opts.is_video_input)
357 {
358 if (opts.is_preprocessed_input)
359 {
360 orig_width = c.inWidth;
361 orig_height = c.inHeight;
362 ifs.seekg((frame_idx % num_frames_file) * frame_size);
363 ifs.read(frame_buffer, frame_size);
364 return ifs.good();
365 }
366 else
367 {
368 image = cv::imread(opts.input_file, CV_LOAD_IMAGE_COLOR);
369 if (image.empty())
370 {
371 cerr << "Unable to read from: " << opts.input_file << endl;
372 return false;
373 }
374 }
375 }
376 else
377 {
378 if(opts.is_camera_input)
379 {
380 if (! cap.grab()) return false;
381 if (! cap.retrieve(image)) return false;
382 }
383 else
384 { // Video clip
385 if (cap.grab())
386 {
387 if (! cap.retrieve(image)) return false;
388 } else {
389 //Rewind!
390 std::cout << "Video clip rewinded!" << std::endl;
391 cap.set(CAP_PROP_POS_FRAMES, 0);
392 if (! cap.grab()) return false;
393 if (! cap.retrieve(image)) return false;
394 }
395 }
396 }
398 // Scale to network input size:
399 Mat s_image, bgr_frames[3];
400 orig_width = image.cols;
401 orig_height = image.rows;
402 if (!opts.is_camera_input && !opts.is_video_input)
403 {
404 cv::resize(image, s_image, Size(c.inWidth, c.inHeight),
405 0, 0, cv::INTER_AREA);
406 }
407 else
408 {
409 // Preserve aspect ratio, by doing central cropping
410 // Choose vertical or horizontal central cropping
411 // based on dimension reduction
412 if(orig_width > orig_height)
413 {
414 float change_width = (float)c.inWidth / (float)orig_width;
415 float change_height = (float)c.inHeight / (float)orig_height;
416 if(change_width < change_height)
417 {
418 // E.g. for 1920x1080->512x512, we first crop central part
419 // roi(420, 0, 1080, 1080), then resize to (512x512)
420 int offset_x = (int)round(0.5 * ((float)orig_width -
421 ((float)orig_height * (float)c.inWidth / (float)c.inHeight)));
422 cv::resize(image(Rect(offset_x, 0, orig_width - 2 * offset_x,
423 orig_height)), s_image,
424 Size(c.inWidth, c.inHeight), 0, 0, cv::INTER_AREA);
425 } else {
426 // E.g. for 1920x1080->768x320, we first crop central part
427 // roi(0, 140, 1920, 800), then resize to (768x320)
428 int offset_y = (int)round(0.5 * ((float)orig_height -
429 ((float)orig_width * (float)c.inHeight / (float)c.inWidth)));
430 cv::resize(image(Rect(0, offset_y, orig_width,
431 orig_height - 2 * offset_y)), s_image,
432 Size(c.inWidth, c.inHeight), 0, 0, cv::INTER_AREA);
433 }
434 } else {
435 // E.g. for 540x960->512x512, we first crop central part
436 // roi(0, 210, 540, 540), then resize to (512x512)
437 // E.g. for 540x960->768x320, we first crop central part
438 // roi(0, 367, 540, 225), then resize to (768x320)
439 int offset_y = (int)round(0.5 * ((float)orig_height -
440 ((float)orig_width * (float)c.inHeight / (float)c.inWidth)));
441 cv::resize(image(Rect(0, offset_y, orig_width, orig_height -
442 2 * offset_y)), s_image,
443 Size(c.inWidth, c.inHeight), 0, 0, cv::INTER_AREA);
444 }
445 }
447 #ifdef DEBUG_FILES
448 {
449 // Image files can be converted into video using, example script
450 // (on desktop Ubuntu, with ffmpeg installed):
451 // ffmpeg -i netin_%04d.png -vf "scale=(iw*sar)*max(768/(iw*sar)\,320/ih):ih*max(768/(iw*sar)\,320/ih), crop=768:320" -b:v 4000k out.mp4
452 // Update width 768, height 320, if necessary
453 char netin_name[80];
454 sprintf(netin_name, "netin_%04d.png", frame_idx);
455 cv::imwrite(netin_name, s_image);
456 std::cout << "Video input, width:" << orig_width << " height:"
457 << orig_height << " Network width:" << c.inWidth
458 << " height:" << c.inHeight << std::endl;
459 }
460 #endif
462 cv::split(s_image, bgr_frames);
463 memcpy(frame_buffer, bgr_frames[0].ptr(), channel_size);
464 memcpy(frame_buffer+1*channel_size, bgr_frames[1].ptr(), channel_size);
465 memcpy(frame_buffer+2*channel_size, bgr_frames[2].ptr(), channel_size);
466 return true;
467 }
469 // Create frame with boxes drawn around classified objects
470 bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
471 const Configuration& c, const cmdline_opts_t& opts,
472 float confidence_value)
473 {
474 // Asseembly original frame
475 int width = c.inWidth;
476 int height = c.inHeight;
477 int channel_size = width * height;
478 Mat frame, bgr[3];
480 unsigned char *in = (unsigned char *) eop.GetInputBufferPtr();
481 bgr[0] = Mat(height, width, CV_8UC(1), in);
482 bgr[1] = Mat(height, width, CV_8UC(1), in + channel_size);
483 bgr[2] = Mat(height, width, CV_8UC(1), in + channel_size*2);
484 cv::merge(bgr, 3, frame);
486 int frame_index = eop.GetFrameIndex();
487 char outfile_name[64];
488 if (opts.is_preprocessed_input)
489 {
490 snprintf(outfile_name, 64, "frame_%d.png", frame_index);
491 cv::imwrite(outfile_name, frame);
492 printf("Saving frame %d to: %s\n", frame_index, outfile_name);
493 }
495 // Draw boxes around classified objects
496 float *out = (float *) eop.GetOutputBufferPtr();
497 int num_floats = eop.GetOutputBufferSizeInBytes() / sizeof(float);
498 for (int i = 0; i < num_floats / 7; i++)
499 {
500 int index = (int) out[i * 7 + 0];
501 if (index < 0) break;
503 float score = out[i * 7 + 2];
504 if (score * 100 < confidence_value) continue;
506 int label = (int) out[i * 7 + 1];
507 int xmin = (int) (out[i * 7 + 3] * width);
508 int ymin = (int) (out[i * 7 + 4] * height);
509 int xmax = (int) (out[i * 7 + 5] * width);
510 int ymax = (int) (out[i * 7 + 6] * height);
512 const ObjectClass& object_class = object_classes->At(label);
514 if(opts.verbose) {
515 printf("%2d: (%d, %d) -> (%d, %d): %s, score=%f\n",
516 i, xmin, ymin, xmax, ymax, object_class.label.c_str(), score);
517 }
519 if (xmin < 0) xmin = 0;
520 if (ymin < 0) ymin = 0;
521 if (xmax > width) xmax = width;
522 if (ymax > height) ymax = height;
523 cv::rectangle(frame, Point(xmin, ymin), Point(xmax, ymax),
524 Scalar(object_class.color.blue,
525 object_class.color.green,
526 object_class.color.red), 2);
527 }
529 if (opts.is_camera_input || opts.is_video_input)
530 {
531 cv::imshow("SSD_Multibox", frame);
532 #ifdef DEBUG_FILES
533 // Image files can be converted into video using, example script
534 // (on desktop Ubuntu, with ffmpeg installed):
535 // ffmpeg -i multibox_%04d.png -vf "scale=(iw*sar)*max(768/(iw*sar)\,320/ih):ih*max(768/(iw*sar)\,320/ih), crop=768:320" -b:v 4000k out.mp4
536 // Update width 768, height 320, if necessary
537 snprintf(outfile_name, 64, "multibox_%04d.png", frame_index);
538 cv::imwrite(outfile_name, r_frame);
539 #endif
540 waitKey(1);
541 }
542 else
543 {
544 // Resize to output width/height, keep aspect ratio
545 Mat r_frame;
546 uint32_t output_width = opts.output_width;
547 if (output_width == 0) output_width = orig_width;
548 uint32_t output_height = (output_width*1.0f) / orig_width * orig_height;
549 cv::resize(frame, r_frame, Size(output_width, output_height));
551 snprintf(outfile_name, 64, "multibox_%d.png", frame_index);
552 cv::imwrite(outfile_name, frame);
553 printf("Saving frame %d with SSD multiboxes to: %s\n",
554 frame_index, outfile_name);
555 }
557 return true;
558 }
// Prints the command line usage of ssd_multibox to stdout.
// The text describes the defaults applied in main(): the jdetnet_voc
// detection network, a single test vector frame and a 25 percent
// confidence threshold.
void DisplayHelp()
{
    std::cout <<
    "Usage: ssd_multibox\n"
    " Will run partitioned ssd_multibox network to perform "
    "multi-objects detection\n"
    " and classification. First part of network "
    "(layersGroupId 1) runs on EVE,\n"
    " second part (layersGroupId 2) runs on DSP.\n"
    " Use -c to run a different detection network. Default is jdetnet_voc.\n"
    "Optional arguments:\n"
    " -c <config> Valid configs: jdetnet_voc, jdetnet \n"
    " -d <number> Number of dsp cores to use\n"
    " -e <number> Number of eve cores to use\n"
    " -i <image> Path to the image file as input\n"
    " Default is 1 frame in testvecs\n"
    " -i camera<number> Use camera as input\n"
    " video input port: /dev/video<number>\n"
    " -i <name>.{mp4,mov,avi} Use video file as input\n"
    " -l <objects_list> Path to the object classes list file\n"
    " -f <number> Number of frames to process\n"
    " -w <number> Output image/video width\n"
    " -p <number> Output probability threshold in percentage\n"
    " Default is 25 percent or higher\n"
    " -v Verbose output during execution\n"
    " -h Help\n";
}