1 /******************************************************************************
2 * Copyright (c) 2018, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28 #include <signal.h>
29 #include <iostream>
30 #include <iomanip>
31 #include <fstream>
32 #include <cassert>
33 #include <string>
34 #include <functional>
35 #include <algorithm>
36 #include <time.h>
37 #include <unistd.h>
39 #include <queue>
40 #include <vector>
41 #include <cstdio>
42 #include <string>
43 #include <chrono>
45 #include "executor.h"
46 #include "execution_object.h"
47 #include "execution_object_pipeline.h"
48 #include "configuration.h"
49 #include "../common/object_classes.h"
50 #include "../common/utils.h"
51 #include "../common/video_utils.h"
53 using namespace std;
54 using namespace tidl;
55 using namespace cv;
58 #define NUM_VIDEO_FRAMES 100
59 #define DEFAULT_CONFIG "jdetnet_voc"
60 #define DEFAULT_INPUT "../test/testvecs/input/horse_768x320.y"
61 #define DEFAULT_INPUT_FRAMES (1)
62 #define DEFAULT_OBJECT_CLASSES_LIST_FILE "./jdetnet_voc_objects.json"
63 #define DEFAULT_OUTPUT_PROB_THRESHOLD 25
65 /* Enable this macro to record individual output files and */
66 /* resized, cropped network input files */
67 //#define DEBUG_FILES
// Object class metadata (labels + box colors) loaded from the JSON list file;
// shared by main() (load) and WriteFrameOutput() (lookup).
std::unique_ptr<ObjectClasses> object_classes;
// Dimensions of the most recently read input image/frame, recorded by
// ReadFrame() and used by WriteFrameOutput() to size the saved output.
uint32_t orig_width;
uint32_t orig_height;
// Number of whole frames contained in a preprocessed (.y) input file;
// ReadFrame() wraps frame_idx modulo this count.
uint32_t num_frames_file;

// Runs the network described by opts end-to-end; returns false on failure.
bool RunConfiguration(const cmdline_opts_t& opts);
// Creates an Executor over `num` devices of type `dt` for one layers group,
// or nullptr when num == 0.
Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
                         int layers_group_id);
// Fills the EOP's input buffer with frame `frame_idx` from file/camera/video;
// returns false when there is no frame to process.
bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx,
               const Configuration& c, const cmdline_opts_t& opts,
               VideoCapture &cap, ifstream &ifs);
// Renders detection boxes over the frame and displays or saves it.
bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
                      const Configuration& c, const cmdline_opts_t& opts,
                      float confidence_value);
static void DisplayHelp();
85 /***************************************************************/
86 /* Slider to control detection confidence level */
87 /***************************************************************/
/***************************************************************/
/* Slider to control detection confidence level                */
/***************************************************************/
// OpenCV trackbar callback, invoked on every slider move.
// Intentionally a no-op: OpenCV updates the bound variable
// (prob_slider) automatically. Insert code here only if extra
// work must happen on slider movement.
static void on_trackbar(int slider_id, void *inst)
{
    (void)slider_id;  // unused
    (void)inst;       // unused
}
96 int main(int argc, char *argv[])
97 {
98 // Catch ctrl-c to ensure a clean exit
99 signal(SIGABRT, exit);
100 signal(SIGTERM, exit);
102 // If there are no devices capable of offloading TIDL on the SoC, exit
103 uint32_t num_eves = Executor::GetNumDevices(DeviceType::EVE);
104 uint32_t num_dsps = Executor::GetNumDevices(DeviceType::DSP);
105 if (num_eves == 0 && num_dsps == 0)
106 {
107 cout << "ssd_multibox requires EVE or DSP for execution." << endl;
108 return EXIT_SUCCESS;
109 }
111 // Process arguments
112 cmdline_opts_t opts;
113 opts.config = DEFAULT_CONFIG;
114 opts.object_classes_list_file = DEFAULT_OBJECT_CLASSES_LIST_FILE;
115 opts.num_eves = num_eves > 0 ? 1 : 0;
116 opts.num_dsps = num_dsps > 0 ? 1 : 0;
117 opts.input_file = DEFAULT_INPUT;
118 opts.output_prob_threshold = DEFAULT_OUTPUT_PROB_THRESHOLD;
119 if (! ProcessArgs(argc, argv, opts))
120 {
121 DisplayHelp();
122 exit(EXIT_SUCCESS);
123 }
124 assert(opts.num_dsps != 0 || opts.num_eves != 0);
125 if (opts.num_frames == 0)
126 opts.num_frames = (opts.is_camera_input || opts.is_video_input) ?
127 NUM_VIDEO_FRAMES :
128 ((opts.input_file == DEFAULT_INPUT) ?
129 DEFAULT_INPUT_FRAMES : 1);
130 cout << "Input: " << opts.input_file << endl;
132 // Get object classes list
133 object_classes = std::unique_ptr<ObjectClasses>(
134 new ObjectClasses(opts.object_classes_list_file));
135 if (object_classes->GetNumClasses() == 0)
136 {
137 cout << "No object classes defined for this config." << endl;
138 return EXIT_FAILURE;
139 }
141 // Run network
142 bool status = RunConfiguration(opts);
143 if (!status)
144 {
145 cout << "ssd_multibox FAILED" << endl;
146 return EXIT_FAILURE;
147 }
149 cout << "ssd_multibox PASSED" << endl;
150 return EXIT_SUCCESS;
151 }
/**
 * Runs the SSD multibox network described by opts.
 *
 * Reads the TIDL config, sets up camera/video/file input, builds
 * ExecutionObjectPipelines over the requested EVE/DSP cores, then pumps
 * frames through the pipelines, drawing detection boxes on each output.
 *
 * @param opts  parsed command-line options (cores, input, thresholds)
 * @return true on success, false on configuration/input error or
 *         tidl::Exception during execution
 */
bool RunConfiguration(const cmdline_opts_t& opts)
{
    // Slider-controlled confidence threshold (percent); createTrackbar
    // binds this variable so OpenCV updates it on slider moves.
    int prob_slider = opts.output_prob_threshold;
    // Read the TI DL configuration file
    Configuration c;
    std::string config_file = "../test/testvecs/config/infer/tidl_config_"
                              + opts.config + ".txt";
    bool status = c.ReadFromFile(config_file);
    if (!status)
    {
        cerr << "Error in configuration file: " << config_file << endl;
        return false;
    }
    c.enableApiTrace = opts.verbose;
    // With only one core type available, the network cannot be split into
    // layer groups, so run the full net on that core type.
    if (opts.num_eves == 0 || opts.num_dsps == 0)
        c.runFullNet = true;

    // setup camera/video input
    VideoCapture cap;
    if (! SetVideoInputOutput(cap, opts, "SSD_Multibox")) return false;

    // Live input gets an on-screen slider to tune the confidence threshold
    if (opts.is_camera_input || opts.is_video_input)
    {
        std::string TrackbarName("Confidence(%):");
        createTrackbar( TrackbarName.c_str(), "SSD_Multibox",
                        &prob_slider, 100, on_trackbar );
        std::cout << TrackbarName << std::endl;
    }

    // setup preprocessed input: raw planar frames concatenated in one file;
    // opening with ios::ate lets tellg() report the file size directly.
    ifstream ifs;
    if (opts.is_preprocessed_input)
    {
        ifs.open(opts.input_file, ios::binary | ios::ate);
        if (! ifs.good())
        {
            cerr << "Cannot open " << opts.input_file << endl;
            return false;
        }
        num_frames_file = ((int) ifs.tellg()) /
                          (c.inWidth * c.inHeight * c.inNumChannels);
    }

    try
    {
        // Create Executors with the approriate core type, number of cores
        // and configuration specified
        // EVE will run layersGroupId 1 in the network, while
        // DSP will run layersGroupId 2 in the network
        // NOTE(review): e_eve/e_dsp and the eops vector are raw-owning; if a
        // tidl::Exception is thrown mid-setup or mid-loop they leak (the
        // catch block below does not free them) — consider RAII wrappers.
        Executor* e_eve = CreateExecutor(DeviceType::EVE, opts.num_eves, c, 1);
        Executor* e_dsp = CreateExecutor(DeviceType::DSP, opts.num_dsps, c, 2);
        vector<ExecutionObjectPipeline *> eops;

        if (e_eve != nullptr && e_dsp != nullptr)
        {
            // Construct ExecutionObjectPipeline that utilizes multiple
            // ExecutionObjects to process a single frame, each ExecutionObject
            // processes one layerGroup of the network
            //
            // Pipeline depth can enable more optimized pipeline execution:
            // Given one EVE and one DSP as an example, with different
            // pipeline_depth, we have different execution behavior:
            // If pipeline_depth is set to 1,
            //    we create one EOP: eop0 (eve0, dsp0)
            //    pipeline execution of multiple frames over time is as follows:
            //    --------------------- time ------------------->
            //    eop0: [eve0...][dsp0]
            //    eop0:                [eve0...][dsp0]
            //    eop0:                               [eve0...][dsp0]
            //    eop0:                                              [eve0...][dsp0]
            // If pipeline_depth is set to 2,
            //    we create two EOPs: eop0 (eve0, dsp0), eop1(eve0, dsp0)
            //    pipeline execution of multiple frames over time is as follows:
            //    --------------------- time ------------------->
            //    eop0: [eve0...][dsp0]
            //    eop1:          [eve0...][dsp0]
            //    eop0:                   [eve0...][dsp0]
            //    eop1:                            [eve0...][dsp0]
            // Additional benefit of setting pipeline_depth to 2 is that
            // it can also overlap host ReadFrame() with device processing:
            //    --------------------- time ------------------->
            //    eop0: [RF][eve0...][dsp0]
            //    eop1:     [RF]     [eve0...][dsp0]
            //    eop0:                       [RF][eve0...][dsp0]
            //    eop1:                                [RF][eve0...][dsp0]
            uint32_t pipeline_depth = 2;  // 2 EOs in EOP -> depth 2
            for (uint32_t j = 0; j < pipeline_depth; j++)
                for (uint32_t i = 0; i < max(opts.num_eves, opts.num_dsps); i++)
                    // Modulo lets an unequal EVE/DSP count share devices
                    eops.push_back(new ExecutionObjectPipeline(
                        {(*e_eve)[i%opts.num_eves], (*e_dsp)[i%opts.num_dsps]}));
        }
        else
        {
            // Construct ExecutionObjectPipeline that utilizes a
            // ExecutionObject to process a single frame, each ExecutionObject
            // processes the full network
            //
            // Use duplicate EOPs to do double buffering on frame input/output
            // because each EOP has its own set of input/output buffers,
            // so that host ReadFrame() can overlap device processing
            // Use one EO as an example, with different buffer_factor,
            // we have different execution behavior:
            // If buffer_factor is set to 1 -> single buffering
            //    we create one EOP: eop0 (eo0)
            //    pipeline execution of multiple frames over time is as follows:
            //    --------------------- time ------------------->
            //    eop0: [RF][eo0.....][WF]
            //    eop0:                   [RF][eo0.....][WF]
            //    eop0:                                     [RF][eo0.....][WF]
            // If buffer_factor is set to 2 -> double buffering
            //    we create two EOPs: eop0 (eo0), eop1(eo0)
            //    pipeline execution of multiple frames over time is as follows:
            //    --------------------- time ------------------->
            //    eop0: [RF][eo0.....][WF]
            //    eop1:     [RF]      [eo0.....][WF]
            //    eop0:                   [RF]      [eo0.....][WF]
            //    eop1:                                 [RF]      [eo0.....][WF]
            uint32_t buffer_factor = 2;  // set to 1 for single buffering
            for (uint32_t j = 0; j < buffer_factor; j++)
            {
                for (uint32_t i = 0; i < opts.num_eves; i++)
                    eops.push_back(new ExecutionObjectPipeline({(*e_eve)[i]}));
                for (uint32_t i = 0; i < opts.num_dsps; i++)
                    eops.push_back(new ExecutionObjectPipeline({(*e_dsp)[i]}));
            }
        }
        uint32_t num_eops = eops.size();

        // Allocate input/output memory for each EOP
        AllocateMemory(eops);

        chrono::time_point<chrono::steady_clock> tloop0, tloop1;
        tloop0 = chrono::steady_clock::now();

        // Process frames with available eops in a pipelined manner
        // additional num_eops iterations to flush pipeline (epilogue):
        // those final ReadFrame() calls fail (frame_idx >= num_frames), so
        // only the pending results are collected.
        for (uint32_t frame_idx = 0;
             frame_idx < opts.num_frames + num_eops; frame_idx++)
        {
            ExecutionObjectPipeline* eop = eops[frame_idx % num_eops];

            // Wait for previous frame on the same eop to finish processing
            if (eop->ProcessFrameWait())
                WriteFrameOutput(*eop, c, opts, (float)prob_slider);

            // Read a frame and start processing it with current eo
            if (ReadFrame(*eop, frame_idx, c, opts, cap, ifs))
                eop->ProcessFrameStartAsync();
        }

        tloop1 = chrono::steady_clock::now();
        chrono::duration<float> elapsed = tloop1 - tloop0;
        cout << "Loop total time (including read/write/opencv/print/etc): "
             << setw(6) << setprecision(4)
             << (elapsed.count() * 1000) << "ms" << endl;

        FreeMemory(eops);
        for (auto eop : eops) delete eop;
        delete e_eve;
        delete e_dsp;
    }
    catch (tidl::Exception &e)
    {
        cerr << e.what() << endl;
        status = false;
    }

    return status;
}
323 // Create an Executor with the specified type and number of EOs
324 Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
325 int layers_group_id)
326 {
327 if (num == 0) return nullptr;
329 DeviceIds ids;
330 for (uint32_t i = 0; i < num; i++)
331 ids.insert(static_cast<DeviceId>(i));
333 Executor* e = new Executor(dt, ids, c, layers_group_id);
334 assert(e != nullptr);
335 return e;
336 }
/**
 * Read one frame into the EOP's input buffer in planar BGR layout.
 *
 * Sources, in priority order: preprocessed raw file (copied verbatim,
 * wrapping modulo num_frames_file), still image (resized to network size),
 * or camera/video capture (center-cropped to preserve aspect ratio, then
 * resized). Records the source dimensions in orig_width/orig_height for
 * later output scaling.
 *
 * @return true if a frame was loaded and processing should start,
 *         false at end of input or on read failure
 */
bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx,
               const Configuration& c, const cmdline_opts_t& opts,
               VideoCapture &cap, ifstream &ifs)
{
    // Past the requested frame count: signals the pipeline epilogue
    if ((uint32_t)frame_idx >= opts.num_frames)
        return false;

    eop.SetFrameIndex(frame_idx);

    char* frame_buffer = eop.GetInputBufferPtr();
    assert (frame_buffer != nullptr);
    int channel_size = c.inWidth * c.inHeight;
    int frame_size = channel_size * c.inNumChannels;

    Mat image;
    if (!opts.is_camera_input && !opts.is_video_input)
    {
        if (opts.is_preprocessed_input)
        {
            // Raw file already matches the network size; copy bytes directly
            orig_width = c.inWidth;
            orig_height = c.inHeight;
            ifs.seekg((frame_idx % num_frames_file) * frame_size);
            ifs.read(frame_buffer, frame_size);
            return ifs.good();
        }
        else
        {
            // NOTE(review): CV_LOAD_IMAGE_COLOR is the OpenCV 2.x constant;
            // presumably cv::IMREAD_COLOR in OpenCV 3+ — confirm against the
            // OpenCV version this builds with.
            image = cv::imread(opts.input_file, CV_LOAD_IMAGE_COLOR);
            if (image.empty())
            {
                cerr << "Unable to read from: " << opts.input_file << endl;
                return false;
            }
        }
    }
    else
    {
        if(opts.is_camera_input)
        {
            if (! cap.grab()) return false;
            if (! cap.retrieve(image)) return false;
        }
        else
        { // Video clip
            if (cap.grab())
            {
                if (! cap.retrieve(image)) return false;
            } else {
                // Rewind! Loop the clip from the start when it runs out.
                std::cout << "Video clip rewinded!" << std::endl;
                cap.set(CAP_PROP_POS_FRAMES, 0);
                if (! cap.grab()) return false;
                if (! cap.retrieve(image)) return false;
            }
        }
    }

    // Scale to network input size:
    Mat s_image, bgr_frames[3];
    orig_width = image.cols;
    orig_height = image.rows;
    if (!opts.is_camera_input && !opts.is_video_input)
    {
        // Still image: direct resize (aspect ratio not preserved)
        cv::resize(image, s_image, Size(c.inWidth, c.inHeight),
                   0, 0, cv::INTER_AREA);
    }
    else
    {
        // Preserve aspect ratio, by doing central cropping
        // Choose vertical or horizontal central cropping
        // based on dimension reduction
        if(orig_width > orig_height)
        {
            float change_width = (float)c.inWidth / (float)orig_width;
            float change_height = (float)c.inHeight / (float)orig_height;
            if(change_width < change_height)
            {
                // Width shrinks more: crop left/right borders.
                // E.g. for 1920x1080->512x512, we first crop central part
                // roi(420, 0, 1080, 1080), then resize to (512x512)
                int offset_x = (int)round(0.5 * ((float)orig_width -
                    ((float)orig_height * (float)c.inWidth / (float)c.inHeight)));
                cv::resize(image(Rect(offset_x, 0, orig_width - 2 * offset_x,
                                      orig_height)), s_image,
                           Size(c.inWidth, c.inHeight), 0, 0, cv::INTER_AREA);
            } else {
                // Height shrinks more: crop top/bottom borders.
                // E.g. for 1920x1080->768x320, we first crop central part
                // roi(0, 140, 1920, 800), then resize to (768x320)
                int offset_y = (int)round(0.5 * ((float)orig_height -
                    ((float)orig_width * (float)c.inHeight / (float)c.inWidth)));
                cv::resize(image(Rect(0, offset_y, orig_width,
                                      orig_height - 2 * offset_y)), s_image,
                           Size(c.inWidth, c.inHeight), 0, 0, cv::INTER_AREA);
            }
        } else {
            // Portrait input: always crop top/bottom.
            // E.g. for 540x960->512x512, we first crop central part
            // roi(0, 210, 540, 540), then resize to (512x512)
            // E.g. for 540x960->768x320, we first crop central part
            // roi(0, 367, 540, 225), then resize to (768x320)
            int offset_y = (int)round(0.5 * ((float)orig_height -
                ((float)orig_width * (float)c.inHeight / (float)c.inWidth)));
            cv::resize(image(Rect(0, offset_y, orig_width, orig_height -
                                  2 * offset_y)), s_image,
                       Size(c.inWidth, c.inHeight), 0, 0, cv::INTER_AREA);
        }
    }

#ifdef DEBUG_FILES
    {
        // Image files can be converted into video using, example script
        // (on desktop Ubuntu, with ffmpeg installed):
        // ffmpeg -i netin_%04d.png -vf "scale=(iw*sar)*max(768/(iw*sar)\,320/ih):ih*max(768/(iw*sar)\,320/ih), crop=768:320" -b:v 4000k out.mp4
        // Update width 768, height 320, if necessary
        char netin_name[80];
        sprintf(netin_name, "netin_%04d.png", frame_idx);
        cv::imwrite(netin_name, s_image);
        std::cout << "Video input, width:" << orig_width << " height:"
                  << orig_height << " Network width:" << c.inWidth
                  << " height:" << c.inHeight << std::endl;
    }
#endif

    // De-interleave BGR into the planar layout the network expects:
    // [all B][all G][all R]
    cv::split(s_image, bgr_frames);
    memcpy(frame_buffer,                bgr_frames[0].ptr(), channel_size);
    memcpy(frame_buffer+1*channel_size, bgr_frames[1].ptr(), channel_size);
    memcpy(frame_buffer+2*channel_size, bgr_frames[2].ptr(), channel_size);
    return true;
}
466 // Create frame with boxes drawn around classified objects
467 bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
468 const Configuration& c, const cmdline_opts_t& opts,
469 float confidence_value)
470 {
471 // Asseembly original frame
472 int width = c.inWidth;
473 int height = c.inHeight;
474 int channel_size = width * height;
475 Mat frame, bgr[3];
477 unsigned char *in = (unsigned char *) eop.GetInputBufferPtr();
478 bgr[0] = Mat(height, width, CV_8UC(1), in);
479 bgr[1] = Mat(height, width, CV_8UC(1), in + channel_size);
480 bgr[2] = Mat(height, width, CV_8UC(1), in + channel_size*2);
481 cv::merge(bgr, 3, frame);
483 int frame_index = eop.GetFrameIndex();
484 char outfile_name[64];
485 if (opts.is_preprocessed_input)
486 {
487 snprintf(outfile_name, 64, "frame_%d.png", frame_index);
488 cv::imwrite(outfile_name, frame);
489 printf("Saving frame %d to: %s\n", frame_index, outfile_name);
490 }
492 // Draw boxes around classified objects
493 float *out = (float *) eop.GetOutputBufferPtr();
494 int num_floats = eop.GetOutputBufferSizeInBytes() / sizeof(float);
495 for (int i = 0; i < num_floats / 7; i++)
496 {
497 int index = (int) out[i * 7 + 0];
498 if (index < 0) break;
500 float score = out[i * 7 + 2];
501 if (score * 100 < confidence_value) continue;
503 int label = (int) out[i * 7 + 1];
504 int xmin = (int) (out[i * 7 + 3] * width);
505 int ymin = (int) (out[i * 7 + 4] * height);
506 int xmax = (int) (out[i * 7 + 5] * width);
507 int ymax = (int) (out[i * 7 + 6] * height);
509 const ObjectClass& object_class = object_classes->At(label);
511 if(opts.verbose) {
512 printf("%2d: (%d, %d) -> (%d, %d): %s, score=%f\n",
513 i, xmin, ymin, xmax, ymax, object_class.label.c_str(), score);
514 }
516 if (xmin < 0) xmin = 0;
517 if (ymin < 0) ymin = 0;
518 if (xmax > width) xmax = width;
519 if (ymax > height) ymax = height;
520 cv::rectangle(frame, Point(xmin, ymin), Point(xmax, ymax),
521 Scalar(object_class.color.blue,
522 object_class.color.green,
523 object_class.color.red), 2);
524 }
526 if (opts.is_camera_input || opts.is_video_input)
527 {
528 cv::imshow("SSD_Multibox", frame);
529 #ifdef DEBUG_FILES
530 // Image files can be converted into video using, example script
531 // (on desktop Ubuntu, with ffmpeg installed):
532 // ffmpeg -i multibox_%04d.png -vf "scale=(iw*sar)*max(768/(iw*sar)\,320/ih):ih*max(768/(iw*sar)\,320/ih), crop=768:320" -b:v 4000k out.mp4
533 // Update width 768, height 320, if necessary
534 snprintf(outfile_name, 64, "multibox_%04d.png", frame_index);
535 cv::imwrite(outfile_name, r_frame);
536 #endif
537 waitKey(1);
538 }
539 else
540 {
541 // Resize to output width/height, keep aspect ratio
542 Mat r_frame;
543 uint32_t output_width = opts.output_width;
544 if (output_width == 0) output_width = orig_width;
545 uint32_t output_height = (output_width*1.0f) / orig_width * orig_height;
546 cv::resize(frame, r_frame, Size(output_width, output_height));
548 snprintf(outfile_name, 64, "multibox_%d.png", frame_index);
549 cv::imwrite(outfile_name, frame);
550 printf("Saving frame %d with SSD multiboxes to: %s\n",
551 frame_index, outfile_name);
552 }
554 return true;
555 }
// Print command-line usage for the ssd_multibox demo to stdout.
void DisplayHelp()
{
    // Help text is kept in a single translation-unit-local constant so the
    // print statement stays trivial; the bytes written are unchanged.
    static const char* const kUsageText =
        "Usage: ssd_multibox\n"
        " Will run partitioned ssd_multibox network to perform "
        "multi-objects detection\n"
        " and classification. First part of network "
        "(layersGroupId 1) runs on EVE,\n"
        " second part (layersGroupId 2) runs on DSP.\n"
        " Use -c to run a different segmentation network. Default is jdetnet_voc.\n"
        "Optional arguments:\n"
        " -c <config> Valid configs: jdetnet_voc, jdetnet \n"
        " -d <number> Number of dsp cores to use\n"
        " -e <number> Number of eve cores to use\n"
        " -i <image> Path to the image file as input\n"
        " Default are 9 frames in testvecs\n"
        " -i camera<number> Use camera as input\n"
        " video input port: /dev/video<number>\n"
        " -i <name>.{mp4,mov,avi} Use video file as input\n"
        " -l <objects_list> Path to the object classes list file\n"
        " -f <number> Number of frames to process\n"
        " -w <number> Output image/video width\n"
        " -p <number> Output probability threshold in percentage\n"
        " Default is 25 percent or higher\n"
        " -v Verbose output during execution\n"
        " -h Help\n";
    std::cout << kUsageText;
}
583 }