b302cfa7529128f69580152adcc17169af6db815
1 /******************************************************************************
2 * Copyright (c) 2018, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
#include <signal.h>
#include <getopt.h>
#include <time.h>
#include <unistd.h>

#include <iostream>
#include <iomanip>
#include <fstream>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <functional>
#include <algorithm>
#include <memory>
#include <queue>
#include <vector>

#include "executor.h"
#include "execution_object.h"
#include "execution_object_pipeline.h"
#include "configuration.h"
#include "../segmentation/object_classes.h"

#include "opencv2/core.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/videoio.hpp"
#define NUM_VIDEO_FRAMES 100
#define DEFAULT_CONFIG "jdetnet"
#define DEFAULT_INPUT "../test/testvecs/input/preproc_0_768x320.y"

// Global state shared between main(), frame I/O and output rendering.
bool __TI_show_debug_ = false;       // TIDL API verbose tracing, enabled by -v
bool is_default_input = false;       // input is the bundled default test vector
bool is_preprocessed_input = false;  // input is a raw preprocessed ".y" file
bool is_camera_input = false;        // input comes from a live camera
int orig_width;                      // pre-resize input width, set by ReadFrame()
int orig_height;                     // pre-resize input height, set by ReadFrame()
object_class_table_t *object_class_table;  // label -> name/color lookup used when drawing boxes

using namespace tidl;
using namespace cv;
// ---- Forward declarations -----------------------------------------------

// Run the partitioned network described by config_file on num_dsps DSP and
// num_eves EVE cores; returns true on success.
bool RunConfiguration(const std::string& config_file,
                      uint32_t num_dsps, uint32_t num_eves,
                      DeviceType device_type, std::string& input_file);

// Fill the EOP's input buffer with the next frame from file or camera;
// returns false when there is no more input to process.
bool ReadFrame(ExecutionObjectPipeline& eop, int frame_idx,
               const Configuration& configuration, int num_frames,
               std::string& image_file, VideoCapture &cap);

// Draw detection boxes on the reconstructed frame; display (camera input)
// or save to a numbered PNG (file input).
bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
                      const Configuration& configuration);

// Print per-frame device/host timing and the implied API overhead.
void ReportTime(int frame_index, std::string device_name, double elapsed_host,
                double elapsed_device);

// Parse command line options into the caller-supplied out-parameters.
static void ProcessArgs(int argc, char *argv[],
                        std::string& config,
                        uint32_t& num_dsps,
                        uint32_t& num_eves,
                        DeviceType& device_type,
                        std::string& input_file);

// Print usage text to stdout.
static void DisplayHelp();
// Elapsed wall-clock time from t0 to t1, expressed in milliseconds.
static double ms_diff(struct timespec &t0, struct timespec &t1)
{
    double whole_seconds_ms = (t1.tv_sec - t0.tv_sec) * 1e3;
    double nano_part_ms     = (t1.tv_nsec - t0.tv_nsec) / 1e6;
    return whole_seconds_ms + nano_part_ms;
}
int main(int argc, char *argv[])
{
    // Route abnormal-termination signals through exit() so static
    // destructors run.  NOTE(review): the original comment claimed ctrl-c
    // handling, but SIGINT is never registered (only SIGABRT/SIGTERM), and
    // exit() is not async-signal-safe -- confirm intent.
    signal(SIGABRT, exit);
    signal(SIGTERM, exit);

    // If there are no devices capable of offloading TIDL on the SoC, exit.
    // Returning success (not failure) lets test harnesses skip cleanly on
    // hardware without EVE/DSP cores.
    uint32_t num_eve = Executor::GetNumDevices(DeviceType::EVE);
    uint32_t num_dsp = Executor::GetNumDevices(DeviceType::DSP);
    if (num_eve == 0 || num_dsp == 0)
    {
        std::cout << "ssd_multibox requires both EVE and DSP for execution."
                  << std::endl;
        return EXIT_SUCCESS;
    }

    // Process arguments
    std::string config = DEFAULT_CONFIG;     // network config name (-c)
    std::string input_file = DEFAULT_INPUT;  // image file or "camera" (-i)
    uint32_t num_dsps = 1;                   // DSP cores to use (-d)
    uint32_t num_eves = 1;                   // EVE cores to use (-e)
    DeviceType device_type = DeviceType::EVE;
    ProcessArgs(argc, argv, config, num_dsps, num_eves,
                device_type, input_file);

    // Look up the label -> name/color table for this network config
    if ((object_class_table = GetObjectClassTable(config)) == nullptr)
    {
        std::cout << "No object classes defined for this config." << std::endl;
        return EXIT_FAILURE;
    }

    // Classify the input source; a ".y" suffix marks raw preprocessed
    // planar data that skips the OpenCV decode/resize path.
    if (input_file == DEFAULT_INPUT) is_default_input = true;
    if (input_file == "camera") is_camera_input = true;
    if (input_file.length() > 2 &&
        input_file.compare(input_file.length() - 2, 2, ".y") == 0)
        is_preprocessed_input = true;
    std::cout << "Input: " << input_file << std::endl;

    // Configuration files live under the test vectors tree, keyed by name
    std::string config_file = "../test/testvecs/config/infer/tidl_config_"
                              + config + ".txt";
    bool status = RunConfiguration(config_file, num_dsps, num_eves,
                                   device_type, input_file);

    if (!status)
    {
        std::cout << "ssd_multibox FAILED" << std::endl;
        return EXIT_FAILURE;
    }

    std::cout << "ssd_multibox PASSED" << std::endl;
    return EXIT_SUCCESS;
}
148 bool RunConfiguration(const std::string& config_file,
149 uint32_t num_dsps, uint32_t num_eves,
150 DeviceType device_type, std::string& input_file)
151 {
152 DeviceIds ids_eve, ids_dsp;
153 for (int i = 0; i < num_eves; i++)
154 ids_eve.insert(static_cast<DeviceId>(i));
155 for (int i = 0; i < num_dsps; i++)
156 ids_dsp.insert(static_cast<DeviceId>(i));
158 // Read the TI DL configuration file
159 Configuration configuration;
160 bool status = configuration.ReadFromFile(config_file);
161 if (!status)
162 {
163 std::cerr << "Error in configuration file: " << config_file
164 << std::endl;
165 return false;
166 }
168 // setup input
169 int num_frames = is_default_input ? 9 : 9;
170 VideoCapture cap;
171 std::string image_file;
172 if (is_camera_input)
173 {
174 cap = VideoCapture(1); // cap = VideoCapture("test.mp4");
175 if (! cap.isOpened())
176 {
177 std::cerr << "Cannot open camera input." << std::endl;
178 return false;
179 }
180 num_frames = NUM_VIDEO_FRAMES;
181 namedWindow("SSD_Multibox", WINDOW_AUTOSIZE | CV_GUI_NORMAL);
182 }
183 else
184 {
185 image_file = input_file;
186 }
188 try
189 {
190 // Create a executor with the approriate core type, number of cores
191 // and configuration specified
192 // EVE will run layersGroupId 1 in the network, while
193 // DSP will run layersGroupId 2 in the network
194 Executor exe_eve(DeviceType::EVE, ids_eve, configuration, 1);
195 Executor exe_dsp(DeviceType::DSP, ids_dsp, configuration, 2);
197 // Construct ExecutionObjectPipeline that utilizes multiple
198 // ExecutionObjects to process a single frame, each ExecutionObject
199 // processes one layerGroup of the network
200 int num_eops = std::max(num_eves, num_dsps);
201 std::vector<ExecutionObjectPipeline *> eops;
202 for (int i = 0; i < num_eops; i++)
203 eops.push_back(new ExecutionObjectPipeline({exe_eve[i%num_eves],
204 exe_dsp[i%num_dsps]}));
206 // Allocate input/output memory for each EOP
207 std::vector<void *> buffers;
208 for (auto eop : eops)
209 {
210 size_t in_size = eop->GetInputBufferSizeInBytes();
211 size_t out_size = eop->GetOutputBufferSizeInBytes();
212 void* in_ptr = malloc(in_size);
213 void* out_ptr = malloc(out_size);
214 assert(in_ptr != nullptr && out_ptr != nullptr);
215 buffers.push_back(in_ptr);
216 buffers.push_back(out_ptr);
218 ArgInfo in(in_ptr, in_size);
219 ArgInfo out(out_ptr, out_size);
220 eop->SetInputOutputBuffer(in, out);
221 }
223 struct timespec tloop0, tloop1;
224 clock_gettime(CLOCK_MONOTONIC, &tloop0);
226 // Process frames with ExecutionObjectPipelines in a pipelined manner
227 // additional num_eops iterations to flush pipeline (epilogue)
228 for (int frame_idx = 0; frame_idx < num_frames + num_eops; frame_idx++)
229 {
230 ExecutionObjectPipeline* eop = eops[frame_idx % num_eops];
232 // Wait for previous frame on the same eop to finish processing
233 if (eop->ProcessFrameWait())
234 {
235 ReportTime(eop->GetFrameIndex(), eop->GetDeviceName(),
236 eop->GetHostProcessTimeInMilliSeconds(),
237 eop->GetProcessTimeInMilliSeconds());
238 WriteFrameOutput(*eop, configuration);
239 }
241 // Read a frame and start processing it with current eo
242 if (ReadFrame(*eop, frame_idx, configuration, num_frames,
243 image_file, cap))
244 {
245 eop->ProcessFrameStartAsync();
246 }
247 }
249 clock_gettime(CLOCK_MONOTONIC, &tloop1);
250 std::cout << "Loop total time (including read/write/print/etc): "
251 << std::setw(6) << std::setprecision(4)
252 << ms_diff(tloop0, tloop1) << "ms" << std::endl;
254 for (auto eop : eops)
255 delete eop;
256 for (auto b : buffers)
257 free(b);
258 }
259 catch (tidl::Exception &e)
260 {
261 std::cerr << e.what() << std::endl;
262 status = false;
263 }
265 return status;
266 }
// Print the device and host processing times for one frame, followed by
// the TIDL API overhead percentage implied by their difference.
void ReportTime(int frame_index, std::string device_name, double elapsed_host,
                double elapsed_device)
{
    const double api_overhead = 100 - (elapsed_device/elapsed_host*100);

    std::ostream& os = std::cout;
    os << "frame[" << frame_index << "]: "
       << "Time on " << device_name << ": ";
    os << std::setw(6) << std::setprecision(4) << elapsed_device << "ms, ";
    os << "host: "
       << std::setw(6) << std::setprecision(4) << elapsed_host << "ms ";
    os << "API overhead: "
       << std::setw(6) << std::setprecision(3) << api_overhead
       << " %" << std::endl;
}
/**
 * Read one frame into the EOP's input buffer as planar B,G,R channels.
 *
 * Three sources are supported: a raw preprocessed ".y" file (copied in
 * verbatim), an image file (decoded with OpenCV), or a live camera.
 * Also records the pre-resize dimensions in orig_width/orig_height so
 * WriteFrameOutput() can scale the result back.
 *
 * @return true if a frame was loaded and processing should start,
 *         false when frame_idx is past num_frames or input failed.
 */
bool ReadFrame(ExecutionObjectPipeline& eop, int frame_idx,
               const Configuration& configuration, int num_frames,
               std::string& image_file, VideoCapture &cap)
{
    if (frame_idx >= num_frames)
        return false;
    eop.SetFrameIndex(frame_idx);

    char* frame_buffer = eop.GetInputBufferPtr();
    assert (frame_buffer != nullptr);
    // Bytes per color plane at the network's input resolution
    int channel_size = configuration.inWidth * configuration.inHeight;

    Mat image;
    if (! image_file.empty())
    {
        if (is_preprocessed_input)
        {
            // Raw planar data already matching the network input size.
            // NOTE(review): the seekg below is commented out, so every
            // call re-reads from the start of the file (i.e. the same
            // first frame) -- confirm whether multi-frame .y input is
            // intended.
            std::ifstream ifs(image_file, std::ios::binary);
            //ifs.seekg(frame_idx * channel_size * 3);
            ifs.read(frame_buffer, channel_size * 3);
            bool ifs_status = ifs.good();
            ifs.close();
            orig_width = configuration.inWidth;
            orig_height = configuration.inHeight;
            return ifs_status; // already PreProc-ed
        }
        else
        {
            image = cv::imread(image_file, CV_LOAD_IMAGE_COLOR);
            if (image.empty())
            {
                std::cerr << "Unable to read from: " << image_file << std::endl;
                return false;
            }
        }
    }
    else
    {
        // 640x480 camera input, process one in every 5 frames,
        // can adjust number of skipped frames to match real time processing
        if (! cap.grab()) return false;
        if (! cap.grab()) return false;
        if (! cap.grab()) return false;
        if (! cap.grab()) return false;
        if (! cap.grab()) return false;
        if (! cap.retrieve(image)) return false;
    }

    // Scale to network input size, then split the interleaved BGR image
    // into three planar channels laid out back-to-back in the input buffer
    Mat s_image, bgr_frames[3];
    orig_width = image.cols;
    orig_height = image.rows;
    cv::resize(image, s_image,
               Size(configuration.inWidth, configuration.inHeight),
               0, 0, cv::INTER_AREA);
    cv::split(s_image, bgr_frames);
    memcpy(frame_buffer,                bgr_frames[0].ptr(), channel_size);
    memcpy(frame_buffer+1*channel_size, bgr_frames[1].ptr(), channel_size);
    memcpy(frame_buffer+2*channel_size, bgr_frames[2].ptr(), channel_size);
    return true;
}
/**
 * Create a frame with boxes drawn around classified objects and either
 * display it (camera input) or save it to a numbered PNG (file input).
 */
bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
                      const Configuration& configuration)
{
    // Reassemble the frame from the three planar channels that ReadFrame()
    // packed back-to-back into the EOP input buffer (B, G, R order)
    int width = configuration.inWidth;
    int height = configuration.inHeight;
    int channel_size = width * height;
    Mat frame, r_frame, bgr[3];

    unsigned char *in = (unsigned char *) eop.GetInputBufferPtr();
    bgr[0] = Mat(height, width, CV_8UC(1), in);
    bgr[1] = Mat(height, width, CV_8UC(1), in + channel_size);
    bgr[2] = Mat(height, width, CV_8UC(1), in + channel_size*2);
    cv::merge(bgr, 3, frame);

    int frame_index = eop.GetFrameIndex();
    char outfile_name[64];
    // For preprocessed file input, also dump the reconstructed input frame
    if (! is_camera_input && is_preprocessed_input)
    {
        snprintf(outfile_name, 64, "frame_%d.png", frame_index);
        cv::imwrite(outfile_name, frame);
        printf("Saving frame %d to: %s\n", frame_index, outfile_name);
    }

    // Draw boxes around classified objects.  The output buffer holds 7
    // floats per candidate detection -- index, label, score, then box
    // corners as fractions of the frame size (scaled by width/height
    // below); a negative index terminates the list.
    float *out = (float *) eop.GetOutputBufferPtr();
    int num_floats = eop.GetOutputBufferSizeInBytes() / sizeof(float);
    for (int i = 0; i < num_floats / 7; i++)
    {
        int index = (int) out[i * 7 + 0];
        if (index < 0) break;

        int label = (int) out[i * 7 + 1];
        float score = out[i * 7 + 2];
        int xmin = (int) (out[i * 7 + 3] * width);
        int ymin = (int) (out[i * 7 + 4] * height);
        int xmax = (int) (out[i * 7 + 5] * width);
        int ymax = (int) (out[i * 7 + 6] * height);

        // Skip labels the current config has no class table entry for
        object_class_t *object_class = GetObjectClass(object_class_table,
                                                      label);
        if (object_class == nullptr) continue;

#if 0
        printf("(%d, %d) -> (%d, %d): %s, score=%f\n",
               xmin, ymin, xmax, ymax, object_class->label, score);
#endif

        cv::rectangle(frame, Point(xmin, ymin), Point(xmax, ymax),
                      Scalar(object_class->color.blue,
                             object_class->color.green,
                             object_class->color.red), 2);
    }

    // Scale back to the original input size before output
    cv::resize(frame, r_frame, Size(orig_width, orig_height));
    if (is_camera_input)
    {
        cv::imshow("SSD_Multibox", r_frame);
        waitKey(1);
    }
    else
    {
        snprintf(outfile_name, 64, "multibox_%d.png", frame_index);
        cv::imwrite(outfile_name, r_frame);
        printf("Saving frame %d with SSD multiboxes to: %s\n",
               frame_index, outfile_name);
    }

    return true;
}
421 void ProcessArgs(int argc, char *argv[], std::string& config,
422 uint32_t& num_dsps, uint32_t& num_eves,
423 DeviceType& device_type, std::string& input_file)
424 {
425 const struct option long_options[] =
426 {
427 {"config", required_argument, 0, 'c'},
428 {"num_dsps", required_argument, 0, 'd'},
429 {"num_eves", required_argument, 0, 'e'},
430 {"image_file", required_argument, 0, 'i'},
431 {"help", no_argument, 0, 'h'},
432 {"verbose", no_argument, 0, 'v'},
433 {0, 0, 0, 0}
434 };
436 int option_index = 0;
438 while (true)
439 {
440 int c = getopt_long(argc, argv, "c:d:e:i:hv", long_options,
441 &option_index);
443 if (c == -1)
444 break;
446 switch (c)
447 {
448 case 'c': config = optarg;
449 break;
451 case 'd': num_dsps = atoi(optarg);
452 assert (num_dsps > 0 && num_dsps <=
453 Executor::GetNumDevices(DeviceType::DSP));
454 break;
456 case 'e': num_eves = atoi(optarg);
457 assert (num_eves > 0 && num_eves <=
458 Executor::GetNumDevices(DeviceType::EVE));
459 break;
461 case 'i': input_file = optarg;
462 break;
464 case 'v': __TI_show_debug_ = true;
465 break;
467 case 'h': DisplayHelp();
468 exit(EXIT_SUCCESS);
469 break;
471 case '?': // Error in getopt_long
472 exit(EXIT_FAILURE);
473 break;
475 default:
476 std::cerr << "Unsupported option: " << c << std::endl;
477 break;
478 }
479 }
480 }
// Print usage instructions for the ssd_multibox example to stdout.
void DisplayHelp()
{
    static const char usage_text[] =
        "Usage: ssd_multibox\n"
        " Will run partitioned ssd_multibox network to perform "
        "multi-objects detection\n"
        " and classification. First part of network "
        "(layersGroupId 1) runs on EVE,\n"
        " second part (layersGroupId 2) runs on DSP.\n"
        " Use -c to run a different segmentation network. "
        "Default is jdetnet.\n"
        "Optional arguments:\n"
        " -c <config> Valid configs: jdetnet \n"
        " -d <number> Number of dsp cores to use\n"
        " -e <number> Number of eve cores to use\n"
        " -i <image> Path to the image file\n"
        " Default is 1 frame in testvecs\n"
        " -i camera Use camera as input\n"
        " -v Verbose output during execution\n"
        " -h Help\n";
    std::cout << usage_text;
}