35294c4d444a302dbf203369317d7adde1313a1b
1 /******************************************************************************
2 * Copyright (c) 2018, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
#include <signal.h>
#include <time.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <memory>
#include <queue>
#include <string>
#include <vector>

#include "executor.h"
#include "execution_object.h"
#include "configuration.h"
#include "object_classes.h"
#include "../common/utils.h"
#include "../common/video_utils.h"
51 using namespace std;
52 using namespace tidl;
53 using namespace cv;
56 #define NUM_VIDEO_FRAMES 300
57 #define DEFAULT_CONFIG "jseg21_tiscapes"
58 #define DEFAULT_INPUT "../test/testvecs/input/000100_1024x512_bgr.y"
59 #define DEFAULT_INPUT_FRAMES (9)
61 object_class_table_t *object_class_table;
62 uint32_t orig_width;
63 uint32_t orig_height;
66 bool RunConfiguration(const cmdline_opts_t& opts);
67 Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c);
68 bool ReadFrame(ExecutionObjectPipeline& eop,
69 uint32_t frame_idx, const Configuration& c,
70 const cmdline_opts_t& opts, VideoCapture &cap);
71 bool WriteFrameOutput(const ExecutionObjectPipeline &eop,
72 const Configuration& c, const cmdline_opts_t& opts);
73 void DisplayHelp();
76 int main(int argc, char *argv[])
77 {
78 // Catch ctrl-c to ensure a clean exit
79 signal(SIGABRT, exit);
80 signal(SIGTERM, exit);
82 // If there are no devices capable of offloading TIDL on the SoC, exit
83 uint32_t num_eves = Executor::GetNumDevices(DeviceType::EVE);
84 uint32_t num_dsps = Executor::GetNumDevices(DeviceType::DSP);
85 if (num_eves == 0 && num_dsps == 0)
86 {
87 cout << "TI DL not supported on this SoC." << endl;
88 return EXIT_SUCCESS;
89 }
91 // Process arguments
92 cmdline_opts_t opts;
93 opts.config = DEFAULT_CONFIG;
94 if (num_eves != 0) { opts.num_eves = 1; opts.num_dsps = 0; }
95 else { opts.num_eves = 0; opts.num_dsps = 1; }
96 if (! ProcessArgs(argc, argv, opts))
97 {
98 DisplayHelp();
99 exit(EXIT_SUCCESS);
100 }
101 assert(opts.num_dsps != 0 || opts.num_eves != 0);
102 if (opts.num_frames == 0)
103 opts.num_frames = (opts.is_camera_input || opts.is_video_input) ?
104 NUM_VIDEO_FRAMES :
105 (opts.input_file.empty() ? DEFAULT_INPUT_FRAMES : 1);
106 if (opts.input_file.empty())
107 cout << "Input: " << DEFAULT_INPUT << endl;
108 else
109 cout << "Input: " << opts.input_file << endl;
111 // Get object class table
112 if ((object_class_table = GetObjectClassTable(opts.config)) == nullptr)
113 {
114 cout << "No object classes defined for this config." << endl;
115 return EXIT_FAILURE;
116 }
118 // Run network
119 bool status = RunConfiguration(opts);
120 if (!status)
121 {
122 cout << "segmentation FAILED" << endl;
123 return EXIT_FAILURE;
124 }
126 cout << "segmentation PASSED" << endl;
127 return EXIT_SUCCESS;
128 }
130 bool RunConfiguration(const cmdline_opts_t& opts)
131 {
132 // Read the TI DL configuration file
133 Configuration c;
134 std::string config_file = "../test/testvecs/config/infer/tidl_config_"
135 + opts.config + ".txt";
136 bool status = c.ReadFromFile(config_file);
137 if (!status)
138 {
139 cerr << "Error in configuration file: " << config_file << endl;
140 return false;
141 }
142 c.enableApiTrace = opts.verbose;
144 // setup camera/video input/output
145 VideoCapture cap;
146 if (! SetVideoInputOutput(cap, opts, "Segmentation")) return false;
148 try
149 {
150 // Create Executors with the approriate core type, number of cores
151 // and configuration specified
152 Executor* e_eve = CreateExecutor(DeviceType::EVE, opts.num_eves, c);
153 Executor* e_dsp = CreateExecutor(DeviceType::DSP, opts.num_dsps, c);
155 // Get ExecutionObjects from Executors
156 vector<ExecutionObject*> eos;
157 for (uint32_t i = 0; i < opts.num_eves; i++) eos.push_back((*e_eve)[i]);
158 for (uint32_t i = 0; i < opts.num_dsps; i++) eos.push_back((*e_dsp)[i]);
159 uint32_t num_eos = eos.size();
161 // Use duplicate EOPs to do double buffering on frame input/output
162 // because each EOP has its own set of input/output buffers,
163 // so that host ReadFrame() can be overlapped with device processing
164 // Use one EO as an example, with different buffer_factor,
165 // we have different execution behavior:
166 // If buffer_factor is set to 1 -> single buffering
167 // we create one EOP: eop0 (eo0)
168 // pipeline execution of multiple frames over time is as follows:
169 // --------------------- time ------------------->
170 // eop0: [RF][eo0.....][WF]
171 // eop0: [RF][eo0.....][WF]
172 // eop0: [RF][eo0.....][WF]
173 // If buffer_factor is set to 2 -> double buffering
174 // we create two EOPs: eop0 (eo0), eop1(eo0)
175 // pipeline execution of multiple frames over time is as follows:
176 // --------------------- time ------------------->
177 // eop0: [RF][eo0.....][WF]
178 // eop1: [RF] [eo0.....][WF]
179 // eop0: [RF] [eo0.....][WF]
180 // eop1: [RF] [eo0.....][WF]
181 vector<ExecutionObjectPipeline *> eops;
182 uint32_t buffer_factor = 2; // set to 1 for single buffering
183 for (uint32_t j = 0; j < buffer_factor; j++)
184 for (uint32_t i = 0; i < num_eos; i++)
185 eops.push_back(new ExecutionObjectPipeline({eos[i]}));
186 uint32_t num_eops = eops.size();
188 // Allocate input and output buffers for each EOP
189 AllocateMemory(eops);
191 chrono::time_point<chrono::steady_clock> tloop0, tloop1;
192 tloop0 = chrono::steady_clock::now();
194 // Process frames with available eops in a pipelined manner
195 // additional num_eos iterations to flush the pipeline (epilogue)
196 for (uint32_t frame_idx = 0;
197 frame_idx < opts.num_frames + num_eops; frame_idx++)
198 {
199 ExecutionObjectPipeline* eop = eops[frame_idx % num_eops];
201 // Wait for previous frame on the same eop to finish processing
202 if (eop->ProcessFrameWait())
203 {
204 ReportTime(eop);
205 WriteFrameOutput(*eop, c, opts);
206 }
208 // Read a frame and start processing it with current eop
209 if (ReadFrame(*eop, frame_idx, c, opts, cap))
210 eop->ProcessFrameStartAsync();
211 }
213 tloop1 = chrono::steady_clock::now();
214 chrono::duration<float> elapsed = tloop1 - tloop0;
215 cout << "Loop total time (including read/write/opencv/print/etc): "
216 << setw(6) << setprecision(4)
217 << (elapsed.count() * 1000) << "ms" << endl;
219 FreeMemory(eops);
220 for (auto eop : eops) delete eop;
221 delete e_eve;
222 delete e_dsp;
223 }
224 catch (tidl::Exception &e)
225 {
226 cerr << e.what() << endl;
227 status = false;
228 }
230 return status;
231 }
233 // Create an Executor with the specified type and number of EOs
234 Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c)
235 {
236 if (num == 0) return nullptr;
238 DeviceIds ids;
239 for (uint32_t i = 0; i < num; i++)
240 ids.insert(static_cast<DeviceId>(i));
242 return new Executor(dt, ids, c);
243 }
245 bool ReadFrame(ExecutionObjectPipeline &eop,
246 uint32_t frame_idx, const Configuration& c,
247 const cmdline_opts_t& opts, VideoCapture &cap)
248 {
249 if (frame_idx >= opts.num_frames)
250 return false;
251 eop.SetFrameIndex(frame_idx);
253 char* frame_buffer = eop.GetInputBufferPtr();
254 assert (frame_buffer != nullptr);
255 int channel_size = c.inWidth * c.inHeight;
257 Mat image;
258 if (! opts.is_camera_input && ! opts.is_video_input)
259 {
260 if (opts.input_file.empty())
261 {
262 ifstream ifs(DEFAULT_INPUT, ios::binary);
263 ifs.seekg((frame_idx % DEFAULT_INPUT_FRAMES) * channel_size * 3);
264 ifs.read(frame_buffer, channel_size * 3);
265 bool ifs_status = ifs.good();
266 ifs.close();
267 orig_width = c.inWidth;
268 orig_height = c.inHeight;
269 return ifs_status; // already PreProc-ed
270 }
271 else
272 {
273 image = cv::imread(opts.input_file, CV_LOAD_IMAGE_COLOR);
274 if (image.empty())
275 {
276 cerr << "Unable to read from: " << opts.input_file << endl;
277 return false;
278 }
279 }
280 }
281 else
282 {
283 // 640x480 camera input, process one in every 5 frames,
284 // can adjust number of skipped frames to match real time processing
285 if (! cap.grab()) return false;
286 if (! cap.grab()) return false;
287 if (! cap.grab()) return false;
288 if (! cap.grab()) return false;
289 if (! cap.grab()) return false;
290 if (! cap.retrieve(image)) return false;
291 }
293 // scale to network input size 1024 x 512
294 Mat s_image, bgr_frames[3];
295 orig_width = image.cols;
296 orig_height = image.rows;
297 cv::resize(image, s_image, Size(c.inWidth, c.inHeight),
298 0, 0, cv::INTER_AREA);
299 cv::split(s_image, bgr_frames);
300 memcpy(frame_buffer, bgr_frames[0].ptr(), channel_size);
301 memcpy(frame_buffer+1*channel_size, bgr_frames[1].ptr(), channel_size);
302 memcpy(frame_buffer+2*channel_size, bgr_frames[2].ptr(), channel_size);
303 return true;
304 }
306 // Create Overlay mask for pixel-level segmentation
307 void CreateMask(uchar *classes, uchar *mb, uchar *mg, uchar* mr,
308 int channel_size)
309 {
310 for (int i = 0; i < channel_size; i++)
311 {
312 object_class_t *object_class = GetObjectClass(object_class_table,
313 classes[i]);
314 mb[i] = object_class->color.blue;
315 mg[i] = object_class->color.green;
316 mr[i] = object_class->color.red;
317 }
318 }
// Create frame overlayed with pixel-level segmentation.
// Blends the original input frame with a per-class color mask, resizes the
// result to the requested output width (aspect ratio preserved), then either
// displays it (camera/video input) or writes PNG files (file input).
// Always returns true.
bool WriteFrameOutput(const ExecutionObjectPipeline &eop,
                      const Configuration& c,
                      const cmdline_opts_t& opts)
{
    // Network output: one class id byte per pixel.
    unsigned char *out = (unsigned char *) eop.GetOutputBufferPtr();
    int width = c.inWidth;
    int height = c.inHeight;
    int channel_size = width * height;

    Mat mask, frame, blend, r_blend, bgr[3];
    // Create overlay mask: allocate three single-channel planes and color
    // them from the class table, then interleave into a BGR image.
    bgr[0] = Mat(height, width, CV_8UC(1));
    bgr[1] = Mat(height, width, CV_8UC(1));
    bgr[2] = Mat(height, width, CV_8UC(1));
    CreateMask(out, bgr[0].ptr(), bgr[1].ptr(), bgr[2].ptr(), channel_size);
    cv::merge(bgr, 3, mask);

    // Assemble the original frame: wrap the planar BGR data already in the
    // EOP input buffer with non-owning Mat headers (no copies) and merge.
    unsigned char *in = (unsigned char *) eop.GetInputBufferPtr();
    bgr[0] = Mat(height, width, CV_8UC(1), in);
    bgr[1] = Mat(height, width, CV_8UC(1), in + channel_size);
    bgr[2] = Mat(height, width, CV_8UC(1), in + channel_size*2);
    cv::merge(bgr, 3, frame);

    // Create overlayed frame: 70% original, 30% class-color mask.
    cv::addWeighted(frame, 0.7, mask, 0.3, 0.0, blend);

    // Resize to output width/height, keep aspect ratio of the source frame
    // (orig_width/orig_height are set by ReadFrame).
    uint32_t output_width = opts.output_width;
    if (output_width == 0) output_width = orig_width;
    uint32_t output_height = (output_width*1.0f) / orig_width * orig_height;
    cv::resize(blend, r_blend, Size(output_width, output_height));

    if (opts.is_camera_input || opts.is_video_input)
    {
        // Live input: show on screen; waitKey(1) lets OpenCV pump its UI.
        cv::imshow("Segmentation", r_blend);
        waitKey(1);
    }
    else
    {
        int frame_index = eop.GetFrameIndex();
        char outfile_name[64];
        // For the default test vector input also dump the reconstructed
        // original frame alongside the overlay.
        if (opts.input_file.empty())
        {
            snprintf(outfile_name, 64, "frame_%d.png", frame_index);
            cv::imwrite(outfile_name, frame);
            printf("Saving frame %d to: %s\n", frame_index, outfile_name);
        }

        snprintf(outfile_name, 64, "overlay_%d.png", frame_index);
        cv::imwrite(outfile_name, r_blend);
        printf("Saving frame %d overlayed with segmentation to: %s\n",
               frame_index, outfile_name);
    }

    return true;
}
// Print command-line usage for the segmentation demo to stdout.
void DisplayHelp()
{
    static const char usage_text[] =
        "Usage: segmentation\n"
        " Will run segmentation network to perform pixel-level"
        " classification.\n Use -c to run a different"
        " segmentation network. Default is jseg21_tiscapes.\n"
        "Optional arguments:\n"
        " -c <config> Valid configs: jseg21_tiscapes, jseg21\n"
        " -d <number> Number of dsp cores to use\n"
        " -e <number> Number of eve cores to use\n"
        " -i <image> Path to the image file as input\n"
        " Default are 9 frames in testvecs\n"
        " -i camera<number> Use camera as input\n"
        " video input port: /dev/video<number>\n"
        " -i <name>.{mp4,mov,avi} Use video file as input\n"
        " -f <number> Number of frames to process\n"
        " -w <number> Output image/video width\n"
        " -v Verbose output during execution\n"
        " -h Help\n";
    std::cout << usage_text;
}