1 /******************************************************************************
2 * Copyright (c) 2018, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28 #include <signal.h>
29 #include <iostream>
30 #include <iomanip>
31 #include <fstream>
32 #include <cassert>
33 #include <string>
34 #include <functional>
35 #include <algorithm>
36 #include <time.h>
37 #include <unistd.h>
39 #include <queue>
40 #include <vector>
41 #include <cstdio>
42 #include <chrono>
44 #include "executor.h"
45 #include "execution_object.h"
46 #include "configuration.h"
47 #include "../common/object_classes.h"
48 #include "../common/utils.h"
49 #include "../common/video_utils.h"
using namespace std;
using namespace tidl;
using namespace cv;

// Number of frames to process when input is a camera or video stream
#define NUM_VIDEO_FRAMES 300
// Default network configuration name (expands to tidl_config_<name>.txt)
#define DEFAULT_CONFIG "jseg21_tiscapes"
// Default preprocessed raw input file (planar BGR frames, per its name 1024x512)
#define DEFAULT_INPUT "../test/testvecs/input/000100_1024x512_bgr.y"
// Number of frames contained in DEFAULT_INPUT (ReadFrame wraps modulo this)
#define DEFAULT_INPUT_FRAMES (9)
// JSON file describing the object classes (names/colors) for this network
#define DEFAULT_OBJECT_CLASSES_LIST_FILE "jseg21_objects.json"

// Class palette shared by CreateMask; created once in main()
std::unique_ptr<ObjectClasses> object_classes;
// Dimensions of the most recently read input frame; written by ReadFrame,
// read by WriteFrameOutput to size the displayed/saved output
uint32_t orig_width;
uint32_t orig_height;

// Forward declarations (definitions below)
bool RunConfiguration(const cmdline_opts_t& opts);
Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c);
bool ReadFrame(ExecutionObjectPipeline& eop,
               uint32_t frame_idx, const Configuration& c,
               const cmdline_opts_t& opts, VideoCapture &cap);
bool WriteFrameOutput(const ExecutionObjectPipeline &eop,
                      const Configuration& c, const cmdline_opts_t& opts);
void DisplayHelp();
77 int main(int argc, char *argv[])
78 {
79 // Catch ctrl-c to ensure a clean exit
80 signal(SIGABRT, exit);
81 signal(SIGTERM, exit);
83 // If there are no devices capable of offloading TIDL on the SoC, exit
84 uint32_t num_eves = Executor::GetNumDevices(DeviceType::EVE);
85 uint32_t num_dsps = Executor::GetNumDevices(DeviceType::DSP);
86 if (num_eves == 0 && num_dsps == 0)
87 {
88 cout << "TI DL not supported on this SoC." << endl;
89 return EXIT_SUCCESS;
90 }
92 // Process arguments
93 cmdline_opts_t opts;
94 opts.config = DEFAULT_CONFIG;
95 opts.object_classes_list_file = DEFAULT_OBJECT_CLASSES_LIST_FILE;
96 if (num_eves != 0) { opts.num_eves = 1; opts.num_dsps = 0; }
97 else { opts.num_eves = 0; opts.num_dsps = 1; }
98 if (! ProcessArgs(argc, argv, opts))
99 {
100 DisplayHelp();
101 exit(EXIT_SUCCESS);
102 }
103 assert(opts.num_dsps != 0 || opts.num_eves != 0);
104 if (opts.num_frames == 0)
105 opts.num_frames = (opts.is_camera_input || opts.is_video_input) ?
106 NUM_VIDEO_FRAMES :
107 (opts.input_file.empty() ? DEFAULT_INPUT_FRAMES : 1);
108 if (opts.input_file.empty())
109 cout << "Input: " << DEFAULT_INPUT << endl;
110 else
111 cout << "Input: " << opts.input_file << endl;
113 // Get object classes list
114 object_classes = std::unique_ptr<ObjectClasses>(
115 new ObjectClasses(opts.object_classes_list_file));
116 if (object_classes->GetNumClasses() == 0)
117 {
118 cout << "No object classes defined for this config." << endl;
119 return EXIT_FAILURE;
120 }
122 // Run network
123 bool status = RunConfiguration(opts);
124 if (!status)
125 {
126 cout << "segmentation FAILED" << endl;
127 return EXIT_FAILURE;
128 }
130 cout << "segmentation PASSED" << endl;
131 return EXIT_SUCCESS;
132 }
// Read the TIDL network configuration named by opts.config, set up the
// camera/video/file input, build Executors and double-buffered
// ExecutionObjectPipelines, and run the pipelined frame loop.
// Returns false on configuration errors or any tidl::Exception.
bool RunConfiguration(const cmdline_opts_t& opts)
{
    // Read the TI DL configuration file
    Configuration c;
    std::string config_file = "../test/testvecs/config/infer/tidl_config_"
                              + opts.config + ".txt";
    bool status = c.ReadFromFile(config_file);
    if (!status)
    {
        cerr << "Error in configuration file: " << config_file << endl;
        return false;
    }
    c.enableApiTrace = opts.verbose;

    // setup camera/video input/output
    VideoCapture cap;
    if (! SetVideoInputOutput(cap, opts, "Segmentation")) return false;

    try
    {
        // Create Executors with the approriate core type, number of cores
        // and configuration specified.
        // NOTE: either pointer may be nullptr (CreateExecutor returns
        // nullptr for num == 0); the loops below only dereference an
        // executor when its core count is non-zero, and delete nullptr
        // at the end is a no-op.
        Executor* e_eve = CreateExecutor(DeviceType::EVE, opts.num_eves, c);
        Executor* e_dsp = CreateExecutor(DeviceType::DSP, opts.num_dsps, c);

        // Get ExecutionObjects from Executors
        vector<ExecutionObject*> eos;
        for (uint32_t i = 0; i < opts.num_eves; i++) eos.push_back((*e_eve)[i]);
        for (uint32_t i = 0; i < opts.num_dsps; i++) eos.push_back((*e_dsp)[i]);
        uint32_t num_eos = eos.size();

        // Use duplicate EOPs to do double buffering on frame input/output
        // because each EOP has its own set of input/output buffers,
        // so that host ReadFrame() can be overlapped with device processing
        // Use one EO as an example, with different buffer_factor,
        // we have different execution behavior:
        // If buffer_factor is set to 1 -> single buffering
        //    we create one EOP: eop0 (eo0)
        //    pipeline execution of multiple frames over time is as follows:
        //    --------------------- time ------------------->
        //    eop0: [RF][eo0.....][WF]
        //    eop0:                   [RF][eo0.....][WF]
        //    eop0:                                    [RF][eo0.....][WF]
        // If buffer_factor is set to 2 -> double buffering
        //    we create two EOPs: eop0 (eo0), eop1(eo0)
        //    pipeline execution of multiple frames over time is as follows:
        //    --------------------- time ------------------->
        //    eop0: [RF][eo0.....][WF]
        //    eop1: [RF]          [eo0.....][WF]
        //    eop0:     [RF]                [eo0.....][WF]
        //    eop1:         [RF]                         [eo0.....][WF]
        vector<ExecutionObjectPipeline *> eops;
        uint32_t buffer_factor = 2;  // set to 1 for single buffering
        for (uint32_t j = 0; j < buffer_factor; j++)
            for (uint32_t i = 0; i < num_eos; i++)
                eops.push_back(new ExecutionObjectPipeline({eos[i]}));
        uint32_t num_eops = eops.size();

        // Allocate input and output buffers for each EOP
        AllocateMemory(eops);

        chrono::time_point<chrono::steady_clock> tloop0, tloop1;
        tloop0 = chrono::steady_clock::now();

        // Process frames with available eops in a pipelined manner
        // additional num_eos iterations to flush the pipeline (epilogue)
        for (uint32_t frame_idx = 0;
             frame_idx < opts.num_frames + num_eops; frame_idx++)
        {
            // Round-robin over the EOPs so host I/O overlaps device compute
            ExecutionObjectPipeline* eop = eops[frame_idx % num_eops];

            // Wait for previous frame on the same eop to finish processing;
            // ProcessFrameWait() returning true means a result is available
            if (eop->ProcessFrameWait())
            {
                WriteFrameOutput(*eop, c, opts);
            }

            // Read a frame and start processing it with current eop
            if (ReadFrame(*eop, frame_idx, c, opts, cap))
                eop->ProcessFrameStartAsync();
        }

        tloop1 = chrono::steady_clock::now();
        chrono::duration<float> elapsed = tloop1 - tloop0;
        cout << "Loop total time (including read/write/opencv/print/etc): "
                  << setw(6) << setprecision(4)
                  << (elapsed.count() * 1000) << "ms" << endl;

        FreeMemory(eops);
        for (auto eop : eops) delete eop;
        delete e_eve;
        delete e_dsp;
    }
    catch (tidl::Exception &e)
    {
        // NOTE(review): if this fires after the Executors/EOPs were created
        // they are leaked — acceptable here since the process exits, but
        // worth converting to RAII if this code is reused long-running.
        cerr << e.what() << endl;
        status = false;
    }

    return status;
}
236 // Create an Executor with the specified type and number of EOs
237 Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c)
238 {
239 if (num == 0) return nullptr;
241 DeviceIds ids;
242 for (uint32_t i = 0; i < num; i++)
243 ids.insert(static_cast<DeviceId>(i));
245 return new Executor(dt, ids, c);
246 }
248 bool ReadFrame(ExecutionObjectPipeline &eop,
249 uint32_t frame_idx, const Configuration& c,
250 const cmdline_opts_t& opts, VideoCapture &cap)
251 {
252 if (frame_idx >= opts.num_frames)
253 return false;
254 eop.SetFrameIndex(frame_idx);
256 char* frame_buffer = eop.GetInputBufferPtr();
257 assert (frame_buffer != nullptr);
258 int channel_size = c.inWidth * c.inHeight;
260 Mat image;
261 if (! opts.is_camera_input && ! opts.is_video_input)
262 {
263 if (opts.input_file.empty())
264 {
265 ifstream ifs(DEFAULT_INPUT, ios::binary);
266 ifs.seekg((frame_idx % DEFAULT_INPUT_FRAMES) * channel_size * 3);
267 ifs.read(frame_buffer, channel_size * 3);
268 bool ifs_status = ifs.good();
269 ifs.close();
270 orig_width = c.inWidth;
271 orig_height = c.inHeight;
272 return ifs_status; // already PreProc-ed
273 }
274 else
275 {
276 image = cv::imread(opts.input_file, CV_LOAD_IMAGE_COLOR);
277 if (image.empty())
278 {
279 cerr << "Unable to read from: " << opts.input_file << endl;
280 return false;
281 }
282 }
283 }
284 else
285 {
286 // 640x480 camera input, process one in every 5 frames,
287 // can adjust number of skipped frames to match real time processing
288 if (! cap.grab()) return false;
289 if (! cap.grab()) return false;
290 if (! cap.grab()) return false;
291 if (! cap.grab()) return false;
292 if (! cap.grab()) return false;
293 if (! cap.retrieve(image)) return false;
294 }
296 // scale to network input size 1024 x 512
297 Mat s_image, bgr_frames[3];
298 orig_width = image.cols;
299 orig_height = image.rows;
300 cv::resize(image, s_image, Size(c.inWidth, c.inHeight),
301 0, 0, cv::INTER_AREA);
302 cv::split(s_image, bgr_frames);
303 memcpy(frame_buffer, bgr_frames[0].ptr(), channel_size);
304 memcpy(frame_buffer+1*channel_size, bgr_frames[1].ptr(), channel_size);
305 memcpy(frame_buffer+2*channel_size, bgr_frames[2].ptr(), channel_size);
306 return true;
307 }
309 // Create Overlay mask for pixel-level segmentation
310 void CreateMask(uchar *classes, uchar *mb, uchar *mg, uchar* mr,
311 int channel_size)
312 {
313 for (int i = 0; i < channel_size; i++)
314 {
315 const ObjectClass& object_class = object_classes->At(classes[i]);
316 mb[i] = object_class.color.blue;
317 mg[i] = object_class.color.green;
318 mr[i] = object_class.color.red;
319 }
320 }
322 // Create frame overlayed with pixel-level segmentation
323 bool WriteFrameOutput(const ExecutionObjectPipeline &eop,
324 const Configuration& c,
325 const cmdline_opts_t& opts)
326 {
327 unsigned char *out = (unsigned char *) eop.GetOutputBufferPtr();
328 int width = c.inWidth;
329 int height = c.inHeight;
330 int channel_size = width * height;
332 Mat mask, frame, blend, r_blend, bgr[3];
333 // Create overlay mask
334 bgr[0] = Mat(height, width, CV_8UC(1));
335 bgr[1] = Mat(height, width, CV_8UC(1));
336 bgr[2] = Mat(height, width, CV_8UC(1));
337 CreateMask(out, bgr[0].ptr(), bgr[1].ptr(), bgr[2].ptr(), channel_size);
338 cv::merge(bgr, 3, mask);
340 // Asseembly original frame
341 unsigned char *in = (unsigned char *) eop.GetInputBufferPtr();
342 bgr[0] = Mat(height, width, CV_8UC(1), in);
343 bgr[1] = Mat(height, width, CV_8UC(1), in + channel_size);
344 bgr[2] = Mat(height, width, CV_8UC(1), in + channel_size*2);
345 cv::merge(bgr, 3, frame);
347 // Create overlayed frame
348 cv::addWeighted(frame, 0.7, mask, 0.3, 0.0, blend);
350 // Resize to output width/height, keep aspect ratio
351 uint32_t output_width = opts.output_width;
352 if (output_width == 0) output_width = orig_width;
353 uint32_t output_height = (output_width*1.0f) / orig_width * orig_height;
354 cv::resize(blend, r_blend, Size(output_width, output_height));
356 if (opts.is_camera_input || opts.is_video_input)
357 {
358 cv::imshow("Segmentation", r_blend);
359 waitKey(1);
360 }
361 else
362 {
363 int frame_index = eop.GetFrameIndex();
364 char outfile_name[64];
365 if (opts.input_file.empty())
366 {
367 snprintf(outfile_name, 64, "frame_%d.png", frame_index);
368 cv::imwrite(outfile_name, frame);
369 printf("Saving frame %d to: %s\n", frame_index, outfile_name);
370 }
372 snprintf(outfile_name, 64, "overlay_%d.png", frame_index);
373 cv::imwrite(outfile_name, r_blend);
374 printf("Saving frame %d overlayed with segmentation to: %s\n",
375 frame_index, outfile_name);
376 }
378 return true;
379 }
// Print command-line usage for the segmentation demo to stdout.
void DisplayHelp()
{
    static const char usage_text[] =
        "Usage: segmentation\n"
        " Will run segmentation network to perform pixel-level"
        " classification.\n Use -c to run a different"
        " segmentation network. Default is jseg21_tiscapes.\n"
        "Optional arguments:\n"
        " -c <config> Valid configs: jseg21_tiscapes, jseg21\n"
        " -d <number> Number of dsp cores to use\n"
        " -e <number> Number of eve cores to use\n"
        " -i <image> Path to the image file as input\n"
        " Default are 9 frames in testvecs\n"
        " -i camera<number> Use camera as input\n"
        " video input port: /dev/video<number>\n"
        " -i <name>.{mp4,mov,avi} Use video file as input\n"
        " -l <objects_list> Path to the object classes list file\n"
        " -f <number> Number of frames to process\n"
        " -w <number> Output image/video width\n"
        " -v Verbose output during execution\n"
        " -h Help\n";
    std::cout << usage_text;
}