1 /******************************************************************************
2 * Copyright (c) 2018, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28 #include <signal.h>
29 #include <getopt.h>
30 #include <iostream>
31 #include <iomanip>
32 #include <fstream>
33 #include <cassert>
34 #include <string>
35 #include <functional>
36 #include <algorithm>
37 #include <time.h>
38 #include <unistd.h>
40 #include <queue>
41 #include <vector>
42 #include <cstdio>
44 #include "executor.h"
45 #include "execution_object.h"
46 #include "configuration.h"
48 #include "opencv2/core.hpp"
49 #include "opencv2/imgproc.hpp"
50 #include "opencv2/highgui.hpp"
51 #include "opencv2/videoio.hpp"
53 #define NUM_VIDEO_FRAMES 100
54 #define DEFAULT_CONFIG "jseg21_tiscapes"
55 #define DEFAULT_INPUT "../test/testvecs/input/000100_1024x512_bgr.y"
57 bool __TI_show_debug_ = false;
58 bool is_default_input = false;
59 bool is_preprocessed_input = false;
60 bool is_camera_input = false;
62 using namespace tinn;
63 using namespace cv;
66 bool RunConfiguration(const std::string& config_file, int num_devices,
67 DeviceType device_type, std::string& input_file);
68 bool RunAllConfigurations(int32_t num_devices, DeviceType device_type);
70 bool ReadFrame(ExecutionObject& eo, int frame_idx,
71 const Configuration& configuration, int num_frames,
72 std::string& image_file, VideoCapture &cap);
74 bool WriteFrameOutput(const ExecutionObject &eo);
76 static void ProcessArgs(int argc, char *argv[],
77 std::string& config,
78 int& num_devices,
79 DeviceType& device_type,
80 std::string& input_file);
82 static void DisplayHelp();
// Return the elapsed wall-clock time between t0 and t1 in milliseconds.
static double ms_diff(struct timespec &t0, struct timespec &t1)
{
    double seconds_ms = (t1.tv_sec  - t0.tv_sec)  * 1e3;
    double nanos_ms   = (t1.tv_nsec - t0.tv_nsec) / 1e6;
    return seconds_ms + nanos_ms;
}
88 int main(int argc, char *argv[])
89 {
90 // Catch ctrl-c to ensure a clean exit
91 signal(SIGABRT, exit);
92 signal(SIGTERM, exit);
94 // If there are no devices capable of offloading TIDL on the SoC, exit
95 uint32_t num_dla = Executor::GetNumDevices(DeviceType::DLA);
96 uint32_t num_dsp = Executor::GetNumDevices(DeviceType::DSP);
97 if (num_dla == 0 && num_dsp == 0)
98 {
99 std::cout << "TI DL not supported on this SoC." << std::endl;
100 return EXIT_SUCCESS;
101 }
103 // Process arguments
104 std::string config = DEFAULT_CONFIG;
105 std::string input_file = DEFAULT_INPUT;
106 int num_devices = 1;
107 DeviceType device_type = DeviceType::DLA;
108 ProcessArgs(argc, argv, config, num_devices, device_type, input_file);
110 if (input_file == DEFAULT_INPUT) is_default_input = true;
111 if (input_file == "camera") is_camera_input = true;
112 if (input_file.length() > 2 &&
113 input_file.compare(input_file.length() - 2, 2, ".y") == 0)
114 is_preprocessed_input = true;
115 std::cout << "Input: " << input_file << std::endl;
116 std::string config_file = "../test/testvecs/config/infer/tidl_config_"
117 + config + ".txt";
118 bool status = RunConfiguration(config_file, num_devices, device_type,
119 input_file);
121 if (!status)
122 {
123 std::cout << "segmentation FAILED" << std::endl;
124 return EXIT_FAILURE;
125 }
127 std::cout << "segmentation PASSED" << std::endl;
128 return EXIT_SUCCESS;
129 }
131 bool RunConfiguration(const std::string& config_file, int num_devices,
132 DeviceType device_type, std::string& input_file)
133 {
134 DeviceIds ids;
135 for (int i = 0; i < num_devices; i++)
136 ids.insert(static_cast<DeviceId>(i));
138 // Read the TI DL configuration file
139 Configuration configuration;
140 bool status = configuration.ReadFromFile(config_file);
141 if (!status)
142 {
143 std::cerr << "Error in configuration file: " << config_file
144 << std::endl;
145 return false;
146 }
148 // setup input
149 int num_frames = is_default_input ? 3 : 1;
150 VideoCapture cap;
151 std::string image_file;
152 if (is_camera_input)
153 {
154 cap = VideoCapture(1); // cap = VideoCapture("test.mp4");
155 if (! cap.isOpened())
156 {
157 std::cerr << "Cannot open camera input." << std::endl;
158 return false;
159 }
160 num_frames = NUM_VIDEO_FRAMES;
161 namedWindow("Segmentation", WINDOW_AUTOSIZE | CV_GUI_NORMAL);
162 }
163 else
164 {
165 image_file = input_file;
166 }
168 // Determine input frame size from configuration
169 size_t frame_sz = configuration.inWidth * configuration.inHeight *
170 configuration.inNumChannels;
172 try
173 {
174 // Create a executor with the approriate core type, number of cores
175 // and configuration specified
176 Executor executor(device_type, ids, configuration);
178 // Query Executor for set of ExecutionObjects created
179 const ExecutionObjects& execution_objects =
180 executor.GetExecutionObjects();
181 int num_eos = execution_objects.size();
183 // Allocate input and output buffers for each execution object
184 std::vector<void *> buffers;
185 for (auto &eo : execution_objects)
186 {
187 ArgInfo in = { ArgInfo(malloc(frame_sz), frame_sz)};
188 ArgInfo out = { ArgInfo(malloc(frame_sz), frame_sz)};
189 eo->SetInputOutputBuffer(in, out);
191 buffers.push_back(in.ptr());
192 buffers.push_back(out.ptr());
193 }
195 #define MAX_NUM_EOS 4
196 struct timespec t0[MAX_NUM_EOS], t1;
198 // Process frames with available execution objects in a pipelined manner
199 // additional num_eos iterations to flush the pipeline (epilogue)
200 for (int frame_idx = 0;
201 frame_idx < num_frames + num_eos; frame_idx++)
202 {
203 ExecutionObject* eo = execution_objects[frame_idx % num_eos].get();
205 // Wait for previous frame on the same eo to finish processing
206 if (eo->ProcessFrameWait())
207 {
208 clock_gettime(CLOCK_MONOTONIC, &t1);
209 double elapsed_host =
210 ms_diff(t0[eo->GetFrameIndex() % num_eos], t1);
211 double elapsed_device = eo->GetProcessTimeInMilliSeconds();
212 double overhead = 100 - (elapsed_device/elapsed_host*100);
214 std::cout << "frame[" << eo->GetFrameIndex() << "]: "
215 << "Time on device: "
216 << std::setw(6) << std::setprecision(4)
217 << elapsed_device << "ms, "
218 << "host: "
219 << std::setw(6) << std::setprecision(4)
220 << elapsed_host << "ms ";
221 std::cout << "API overhead: "
222 << std::setw(6) << std::setprecision(3)
223 << overhead << " %" << std::endl;
225 WriteFrameOutput(*eo);
226 }
228 // Read a frame and start processing it with current eo
229 if (ReadFrame(*eo, frame_idx, configuration, num_frames,
230 image_file, cap))
231 {
232 clock_gettime(CLOCK_MONOTONIC, &t0[frame_idx % num_eos]);
233 eo->ProcessFrameStartAsync();
234 }
235 }
237 for (auto b : buffers)
238 free(b);
240 }
241 catch (tinn::Exception &e)
242 {
243 std::cerr << e.what() << std::endl;
244 status = false;
245 }
247 return status;
248 }
251 bool ReadFrame(ExecutionObject &eo, int frame_idx,
252 const Configuration& configuration, int num_frames,
253 std::string& image_file, VideoCapture &cap)
254 {
255 if (frame_idx >= num_frames)
256 return false;
257 eo.SetFrameIndex(frame_idx);
259 char* frame_buffer = eo.GetInputBufferPtr();
260 assert (frame_buffer != nullptr);
261 int channel_size = configuration.inWidth * configuration.inHeight;
263 Mat image;
264 if (! image_file.empty())
265 {
266 if (is_preprocessed_input)
267 {
268 std::ifstream ifs(image_file, std::ios::binary);
269 ifs.seekg(frame_idx * channel_size * 3);
270 ifs.read(frame_buffer, channel_size * 3);
271 bool ifs_status = ifs.good();
272 ifs.close();
273 return ifs_status; // already PreProc-ed
274 }
275 else
276 {
277 image = cv::imread(image_file, CV_LOAD_IMAGE_COLOR);
278 if (image.empty())
279 {
280 std::cerr << "Unable to read from: " << image_file << std::endl;
281 return false;
282 }
283 }
284 }
285 else
286 {
287 // 640x480 camera input, process one in every 5 frames,
288 // can adjust number of skipped frames to match real time processing
289 if (! cap.grab()) return false;
290 if (! cap.grab()) return false;
291 if (! cap.grab()) return false;
292 if (! cap.grab()) return false;
293 if (! cap.grab()) return false;
294 if (! cap.retrieve(image)) return false;
295 }
297 // scale to network input size 1024 x 512
298 Mat s_image, bgr_frames[3];
299 cv::resize(image, s_image,
300 Size(configuration.inWidth, configuration.inHeight),
301 0, 0, cv::INTER_AREA);
302 cv::split(s_image, bgr_frames);
303 memcpy(frame_buffer, bgr_frames[0].ptr(), channel_size);
304 memcpy(frame_buffer+1*channel_size, bgr_frames[1].ptr(), channel_size);
305 memcpy(frame_buffer+2*channel_size, bgr_frames[2].ptr(), channel_size);
306 return true;
307 }
309 // Create Overlay mask for pixel-level segmentation
310 void CreateMask(uchar *classes, uchar *mb, uchar *mg, uchar* mr)
311 {
312 for (int i = 0; i < 1024 * 512; i++)
313 {
314 switch(classes[i])
315 {
316 case 0x00: mb[i] = 0xFF; mg[i] = 0xFF; mr[i] = 0xFF; break;
317 case 0x01: mb[i] = 0xFF; mg[i] = 0x00; mr[i] = 0x00; break;
318 case 0x02: mb[i] = 0x00; mg[i] = 0x00; mr[i] = 0xFF; break;
319 case 0x03: mb[i] = 0x00; mg[i] = 0xFF; mr[i] = 0x00; break;
320 case 0x04: mb[i] = 0x00; mg[i] = 0xFF; mr[i] = 0xFF; break;
321 default: mb[i] = 0x00; mg[i] = 0x00; mr[i] = 0x00; break;
322 }
323 }
324 }
326 // Create frame overlayed with pixel-level segmentation
327 bool WriteFrameOutput(const ExecutionObject &eo)
328 {
329 const int k = 5;
330 unsigned char *out = (unsigned char *) eo.GetOutputBufferPtr();
331 int out_size = eo.GetOutputBufferSizeInBytes();
333 Mat mask, frame, blend, bgr[3];
334 // Create overlay mask
335 bgr[0] = Mat(512, 1024, CV_8UC(1));
336 bgr[1] = Mat(512, 1024, CV_8UC(1));
337 bgr[2] = Mat(512, 1024, CV_8UC(1));
338 CreateMask(out, bgr[0].ptr(), bgr[1].ptr(), bgr[2].ptr());
339 cv::merge(bgr, 3, mask);
341 // Asseembly original frame
342 unsigned char *in = (unsigned char *) eo.GetInputBufferPtr();
343 bgr[0] = Mat(512, 1024, CV_8UC(1), in);
344 bgr[1] = Mat(512, 1024, CV_8UC(1), in + 512*1024);
345 bgr[2] = Mat(512, 1024, CV_8UC(1), in + 512*1024*2);
346 cv::merge(bgr, 3, frame);
348 // Create overlayed frame
349 cv::addWeighted(frame, 0.7, mask, 0.3, 0.0, blend);
351 if (is_camera_input)
352 {
353 Mat r_blend;
354 cv::resize(blend, r_blend, Size(640, 480));
355 cv::imshow("Segmentation", r_blend);
356 waitKey(1);
357 }
358 else
359 {
360 int frame_index = eo.GetFrameIndex();
361 char outfile_name[64];
362 if (is_preprocessed_input)
363 {
364 snprintf(outfile_name, 64, "frame_%d.png", frame_index);
365 cv::imwrite(outfile_name, frame);
366 printf("Saving frame %d to: %s\n", frame_index, outfile_name);
367 }
369 snprintf(outfile_name, 64, "overlay_%d.png", frame_index);
370 cv::imwrite(outfile_name, blend);
371 printf("Saving frame %d overlayed with segmentation to: %s\n",
372 frame_index, outfile_name);
373 }
375 return true;
376 }
379 void ProcessArgs(int argc, char *argv[], std::string& config,
380 int& num_devices, DeviceType& device_type,
381 std::string& input_file)
382 {
383 const struct option long_options[] =
384 {
385 {"config", required_argument, 0, 'c'},
386 {"num_devices", required_argument, 0, 'n'},
387 {"device_type", required_argument, 0, 't'},
388 {"image_file", required_argument, 0, 'i'},
389 {"help", no_argument, 0, 'h'},
390 {"verbose", no_argument, 0, 'v'},
391 {0, 0, 0, 0}
392 };
394 int option_index = 0;
396 while (true)
397 {
398 int c = getopt_long(argc, argv, "c:n:t:i:hv", long_options, &option_index);
400 if (c == -1)
401 break;
403 switch (c)
404 {
405 case 'c': config = optarg;
406 break;
408 case 'n': num_devices = atoi(optarg);
409 assert (num_devices > 0 && num_devices <= 4);
410 break;
412 case 't': if (*optarg == 'e')
413 device_type = DeviceType::DLA;
414 else if (*optarg == 'd')
415 device_type = DeviceType::DSP;
416 else
417 {
418 std::cerr << "Invalid argument to -t, only e or d"
419 " allowed" << std::endl;
420 exit(EXIT_FAILURE);
421 }
422 break;
424 case 'i': input_file = optarg;
425 break;
427 case 'v': __TI_show_debug_ = true;
428 break;
430 case 'h': DisplayHelp();
431 exit(EXIT_SUCCESS);
432 break;
434 case '?': // Error in getopt_long
435 exit(EXIT_FAILURE);
436 break;
438 default:
439 std::cerr << "Unsupported option: " << c << std::endl;
440 break;
441 }
442 }
443 }
// Print command line usage for the segmentation demo to stdout.
void DisplayHelp()
{
    std::cout << "Usage: segmentation\n";
    std::cout << " Will run segmentation network to perform pixel-level"
                 " classification.\n Use -c to run a different"
                 " segmentation network. Default is jseg21_tiscapes.\n";
    std::cout << "Optional arguments:\n"
                 " -c <config> Valid configs: jseg21_tiscapes, jseg21\n"
                 " -n <number of cores> Number of cores to use (1 - 4)\n"
                 " -t <d|e> Type of core. d -> DSP, e -> DLA\n"
                 " -i <image> Path to the image file\n"
                 " Default are 3 frames in testvecs\n"
                 " -i camera Use camera as input\n"
                 " -v Verbose output during execution\n"
                 " -h Help\n";
}