1 /******************************************************************************
2 * Copyright (c) 2017, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28 #include <signal.h>
29 #include <getopt.h>
30 #include <iostream>
31 #include <iomanip>
32 #include <fstream>
33 #include <cassert>
34 #include <string>
35 #include <functional>
36 #include <algorithm>
37 #include <time.h>
39 #include "executor.h"
40 #include "execution_object.h"
41 #include "configuration.h"
43 bool __TI_show_debug_ = false;
45 using namespace tidl;
47 bool RunMultipleExecutors(const std::string& config_file_1,
48 const std::string& config_file_2,
49 uint32_t num_devices_available);
51 bool RunConfiguration(const std::string& config_file, int num_devices,
52 DeviceType device_type);
53 bool RunAllConfigurations(int32_t num_devices, DeviceType device_type);
55 bool ReadFrame(ExecutionObject& eo,
56 int frame_idx,
57 const Configuration& configuration,
58 std::istream& input_file);
60 bool WriteFrame(const ExecutionObject &eo,
61 std::ostream& output_file);
63 static void ProcessArgs(int argc, char *argv[],
64 std::string& config_file,
65 int& num_devices,
66 DeviceType& device_type);
68 static void DisplayHelp();
// Returns the time elapsed from t0 to t1 in milliseconds. The result is
// negative when t1 is earlier than t0.
static double ms_diff(struct timespec &t0, struct timespec &t1)
{
    // Whole-second part of the difference, scaled to milliseconds.
    const double seconds_part_ms = (t1.tv_sec - t0.tv_sec) * 1e3;

    // Nanosecond part of the difference, scaled to milliseconds.
    const double nanos_part_ms = (t1.tv_nsec - t0.tv_nsec) / 1e6;

    return seconds_part_ms + nanos_part_ms;
}
74 int main(int argc, char *argv[])
75 {
76 // Catch ctrl-c to ensure a clean exit
77 signal(SIGABRT, exit);
78 signal(SIGTERM, exit);
80 // If there are no devices capable of offloading TIDL on the SoC, exit
81 uint32_t num_dla = Executor::GetNumDevices(DeviceType::DLA);
82 uint32_t num_dsp = Executor::GetNumDevices(DeviceType::DSP);
83 if (num_dla == 0 && num_dsp == 0)
84 {
85 std::cout << "TI DL not supported on this SoC." << std::endl;
86 return EXIT_SUCCESS;
87 }
88 std::cout << "API Version: " << Executor::GetAPIVersion() << std::endl;
90 // Process arguments
91 std::string config_file;
92 int num_devices = 1;
93 DeviceType device_type = DeviceType::DLA;
94 ProcessArgs(argc, argv, config_file, num_devices, device_type);
96 bool status = true;
97 if (!config_file.empty())
98 status = RunConfiguration(config_file, num_devices, device_type);
99 else
100 {
101 if (num_dla > 0)
102 {
103 //TODO: Use memory availability to determine # devices
104 // Run on 2 devices because there is not enough CMEM available by
105 // default
106 if (num_dla = 4) num_dla = 2;
107 status = RunAllConfigurations(num_dla, DeviceType::DLA);
108 status &= RunMultipleExecutors(
109 "testvecs/config/infer/tidl_config_j11_v2.txt",
110 "testvecs/config/infer/tidl_config_j11_cifar.txt",
111 num_dla);
112 }
114 if (num_dsp > 0)
115 {
116 status &= RunAllConfigurations(num_dsp, DeviceType::DSP);
117 }
118 }
120 if (!status)
121 {
122 std::cout << "tidl FAILED" << std::endl;
123 return EXIT_FAILURE;
124 }
126 std::cout << "tidl PASSED" << std::endl;
127 return EXIT_SUCCESS;
128 }
130 bool RunConfiguration(const std::string& config_file, int num_devices,
131 DeviceType device_type)
132 {
133 DeviceIds ids;
134 for (int i = 0; i < num_devices; i++)
135 ids.insert(static_cast<DeviceId>(i));
137 // Read the TI DL configuration file
138 Configuration configuration;
139 bool status = configuration.ReadFromFile(config_file);
140 if (!status)
141 {
142 std::cerr << "Error in configuration file: " << config_file
143 << std::endl;
144 return false;
145 }
147 // Open input and output files
148 std::ifstream input_data_file(configuration.inData, std::ios::binary);
149 std::ofstream output_data_file(configuration.outData, std::ios::binary);
150 assert (input_data_file.good());
151 assert (output_data_file.good());
153 try
154 {
155 // Create a executor with the approriate core type, number of cores
156 // and configuration specified
157 Executor executor(device_type, ids, configuration);
159 // Query Executor for set of ExecutionObjects created
160 const ExecutionObjects& execution_objects =
161 executor.GetExecutionObjects();
162 int num_eos = execution_objects.size();
164 // Allocate input and output buffers for each execution object
165 std::vector<void *> buffers;
166 for (auto &eo : execution_objects)
167 {
168 size_t in_size = eo->GetInputBufferSizeInBytes();
169 size_t out_size = eo->GetOutputBufferSizeInBytes();
170 ArgInfo in = { ArgInfo(malloc(in_size), in_size)};
171 ArgInfo out = { ArgInfo(malloc(out_size), out_size)};
172 eo->SetInputOutputBuffer(in, out);
174 buffers.push_back(in.ptr());
175 buffers.push_back(out.ptr());
176 }
178 #define MAX_NUM_EOS 4
179 struct timespec t0[MAX_NUM_EOS], t1;
181 // Process frames with available execution objects in a pipelined manner
182 // additional num_eos iterations to flush the pipeline (epilogue)
183 for (int frame_idx = 0;
184 frame_idx < configuration.numFrames + num_eos; frame_idx++)
185 {
186 ExecutionObject* eo = execution_objects[frame_idx % num_eos].get();
188 // Wait for previous frame on the same eo to finish processing
189 if (eo->ProcessFrameWait())
190 {
191 clock_gettime(CLOCK_MONOTONIC, &t1);
192 double elapsed_host =
193 ms_diff(t0[eo->GetFrameIndex() % num_eos], t1);
194 double elapsed_device = eo->GetProcessTimeInMilliSeconds();
195 double overhead = 100 - (elapsed_device/elapsed_host*100);
197 std::cout << "frame[" << eo->GetFrameIndex() << "]: "
198 << "Time on device: "
199 << std::setw(6) << std::setprecision(4)
200 << elapsed_device << "ms, "
201 << "host: "
202 << std::setw(6) << std::setprecision(4)
203 << elapsed_host << "ms ";
204 std::cout << "API overhead: "
205 << std::setw(6) << std::setprecision(3)
206 << overhead << " %" << std::endl;
208 WriteFrame(*eo, output_data_file);
209 }
211 // Read a frame and start processing it with current eo
212 if (ReadFrame(*eo, frame_idx, configuration, input_data_file))
213 {
214 clock_gettime(CLOCK_MONOTONIC, &t0[frame_idx % num_eos]);
215 eo->ProcessFrameStartAsync();
216 }
217 }
219 for (auto b : buffers)
220 free(b);
222 }
223 catch (tidl::Exception &e)
224 {
225 std::cerr << e.what() << std::endl;
226 status = false;
227 }
230 input_data_file.close();
231 output_data_file.close();
233 return status;
234 }
// Output comparison helpers declared here and defined outside this part of
// the file. RunAllConfigurations() treats a true result as a passing
// comparison.
namespace tidl {

// Compares the two files named F1 and F2.
bool CompareFiles(const std::string &F1, const std::string &F2);

// Compares numFrames frames of the given width and height from the files
// named F1 and F2.
bool CompareFrames(const std::string &F1, const std::string &F2,
                   int numFrames, int width, int height);

} // namespace tidl
242 bool RunAllConfigurations(int32_t num_devices, DeviceType device_type)
243 {
244 std::vector<std::string> configurations;
246 if (device_type == DeviceType::DLA)
247 configurations = {"dense_1x1", "j11_bn", "j11_cifar",
248 "j11_controlLayers", "j11_prelu", "j11_v2",
249 "jseg21", "jseg21_tiscapes", "smallRoi", "squeeze1_1"};
250 else
251 configurations = {"j11_bn",
252 "j11_controlLayers", "j11_v2",
253 "jseg21", "jseg21_tiscapes", "smallRoi", "squeeze1_1"};
255 int errors = 0;
256 for (auto config : configurations)
257 {
258 std::string config_file = "testvecs/config/infer/tidl_config_"
259 + config + ".txt";
260 std::cout << "Running " << config << " on " << num_devices
261 << " devices, type "
262 << ((device_type == DeviceType::DLA) ? "EVE" : "DSP")
263 << std::endl;
265 Configuration configuration;
266 bool status = configuration.ReadFromFile(config_file);
267 if (!status) { errors++; continue; }
269 status = RunConfiguration(config_file, num_devices, device_type);
271 if (!status) { errors++; continue; }
273 // Check output against reference output
274 std::string reference_output = "testvecs/reference/"
275 + config + "_ref.bin";
277 // Reference for jseg21_tiscapes only has one frame
278 if (config.compare("jseg21_tiscapes") == 0)
279 status = CompareFrames(configuration.outData, reference_output,
280 1, 1024, 512);
281 else
282 status = CompareFiles(configuration.outData, reference_output);
284 if (status) std::cout << config << " : PASSED" << std::endl;
285 else std::cout << config << " : FAILED" << std::endl;
287 if (!status) errors++;
288 }
290 if (errors > 0) return false;
292 return true;
293 }
297 bool ReadFrame(ExecutionObject &eo, int frame_idx,
298 const Configuration& configuration,
299 std::istream& input_file)
300 {
301 if (frame_idx >= configuration.numFrames)
302 return false;
304 char* frame_buffer = eo.GetInputBufferPtr();
305 assert (frame_buffer != nullptr);
307 input_file.read(eo.GetInputBufferPtr(),
308 eo.GetInputBufferSizeInBytes());
310 if (input_file.eof())
311 return false;
313 assert (input_file.good());
315 // Set the frame index being processed by the EO. This is used to
316 // sort the frames before they are output
317 eo.SetFrameIndex(frame_idx);
319 if (input_file.good())
320 return true;
322 return false;
323 }
325 bool WriteFrame(const ExecutionObject &eo, std::ostream& output_file)
326 {
327 output_file.write(
328 eo.GetOutputBufferPtr(), eo.GetOutputBufferSizeInBytes());
329 assert(output_file.good() == true);
331 if (output_file.good())
332 return true;
334 return false;
335 }
338 void ProcessArgs(int argc, char *argv[], std::string& config_file,
339 int& num_devices, DeviceType& device_type)
340 {
341 const struct option long_options[] =
342 {
343 {"config_file", required_argument, 0, 'c'},
344 {"num_devices", required_argument, 0, 'n'},
345 {"device_type", required_argument, 0, 't'},
346 {"help", no_argument, 0, 'h'},
347 {"verbose", no_argument, 0, 'v'},
348 {0, 0, 0, 0}
349 };
351 int option_index = 0;
353 while (true)
354 {
355 int c = getopt_long(argc, argv, "c:n:t:hv", long_options, &option_index);
357 if (c == -1)
358 break;
360 switch (c)
361 {
362 case 'c': config_file = optarg;
363 break;
365 case 'n': num_devices = atoi(optarg);
366 assert (num_devices > 0 && num_devices <= 4);
367 break;
369 case 't': if (*optarg == 'e')
370 device_type = DeviceType::DLA;
371 else if (*optarg == 'd')
372 device_type = DeviceType::DSP;
373 else
374 {
375 std::cerr << "Invalid argument to -t, only e or d"
376 " allowed" << std::endl;
377 exit(EXIT_FAILURE);
378 }
379 break;
381 case 'v': __TI_show_debug_ = true;
382 break;
384 case 'h': DisplayHelp();
385 exit(EXIT_SUCCESS);
386 break;
388 case '?': // Error in getopt_long
389 exit(EXIT_FAILURE);
390 break;
392 default:
393 std::cerr << "Unsupported option: " << c << std::endl;
394 break;
395 }
396 }
397 }
// Prints the command line usage summary to standard output.
void DisplayHelp()
{
    // One entry per printed line; emitted in order without extra separators.
    static const char* const kUsageLines[] =
    {
        "Usage: test_tidl\n",
        " Will run all available networks if invoked without any arguments.\n",
        " Use -c to run a single network.\n",
        "Optional arguments:\n",
        " -c Path to the configuration file\n",
        " -n <number of cores> Number of cores to use (1 - 4)\n",
        " -t <d|e> Type of core. d -> DSP, e -> DLA\n",
        " -v Verbose output during execution\n",
        " -h Help\n",
    };

    for (const char* line : kUsageLines)
        std::cout << line;
}