1 /******************************************************************************
2 * Copyright (c) 2017-2018 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
29 #include <assert.h>
30 #include "executor.h"
31 #include "executor_impl.h"
32 #include "parameters.h"
33 #include "util.h"
34 #include "trace.h"
37 using namespace tidl;
39 using std::unique_ptr;
41 Executor::Executor(DeviceType core_type, const DeviceIds& ids,
42 const Configuration& configuration, int layers_group_id)
43 {
44 TRACE::enabled = configuration.enableApiTrace;
46 TRACE::print("-> Executor::Executor()\n");
48 pimpl_m = unique_ptr<ExecutorImpl>
49 { new ExecutorImpl(core_type, ids, layers_group_id) };
50 pimpl_m->Initialize(configuration);
52 TRACE::print("<- Executor::Executor()\n");
53 }
// Pointer-to-implementation (pimpl) idiom, see https://herbsutter.com/gotw/_100/
//
// unique_ptr and shared_ptr can both be instantiated with an incomplete
// type, but unique_ptr's destructor needs the complete type in order to
// invoke delete. Defining the destructor here, in the implementation file
// where the impl type is already complete, prevents the compiler from
// generating it on demand in the caller's code, where the impl type is
// still incomplete.
Executor::~Executor() = default;
66 uint32_t Executor::GetNumDevices(DeviceType device_type)
67 {
68 return Device::GetNumDevices(device_type);
69 }
71 #define STRING(S) XSTRING(S)
72 #define XSTRING(S) #S
73 std::string Executor::GetAPIVersion()
74 {
75 static std::string version = STRING(_BUILD_VER);
76 version += ".";
77 version += STRING(_BUILD_SHA);
78 return version;
79 }
82 ExecutorImpl::ExecutorImpl(DeviceType core_type, const DeviceIds& ids,
83 int layers_group_id):
84 configuration_m(),
85 shared_networkparam_heap_m(nullptr, &__free_ddr),
86 device_ids_m(ids),
87 core_type_m(core_type),
88 layers_group_id_m(layers_group_id)
89 {
90 std::string name;
91 if (core_type_m == DeviceType::DSP)
92 name = "";
93 else if (core_type_m == DeviceType::EVE)
94 name = STRING(SETUP_KERNEL) ";" STRING(INIT_KERNEL) ";" STRING(PROCESS_KERNEL) ";" STRING(CLEANUP_KERNEL);
96 device_m = Device::Create(core_type_m, ids, name);
97 }
100 const ExecutionObjects& Executor::GetExecutionObjects() const
101 {
102 return pimpl_m->execution_objects_m;
103 }
105 ExecutionObject* Executor::operator[](uint32_t index) const
106 {
107 assert(index < pimpl_m->execution_objects_m.size());
108 return pimpl_m->execution_objects_m[index].get();
109 }
111 uint32_t Executor::GetNumExecutionObjects() const
112 {
113 return pimpl_m->execution_objects_m.size();
114 }
116 bool ExecutorImpl::Initialize(const Configuration& configuration)
117 {
118 configuration_m = configuration;
120 // Allocate, initialize TIDL_CreateParams object
121 up_malloc_ddr<TIDL_CreateParams> shared_createparam(
122 malloc_ddr<TIDL_CreateParams>(),
123 &__free_ddr);
124 InitializeNetworkCreateParam(shared_createparam.get());
126 // Read network from file into network struct in TIDL_CreateParams
127 sTIDL_Network_t *net = &(shared_createparam.get())->net;
129 bool status = ReadBinary(configuration_m.netBinFile,
130 reinterpret_cast<char *>(net),
131 sizeof(sTIDL_Network_t));
132 assert(status != false);
134 // Force to run full network if runFullNet is set
135 if (configuration.runFullNet)
136 {
137 for (int i = 0; i < net->numLayers; i++)
138 if (net->TIDLLayers[i].layerType != TIDL_DataLayer)
139 net->TIDLLayers[i].layersGroupId = layers_group_id_m;
140 }
142 // If the user has specified an override mapping, apply it
143 else if (!configuration.layerIndex2LayerGroupId.empty())
144 {
145 for (const auto &item : configuration.layerIndex2LayerGroupId)
146 if (item.first < net->numLayers)
147 net->TIDLLayers[item.first].layersGroupId = item.second;
148 }
150 // Call a setup kernel to allocate and fill network parameters
151 InitializeNetworkParams(shared_createparam.get());
153 const ArgInfo create_arg(shared_createparam.get(),
154 sizeof(TIDL_CreateParams));
155 const ArgInfo param_heap_arg(shared_networkparam_heap_m.get(),
156 configuration_m.PARAM_HEAP_SIZE);
157 for (auto ids : device_ids_m)
158 {
159 uint8_t index = static_cast<uint8_t>(ids);
160 execution_objects_m.push_back(
161 unique_ptr<ExecutionObject>
162 {new ExecutionObject(device_m.get(), index,
163 create_arg, param_heap_arg,
164 configuration_m,
165 layers_group_id_m)} );
166 }
168 for (auto &eo : execution_objects_m)
169 eo->RunAsync(ExecutionObject::CallType::INIT);
171 for (auto &eo : execution_objects_m)
172 eo->Wait(ExecutionObject::CallType::INIT);
174 return true;
175 }
178 bool ExecutorImpl::InitializeNetworkParams(TIDL_CreateParams *cp)
179 {
180 // Determine size of network parameters buffer, allocate it
181 size_t networkparam_size =
182 GetBinaryFileSize(configuration_m.paramsBinFile);
184 up_malloc_ddr<char> networkparam(malloc_ddr<char>(networkparam_size),
185 &__free_ddr);
187 // Read network parameters from bin file into buffer
188 bool status = ReadBinary(configuration_m.paramsBinFile,
189 networkparam.get(),
190 networkparam_size);
191 assert(status != false);
193 // Allocate a buffer for passing parameters to the kernel
194 up_malloc_ddr<OCL_TIDL_SetupParams> setupParams(
195 malloc_ddr<OCL_TIDL_SetupParams>(),
196 &__free_ddr);
198 // Set up execution trace specified in the configuration
199 EnableExecutionTrace(configuration_m, &setupParams->enableTrace);
201 setupParams->networkParamHeapSize = configuration_m.PARAM_HEAP_SIZE;
202 setupParams->noZeroCoeffsPercentage = configuration_m.noZeroCoeffsPercentage;
203 setupParams->sizeofTIDL_CreateParams = sizeof(TIDL_CreateParams);
204 setupParams->offsetofNet = offsetof(TIDL_CreateParams, net);
206 // Allocate buffer for a network parameter heap. Used by the setup
207 // kernel to allocate and initialize network parameters for the layers
208 shared_networkparam_heap_m.reset(malloc_ddr<char>(setupParams->networkParamHeapSize));
210 KernelArgs args = { DeviceArgInfo(cp, sizeof(TIDL_CreateParams),
211 DeviceArgInfo::Kind::BUFFER),
212 DeviceArgInfo(networkparam.get(), networkparam_size,
213 DeviceArgInfo::Kind::BUFFER),
214 DeviceArgInfo(shared_networkparam_heap_m.get(),
215 setupParams->networkParamHeapSize,
216 DeviceArgInfo::Kind::BUFFER),
217 DeviceArgInfo(setupParams.get(),
218 sizeof(OCL_TIDL_SetupParams),
219 DeviceArgInfo::Kind::BUFFER) };
221 // Execute kernel on first available device in the Executor
222 uint8_t id = static_cast<uint8_t>(*(device_ids_m.cbegin()));
223 unique_ptr<Kernel> K {new Kernel(device_m.get(), STRING(SETUP_KERNEL),
224 args, id)};
225 K->RunAsync();
226 K->Wait();
228 if (setupParams->errorCode != OCL_TIDL_SUCCESS)
229 throw Exception(setupParams->errorCode,
230 __FILE__, __FUNCTION__, __LINE__);
232 return status;
233 }
236 void ExecutorImpl::Cleanup()
237 {
238 for (auto &eo : execution_objects_m)
239 eo->RunAsync(ExecutionObject::CallType::CLEANUP);
241 for (auto &eo : execution_objects_m)
242 eo->Wait(ExecutionObject::CallType::CLEANUP);
243 }
246 void ExecutorImpl::InitializeNetworkCreateParam(TIDL_CreateParams *CP)
247 {
248 CP->currCoreId = layers_group_id_m;
249 CP->currLayersGroupId = layers_group_id_m;
250 CP->l1MemSize = tidl::internal::DMEM0_SIZE;
251 CP->l2MemSize = tidl::internal::DMEM1_SIZE;
252 CP->l3MemSize = tidl::internal::OCMC_SIZE;
254 CP->quantHistoryParam1 = tidl::internal::QUANT_HISTORY_PARAM1;
255 CP->quantHistoryParam2 = tidl::internal::QUANT_HISTORY_PARAM2;
256 CP->quantMargin = tidl::internal::QUANT_MARGIN;
258 // If trace is enabled, setup the device TIDL library to allocate separate
259 // output buffers for each layer. This makes it possible for the host
260 // to access the output of each layer after a frame is processed.
261 if (configuration_m.enableOutputTrace)
262 CP->optimiseExtMem = TIDL_optimiseExtMemL0;
263 else
264 CP->optimiseExtMem = TIDL_optimiseExtMemL1;
265 }
267 Exception::Exception(const std::string& error, const std::string& file,
268 const std::string& func, uint32_t line_no)
269 {
271 message_m = "TIDL Error: [";
272 message_m += file;
273 message_m += ", ";
274 message_m += func;
275 message_m += ", ";
276 message_m += std::to_string(line_no);
277 message_m += "]: ";
278 message_m += error;
279 }
281 // Refer ti-opencl/builtins/include/custom.h for error codes
282 Exception::Exception(int32_t errorCode, const std::string& file,
283 const std::string& func, uint32_t line_no)
284 {
285 message_m = "TIDL Error: [";
286 message_m += file;
287 message_m += ", ";
288 message_m += func;
289 message_m += ", ";
290 message_m += std::to_string(line_no);
291 message_m += "]: ";
293 switch (errorCode)
294 {
295 case OCL_TIDL_ERROR:
296 message_m += "";
297 break;
298 case OCL_TIDL_ALLOC_FAIL:
299 case OCL_TIDL_MEMREC_ALLOC_FAIL:
300 message_m += "Memory allocation failed on device";
301 break;
302 case OCL_TIDL_PROCESS_FAIL:
303 message_m += "Process call failed on device";
304 break;
305 case OCL_TIDL_CREATE_PARAMS_MISMATCH:
306 message_m += "TIDL API headers inconsistent with OpenCL";
307 break;
308 case OCL_TIDL_INIT_FAIL:
309 message_m += "Initialization failed on device";
310 break;
311 default:
312 message_m += std::to_string(errorCode);
313 break;
314 }
315 }
317 const char* Exception::what() const noexcept
318 {
319 return message_m.c_str();
320 }