864551d3dd7d230e70473a0e1eafb3f0331a09f3
1 /******************************************************************************
2 * Copyright (c) 2017-2018 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
30 #include <cstdlib>
31 #include <cassert>
32 using std::size_t;
34 #include <iostream>
36 #include "ocl_device.h"
37 #include "ocl_util.h"
38 #include "trace.h"
40 using namespace tidl;
42 static const char* error2string(cl_int err);
43 static void errorCheck(cl_int ret, int line);
45 Device::Device(cl_device_type t, const DeviceIds& ids, const char* name):
46 device_type_m(t), device_ids_m(ids)
47 {
48 TRACE::print("\tOCL Device: %s created\n",
49 device_type_m == CL_DEVICE_TYPE_CUSTOM ? name : "Unknown");
51 for (int i = 0; i < MAX_DEVICES; i++)
52 queue_m[i] = nullptr;
54 }
56 DspDevice::DspDevice(const DeviceIds& ids, const std::string &kernel_names):
57 Device(CL_DEVICE_TYPE_CUSTOM, ids, "DSP")
58 {
59 cl_int errcode;
60 cl_device_id device_ids[MAX_DEVICES];
61 cl_device_id out_device_ids[MAX_DEVICES];
62 cl_uint num_compute_units;
63 cl_uint num_out_devices;
65 if (! GetDevices(DeviceType::DSP, device_ids, nullptr, &num_compute_units))
66 throw Exception("OpenCL DSP device not found",
67 __FILE__, __FUNCTION__, __LINE__);
69 if (num_compute_units == 1)
70 {
71 num_out_devices = 1;
72 out_device_ids[0] = device_ids[0];
73 }
74 else
75 {
76 // Create 2 sub-device's, each consisting of a C66x DSP
77 cl_device_partition_property properties[3] =
78 { CL_DEVICE_PARTITION_EQUALLY, 1, 0 };
80 // Query the number of sub-devices that can be created
81 const cl_uint NUM_SUB_DEVICES = 2;
82 errcode = clCreateSubDevices(device_ids[0], // in_device
83 properties, // properties
84 0, // num_devices
85 NULL, // out_devices
86 &num_out_devices); // num_devices_ret
87 errorCheck(errcode, __LINE__);
89 assert(num_out_devices == NUM_SUB_DEVICES);
91 // Create the sub-devices
92 errcode = clCreateSubDevices(device_ids[0], // in_device
93 properties, // properties
94 num_out_devices, // num_devices
95 out_device_ids, // out_devices
96 nullptr); // num_devices_ret
97 errorCheck(errcode, __LINE__);
98 }
100 // Create a context containing the out-devices
101 context_m = clCreateContext(NULL, // properties
102 num_out_devices, // num_devices
103 out_device_ids, // devices
104 NULL, // pfn_notify
105 NULL, // user_data
106 &errcode); // errcode_ret
107 errorCheck(errcode, __LINE__);
109 // Create queues to each out device
110 for (auto id : device_ids_m)
111 {
112 cl_uint index = static_cast<cl_uint>(id);
113 assert(index < num_out_devices);
114 queue_m[index] = clCreateCommandQueue(context_m,
115 out_device_ids[index],
116 CL_QUEUE_PROFILING_ENABLE|
117 CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
118 &errcode);
119 errorCheck(errcode, __LINE__);
120 }
122 // Build kernel program
123 BuildBuiltInProgram(kernel_names, out_device_ids, num_out_devices);
125 // Query device frequency
126 errcode = clGetDeviceInfo(device_ids[0],
127 CL_DEVICE_MAX_CLOCK_FREQUENCY,
128 sizeof(freq_in_mhz_m),
129 &freq_in_mhz_m,
130 nullptr);
131 errorCheck(errcode, __LINE__);
132 }
135 EveDevice::EveDevice(const DeviceIds& ids, const std::string &kernel_names):
136 Device(CL_DEVICE_TYPE_CUSTOM, ids, "EVE")
137 {
138 cl_int errcode;
139 cl_device_id all_device_ids[MAX_DEVICES];
140 cl_uint num_devices;
141 if (! GetDevices(DeviceType::EVE, all_device_ids, &num_devices, nullptr))
142 throw Exception("OpenCL EVE device not found",
143 __FILE__, __FUNCTION__, __LINE__);
145 assert (num_devices >= device_ids_m.size());
147 context_m = clCreateContextFromType(0, // properties
148 device_type_m, // device_type
149 0, // pfn_notify
150 0, // user_data
151 &errcode);
152 errorCheck(errcode, __LINE__);
154 // Create command queues to OpenCL devices specified by the
155 // device_ids_m set.
156 for (auto id : device_ids_m)
157 {
158 int index = static_cast<int>(id);
159 queue_m[index] = clCreateCommandQueue(context_m,
160 all_device_ids[index],
161 CL_QUEUE_PROFILING_ENABLE|
162 CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
163 &errcode);
164 errorCheck(errcode, __LINE__);
165 }
167 BuildBuiltInProgram(kernel_names, all_device_ids, device_ids_m.size());
169 errcode = clGetDeviceInfo(all_device_ids[0],
170 CL_DEVICE_MAX_CLOCK_FREQUENCY,
171 sizeof(freq_in_mhz_m),
172 &freq_in_mhz_m,
173 nullptr);
174 errorCheck(errcode, __LINE__);
175 }
177 bool DspDevice::BuildBuiltInProgram(const std::string& kernel_names,
178 cl_device_id device_ids[],
179 int num_devices)
180 {
181 cl_int err;
182 program_m = clCreateProgramWithBuiltInKernels(context_m,
183 num_devices,
184 device_ids, // device_list
185 kernel_names.c_str(),
186 &err);
187 errorCheck(err, __LINE__);
189 return true;
190 }
192 bool EveDevice::BuildBuiltInProgram(const std::string& kernel_names,
193 cl_device_id device_ids[],
194 int num_devices)
195 {
196 cl_int err;
197 cl_device_id executor_device_ids[MAX_DEVICES];
199 int i = 0;
200 for (auto id : device_ids_m)
201 executor_device_ids[i++] = device_ids[static_cast<int>(id)];
203 program_m = clCreateProgramWithBuiltInKernels(context_m,
204 num_devices,
205 executor_device_ids, // device_list
206 kernel_names.c_str(),
207 &err);
208 errorCheck(err, __LINE__);
210 return true;
211 }
213 Kernel::Kernel(Device* device, const std::string& name,
214 const KernelArgs& args, uint8_t device_index):
215 name_m(name), device_m(device), device_index_m(device_index)
216 {
217 TRACE::print("Creating kernel %s\n", name.c_str());
218 cl_int err;
219 kernel_m = clCreateKernel(device_m->program_m, name_m.c_str(), &err);
220 errorCheck(err, __LINE__);
222 for (int i=0; i < tidl::internal::NUM_CONTEXTS; i++)
223 event_m[i] = nullptr;
225 int arg_index = 0;
226 for (const auto& arg : args)
227 {
228 if (!arg.isLocal())
229 {
230 if (arg.kind() == DeviceArgInfo::Kind::BUFFER)
231 {
232 cl_mem buffer = device_m->CreateBuffer(arg);
234 clSetKernelArg(kernel_m, arg_index, sizeof(cl_mem), &buffer);
235 TRACE::print(" Arg[%d]: %p\n", arg_index, buffer);
237 if (buffer)
238 buffers_m.push_back(buffer);
239 }
240 else if (arg.kind() == DeviceArgInfo::Kind::SCALAR)
241 {
242 clSetKernelArg(kernel_m, arg_index, arg.size(), arg.ptr());
243 TRACE::print(" Arg[%d]: %p\n", arg_index, arg.ptr());
244 }
245 else
246 {
247 assert ("DeviceArgInfo kind not supported");
248 }
249 }
250 else
251 {
252 clSetKernelArg(kernel_m, arg_index, arg.size(), NULL);
253 TRACE::print(" Arg[%d]: local, %d\n", arg_index, arg.size());
254 }
255 arg_index++;
257 }
258 }
260 bool Kernel::UpdateScalarArg(uint32_t index, size_t size, const void *value)
261 {
262 cl_int ret = clSetKernelArg(kernel_m, index, size, value);
263 return ret == CL_SUCCESS;
264 }
266 Kernel& Kernel::RunAsync(uint32_t context_idx)
267 {
268 // Execute kernel
269 TRACE::print("\tKernel: device %d executing %s, context %d\n",
270 device_index_m, name_m.c_str(), context_idx);
271 cl_int ret = clEnqueueTask(device_m->queue_m[device_index_m],
272 kernel_m, 0, 0, &event_m[context_idx]);
273 errorCheck(ret, __LINE__);
275 return *this;
276 }
278 bool Kernel::Wait(uint32_t context_idx)
279 {
280 // Wait called without a corresponding RunAsync
281 if (event_m[context_idx] == nullptr)
282 return false;
284 TRACE::print("\tKernel: waiting context %d...\n", context_idx);
285 cl_int ret = clWaitForEvents(1, &event_m[context_idx]);
286 errorCheck(ret, __LINE__);
288 ret = clReleaseEvent(event_m[context_idx]);
289 errorCheck(ret, __LINE__);
290 event_m[context_idx] = nullptr;
292 TRACE::print("\tKernel: finished execution\n");
294 return true;
295 }
297 extern void CallbackWrapper(void *user_data) __attribute__((weak));
299 static
300 void EventCallback(cl_event event, cl_int exec_status, void *user_data)
301 {
302 if (exec_status != CL_SUCCESS || user_data == nullptr) return;
303 if (CallbackWrapper) CallbackWrapper(user_data);
304 }
306 bool Kernel::AddCallback(void *user_data, uint32_t context_idx)
307 {
308 if (event_m[context_idx] == nullptr)
309 return false;
311 return clSetEventCallback(event_m[context_idx], CL_COMPLETE, EventCallback,
312 user_data) == CL_SUCCESS;
313 }
315 Kernel::~Kernel()
316 {
317 for (auto b : buffers_m)
318 device_m->ReleaseBuffer(b);
320 clReleaseKernel(kernel_m);
321 }
323 cl_mem Device::CreateBuffer(const DeviceArgInfo &Arg)
324 {
325 size_t size = Arg.size();
326 void *host_ptr = Arg.ptr();
328 if (host_ptr == nullptr)
329 {
330 TRACE::print("\tOCL Create B:%p\n", nullptr);
331 return nullptr;
332 }
334 bool hostPtrInCMEM = __is_in_malloced_region(host_ptr);
336 // Conservative till we have sufficient information.
337 cl_mem_flags flag = CL_MEM_READ_WRITE;
339 if (hostPtrInCMEM) flag |= (cl_mem_flags)CL_MEM_USE_HOST_PTR;
340 else flag |= (cl_mem_flags)CL_MEM_COPY_HOST_PTR;
342 cl_int errcode;
343 cl_mem buffer = clCreateBuffer(context_m,
344 flag,
345 size,
346 host_ptr,
347 &errcode);
348 errorCheck(errcode, __LINE__);
350 TRACE::print("\tOCL Create B:%p\n", buffer);
352 return buffer;
353 }
355 void Device::ReleaseBuffer(cl_mem M)
356 {
357 TRACE::print("\tOCL Release B:%p\n", M);
358 clReleaseMemObject(M);
359 }
361 /// Release resources associated with an OpenCL device
362 Device::~Device()
363 {
364 TRACE::print("\tOCL Device: deleted\n");
365 for (unsigned int i = 0; i < device_ids_m.size(); i++)
366 {
367 clFinish(queue_m[i]);
368 clReleaseCommandQueue (queue_m[i]);
369 }
371 clReleaseProgram (program_m);
372 clReleaseContext (context_m);
373 }
375 void errorCheck(cl_int ret, int line)
376 {
377 if (ret != CL_SUCCESS)
378 {
379 std::cerr << "ERROR: [ Line: " << line << "] " << error2string(ret) << std::endl;
380 exit(ret);
381 }
382 }
384 /// Convert OpenCL error codes to a string
385 const char* error2string(cl_int err)
386 {
387 switch(err)
388 {
389 case 0: return "CL_SUCCESS";
390 case -1: return "CL_DEVICE_NOT_FOUND";
391 case -2: return "CL_DEVICE_NOT_AVAILABLE";
392 case -3: return "CL_COMPILER_NOT_AVAILABLE";
393 case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
394 case -5: return "CL_OUT_OF_RESOURCES";
395 case -6: return "CL_OUT_OF_HOST_MEMORY";
396 case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
397 case -8: return "CL_MEM_COPY_OVERLAP";
398 case -9: return "CL_IMAGE_FORMAT_MISMATCH";
399 case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
400 case -11: return "CL_BUILD_PROGRAM_FAILURE";
401 case -12: return "CL_MAP_FAILURE";
403 case -30: return "CL_INVALID_VALUE";
404 case -31: return "CL_INVALID_DEVICE_TYPE";
405 case -32: return "CL_INVALID_PLATFORM";
406 case -33: return "CL_INVALID_DEVICE";
407 case -34: return "CL_INVALID_CONTEXT";
408 case -35: return "CL_INVALID_QUEUE_PROPERTIES";
409 case -36: return "CL_INVALID_COMMAND_QUEUE";
410 case -37: return "CL_INVALID_HOST_PTR";
411 case -38: return "CL_INVALID_MEM_OBJECT";
412 case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
413 case -40: return "CL_INVALID_IMAGE_SIZE";
414 case -41: return "CL_INVALID_SAMPLER";
415 case -42: return "CL_INVALID_BINARY";
416 case -43: return "CL_INVALID_BUILD_OPTIONS";
417 case -44: return "CL_INVALID_PROGRAM";
418 case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
419 case -46: return "CL_INVALID_KERNEL_NAME";
420 case -47: return "CL_INVALID_KERNEL_DEFINITION";
421 case -48: return "CL_INVALID_KERNEL";
422 case -49: return "CL_INVALID_ARG_INDEX";
423 case -50: return "CL_INVALID_ARG_VALUE";
424 case -51: return "CL_INVALID_ARG_SIZE";
425 case -52: return "CL_INVALID_KERNEL_ARGS";
426 case -53: return "CL_INVALID_WORK_DIMENSION";
427 case -54: return "CL_INVALID_WORK_GROUP_SIZE";
428 case -55: return "CL_INVALID_WORK_ITEM_SIZE";
429 case -56: return "CL_INVALID_GLOBAL_OFFSET";
430 case -57: return "CL_INVALID_EVENT_WAIT_LIST";
431 case -58: return "CL_INVALID_EVENT";
432 case -59: return "CL_INVALID_OPERATION";
433 case -60: return "CL_INVALID_GL_OBJECT";
434 case -61: return "CL_INVALID_BUFFER_SIZE";
435 case -62: return "CL_INVALID_MIP_LEVEL";
436 case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
437 default: return "Unknown OpenCL error";
438 }
439 }
441 Device::Ptr Device::Create(DeviceType core_type, const DeviceIds& ids,
442 const std::string& name)
443 {
444 Device::Ptr p(nullptr);
445 if (core_type == DeviceType::DSP)
446 p.reset(new DspDevice(ids, name));
447 else if (core_type == DeviceType::EVE)
448 p.reset(new EveDevice(ids, name));
450 return p;
451 }
453 // Minimum version of OpenCL required for this version of TIDL API
454 #define MIN_OCL_VERSION "01.01.18.00"
455 static bool CheckOpenCLVersion(cl_platform_id id)
456 {
457 cl_int err;
458 size_t length;
459 err = clGetPlatformInfo(id, CL_PLATFORM_VERSION, 0, nullptr, &length);
460 if (err != CL_SUCCESS) return false;
462 std::unique_ptr<char> version(new char[length]);
463 err = clGetPlatformInfo(id, CL_PLATFORM_VERSION, length, version.get(),
464 nullptr);
465 if (err != CL_SUCCESS) return false;
467 std::string v(version.get());
469 if (v.substr(v.find("01."), sizeof(MIN_OCL_VERSION)) >= MIN_OCL_VERSION)
470 return true;
472 std::cerr << "TIDL API Error: OpenCL " << MIN_OCL_VERSION
473 << " or higher required." << std::endl;
475 return false;
476 }
478 static bool PlatformIsAM57()
479 {
480 cl_platform_id id;
481 cl_int err;
483 err = clGetPlatformIDs(1, &id, nullptr);
484 if (err != CL_SUCCESS) return false;
486 if (!CheckOpenCLVersion(id))
487 return false;
489 // Check if the device name is AM57
490 size_t length;
491 err = clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, nullptr, &length);
492 if (err != CL_SUCCESS) return false;
494 std::unique_ptr<char> name(new char[length]);
496 err = clGetPlatformInfo(id, CL_PLATFORM_NAME, length, name.get(), nullptr);
497 if (err != CL_SUCCESS) return false;
499 std::string platform_name(name.get());
501 if (platform_name.find("AM57") == std::string::npos)
502 return false;
504 return true;
505 }
507 // TI DL is supported on AM57x - EVE or C66x devices
508 bool Device::GetDevices(DeviceType device_type,
509 cl_device_id cl_d_ids[],
510 cl_uint *p_num_devices,
511 cl_uint *p_num_compute_units)
512 {
513 if (!PlatformIsAM57()) return false;
515 // Convert DeviceType to OpenCL device type
516 cl_device_type t = CL_DEVICE_TYPE_CUSTOM;
518 // Find all the OpenCL custom devices available
519 cl_uint num_devices_found;
520 cl_device_id all_device_ids[MAX_DEVICES];
522 cl_int errcode = clGetDeviceIDs(0, // platform
523 t, // device_type
524 MAX_DEVICES, // num_entries
525 all_device_ids, // devices
526 &num_devices_found); // num_devices
529 if (errcode != CL_SUCCESS) return false;
530 if (num_devices_found == 0) return false;
532 // Find devices according to device_type
533 // DSP: ACCELERATOR | CUSTOM
534 // EVE: CUSTOM
535 cl_uint num_devices = 0;
536 for (cl_uint i = 0; i < num_devices_found; i++)
537 {
538 cl_device_type cl_d_type;
539 errcode = clGetDeviceInfo(all_device_ids[i], CL_DEVICE_TYPE,
540 sizeof(cl_device_type), &cl_d_type, nullptr);
541 if (errcode != CL_SUCCESS) return false;
543 if ((device_type == DeviceType::DSP &&
544 ((cl_d_type & CL_DEVICE_TYPE_ACCELERATOR) != 0)) ||
545 (device_type == DeviceType::EVE &&
546 ((cl_d_type & CL_DEVICE_TYPE_ACCELERATOR) == 0)))
547 cl_d_ids[num_devices++] = all_device_ids[i];
548 }
549 if (p_num_devices != nullptr) *p_num_devices = num_devices;
551 // DSP, return the number of compute units
552 if (device_type == DeviceType::DSP &&
553 num_devices > 0 && p_num_compute_units != nullptr)
554 {
555 errcode = clGetDeviceInfo(cl_d_ids[0],
556 CL_DEVICE_MAX_COMPUTE_UNITS,
557 sizeof(cl_int),
558 p_num_compute_units,
559 nullptr);
560 if (errcode != CL_SUCCESS) return false;
561 }
563 return true;
564 }
566 uint32_t Device::GetNumDevices(DeviceType device_type)
567 {
568 cl_device_id cl_d_ids[MAX_DEVICES];
569 cl_uint num_devices = 0;
570 cl_uint num_cus = 0;
572 if (! GetDevices(device_type, cl_d_ids, &num_devices, &num_cus)) return 0;
574 // EVE, return the number of devices since each EVE is a device
575 // DSP, return the number of compute units since we maintain a
576 // queue to each compute unit (i.e. C66x DSP)
577 return device_type == DeviceType::EVE ? num_devices : num_cus;
578 }