]> Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - tidl/tidl-api.git/blob - tidl_api/src/ocl_device.cpp
Use DSP Built-in Kernels in TIDL-API
[tidl/tidl-api.git] / tidl_api / src / ocl_device.cpp
1 /******************************************************************************
2  * Copyright (c) 2017-2018  Texas Instruments Incorporated - http://www.ti.com/
3  *   All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Texas Instruments Incorporated nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  *****************************************************************************/
30 #include <cstdlib>
31 #include <cassert>
32 using std::size_t;
34 #include <iostream>
36 #include "ocl_device.h"
37 #include "ocl_util.h"
38 #include "trace.h"
40 using namespace tidl;
42 static const char* error2string(cl_int err);
43 static void        errorCheck(cl_int ret, int line);
45 Device::Device(cl_device_type t, const DeviceIds& ids, const char* name):
46                 device_type_m(t), device_ids_m(ids)
47 {
48     TRACE::print("\tOCL Device: %s created\n",
49                  device_type_m == CL_DEVICE_TYPE_CUSTOM ? name : "Unknown");
51     for (int i = 0; i < MAX_DEVICES; i++)
52         queue_m[i] = nullptr;
54 }
56 DspDevice::DspDevice(const DeviceIds& ids, const std::string &kernel_names):
57               Device(CL_DEVICE_TYPE_CUSTOM, ids, "DSP")
58 {
59     cl_int       errcode;
60     cl_device_id device_ids[MAX_DEVICES];
61     cl_device_id out_device_ids[MAX_DEVICES];
62     cl_uint      num_compute_units;
63     cl_uint      num_out_devices;
65     if (! GetDevices(DeviceType::DSP, device_ids, nullptr, &num_compute_units))
66         throw Exception("OpenCL DSP device not found",
67                         __FILE__, __FUNCTION__, __LINE__);
69     if (num_compute_units == 1)
70     {
71         num_out_devices   = 1;
72         out_device_ids[0] = device_ids[0];
73     }
74     else
75     {
76         // Create 2 sub-device's, each consisting of a C66x DSP
77         cl_device_partition_property properties[3] =
78                                         { CL_DEVICE_PARTITION_EQUALLY, 1, 0 };
80         // Query the number of sub-devices that can be created
81         const cl_uint NUM_SUB_DEVICES = 2;
82         errcode = clCreateSubDevices(device_ids[0],      // in_device
83                                      properties,         // properties
84                                      0,                  // num_devices
85                                      NULL,               // out_devices
86                                      &num_out_devices);  // num_devices_ret
87         errorCheck(errcode, __LINE__);
89         assert(num_out_devices == NUM_SUB_DEVICES);
91         // Create the sub-devices
92         errcode = clCreateSubDevices(device_ids[0],        // in_device
93                                      properties,           // properties
94                                      num_out_devices,      // num_devices
95                                      out_device_ids,       // out_devices
96                                      nullptr);             // num_devices_ret
97         errorCheck(errcode, __LINE__);
98     }
100     // Create a context containing the out-devices
101     context_m = clCreateContext(NULL,               // properties
102                                 num_out_devices,    // num_devices
103                                 out_device_ids,     // devices
104                                 NULL,               // pfn_notify
105                                 NULL,               // user_data
106                                 &errcode);          // errcode_ret
107     errorCheck(errcode, __LINE__);
109     // Create queues to each out device
110     for (auto id : device_ids_m)
111     {
112         cl_uint index = static_cast<cl_uint>(id);
113         assert(index < num_out_devices);
114         queue_m[index] = clCreateCommandQueue(context_m,
115                                         out_device_ids[index],
116                                         CL_QUEUE_PROFILING_ENABLE|
117                                         CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
118                                         &errcode);
119         errorCheck(errcode, __LINE__);
120     }
122     // Build kernel program
123     BuildBuiltInProgram(kernel_names, out_device_ids, num_out_devices);
125     // Query device frequency
126     errcode = clGetDeviceInfo(device_ids[0],
127                               CL_DEVICE_MAX_CLOCK_FREQUENCY,
128                               sizeof(freq_in_mhz_m),
129                               &freq_in_mhz_m,
130                               nullptr);
131     errorCheck(errcode, __LINE__);
135 EveDevice::EveDevice(const DeviceIds& ids, const std::string &kernel_names):
136             Device(CL_DEVICE_TYPE_CUSTOM, ids, "EVE")
138     cl_int       errcode;
139     cl_device_id all_device_ids[MAX_DEVICES];
140     cl_uint      num_devices;
141     if (! GetDevices(DeviceType::EVE, all_device_ids, &num_devices, nullptr))
142         throw Exception("OpenCL EVE device not found",
143                         __FILE__, __FUNCTION__, __LINE__);
145     assert (num_devices >= device_ids_m.size());
147     context_m = clCreateContextFromType(0,              // properties
148                                         device_type_m,  // device_type
149                                         0,              // pfn_notify
150                                         0,              // user_data
151                                         &errcode);
152     errorCheck(errcode, __LINE__);
154     // Create command queues to OpenCL devices specified by the
155     // device_ids_m set.
156     for (auto id : device_ids_m)
157     {
158         int index = static_cast<int>(id);
159         queue_m[index] = clCreateCommandQueue(context_m,
160                                       all_device_ids[index],
161                                       CL_QUEUE_PROFILING_ENABLE|
162                                       CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
163                                       &errcode);
164         errorCheck(errcode, __LINE__);
165     }
167     BuildBuiltInProgram(kernel_names, all_device_ids, device_ids_m.size());
169     errcode = clGetDeviceInfo(all_device_ids[0],
170                                 CL_DEVICE_MAX_CLOCK_FREQUENCY,
171                                 sizeof(freq_in_mhz_m),
172                                 &freq_in_mhz_m,
173                                 nullptr);
174     errorCheck(errcode, __LINE__);
177 bool DspDevice::BuildBuiltInProgram(const std::string& kernel_names,
178                                     cl_device_id device_ids[],
179                                     int num_devices)
181     cl_int err;
182     program_m = clCreateProgramWithBuiltInKernels(context_m,
183                                           num_devices,
184                                           device_ids,  // device_list
185                                           kernel_names.c_str(),
186                                           &err);
187     errorCheck(err, __LINE__);
189     return true;
192 bool EveDevice::BuildBuiltInProgram(const std::string& kernel_names,
193                                     cl_device_id device_ids[],
194                                     int num_devices)
196     cl_int err;
197     cl_device_id executor_device_ids[MAX_DEVICES];
199     int i = 0;
200     for (auto id : device_ids_m)
201         executor_device_ids[i++] = device_ids[static_cast<int>(id)];
203     program_m = clCreateProgramWithBuiltInKernels(context_m,
204                                           num_devices,
205                                           executor_device_ids,  // device_list
206                                           kernel_names.c_str(),
207                                           &err);
208     errorCheck(err, __LINE__);
210     return true;
213 Kernel::Kernel(Device* device, const std::string& name,
214                const KernelArgs& args, uint8_t device_index):
215            name_m(name), device_m(device), device_index_m(device_index)
217     TRACE::print("Creating kernel %s\n", name.c_str());
218     cl_int err;
219     kernel_m = clCreateKernel(device_m->program_m, name_m.c_str(), &err);
220     errorCheck(err, __LINE__);
222     for (int i=0; i < tidl::internal::NUM_CONTEXTS; i++)
223         event_m[i] = nullptr;
225     int arg_index = 0;
226     for (const auto& arg : args)
227     {
228         if (!arg.isLocal())
229         {
230             if (arg.kind() == DeviceArgInfo::Kind::BUFFER)
231             {
232                 cl_mem buffer = device_m->CreateBuffer(arg);
234                 clSetKernelArg(kernel_m, arg_index, sizeof(cl_mem), &buffer);
235                 TRACE::print("  Arg[%d]: %p\n", arg_index, buffer);
237                 if (buffer)
238                     buffers_m.push_back(buffer);
239             }
240             else if (arg.kind() == DeviceArgInfo::Kind::SCALAR)
241             {
242                 clSetKernelArg(kernel_m, arg_index, arg.size(), arg.ptr());
243                 TRACE::print("  Arg[%d]: %p\n", arg_index, arg.ptr());
244             }
245             else
246             {
247                 assert ("DeviceArgInfo kind not supported");
248             }
249         }
250         else
251         {
252             clSetKernelArg(kernel_m, arg_index, arg.size(), NULL);
253             TRACE::print("  Arg[%d]: local, %d\n", arg_index, arg.size());
254         }
255         arg_index++;
257     }
260 bool Kernel::UpdateScalarArg(uint32_t index, size_t size, const void *value)
262     cl_int ret = clSetKernelArg(kernel_m, index, size, value);
263     return ret == CL_SUCCESS;
266 Kernel& Kernel::RunAsync(uint32_t context_idx)
268     // Execute kernel
269     TRACE::print("\tKernel: device %d executing %s, context %d\n",
270                  device_index_m, name_m.c_str(), context_idx);
271     cl_int ret = clEnqueueTask(device_m->queue_m[device_index_m],
272                                kernel_m, 0, 0, &event_m[context_idx]);
273     errorCheck(ret, __LINE__);
275     return *this;
278 bool Kernel::Wait(uint32_t context_idx)
280     // Wait called without a corresponding RunAsync
281     if (event_m[context_idx] == nullptr)
282         return false;
284     TRACE::print("\tKernel: waiting context %d...\n", context_idx);
285     cl_int ret = clWaitForEvents(1, &event_m[context_idx]);
286     errorCheck(ret, __LINE__);
288     ret = clReleaseEvent(event_m[context_idx]);
289     errorCheck(ret, __LINE__);
290     event_m[context_idx] = nullptr;
292     TRACE::print("\tKernel: finished execution\n");
294     return true;
297 extern void CallbackWrapper(void *user_data) __attribute__((weak));
299 static
300 void EventCallback(cl_event event, cl_int exec_status, void *user_data)
302     if (exec_status != CL_SUCCESS || user_data == nullptr)  return;
303     if (CallbackWrapper)  CallbackWrapper(user_data);
306 bool Kernel::AddCallback(void *user_data, uint32_t context_idx)
308     if (event_m[context_idx] == nullptr)
309         return false;
311     return clSetEventCallback(event_m[context_idx], CL_COMPLETE, EventCallback,
312                               user_data) == CL_SUCCESS;
315 Kernel::~Kernel()
317     for (auto b : buffers_m)
318         device_m->ReleaseBuffer(b);
320     clReleaseKernel(kernel_m);
323 cl_mem Device::CreateBuffer(const DeviceArgInfo &Arg)
325     size_t  size     = Arg.size();
326     void   *host_ptr = Arg.ptr();
328     if (host_ptr == nullptr)
329     {
330         TRACE::print("\tOCL Create B:%p\n", nullptr);
331         return nullptr;
332     }
334     bool hostPtrInCMEM = __is_in_malloced_region(host_ptr);
336     // Conservative till we have sufficient information.
337     cl_mem_flags flag = CL_MEM_READ_WRITE;
339     if (hostPtrInCMEM) flag |= (cl_mem_flags)CL_MEM_USE_HOST_PTR;
340     else               flag |= (cl_mem_flags)CL_MEM_COPY_HOST_PTR;
342     cl_int       errcode;
343     cl_mem buffer = clCreateBuffer(context_m,
344                                    flag,
345                                    size,
346                                    host_ptr,
347                                    &errcode);
348     errorCheck(errcode, __LINE__);
350     TRACE::print("\tOCL Create B:%p\n", buffer);
352     return buffer;
355 void Device::ReleaseBuffer(cl_mem M)
357     TRACE::print("\tOCL Release B:%p\n", M);
358     clReleaseMemObject(M);
361 /// Release resources associated with an OpenCL device
362 Device::~Device()
364     TRACE::print("\tOCL Device: deleted\n");
365     for (unsigned int i = 0; i < device_ids_m.size(); i++)
366     {
367         clFinish(queue_m[i]);
368         clReleaseCommandQueue (queue_m[i]);
369     }
371     clReleaseProgram      (program_m);
372     clReleaseContext      (context_m);
375 void errorCheck(cl_int ret, int line)
377     if (ret != CL_SUCCESS)
378     {
379         std::cerr << "ERROR: [ Line: " << line << "] " << error2string(ret) << std::endl;
380         exit(ret);
381     }
384 /// Convert OpenCL error codes to a string
385 const char* error2string(cl_int err)
387     switch(err)
388     {
389          case   0: return "CL_SUCCESS";
390          case  -1: return "CL_DEVICE_NOT_FOUND";
391          case  -2: return "CL_DEVICE_NOT_AVAILABLE";
392          case  -3: return "CL_COMPILER_NOT_AVAILABLE";
393          case  -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
394          case  -5: return "CL_OUT_OF_RESOURCES";
395          case  -6: return "CL_OUT_OF_HOST_MEMORY";
396          case  -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
397          case  -8: return "CL_MEM_COPY_OVERLAP";
398          case  -9: return "CL_IMAGE_FORMAT_MISMATCH";
399          case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
400          case -11: return "CL_BUILD_PROGRAM_FAILURE";
401          case -12: return "CL_MAP_FAILURE";
403          case -30: return "CL_INVALID_VALUE";
404          case -31: return "CL_INVALID_DEVICE_TYPE";
405          case -32: return "CL_INVALID_PLATFORM";
406          case -33: return "CL_INVALID_DEVICE";
407          case -34: return "CL_INVALID_CONTEXT";
408          case -35: return "CL_INVALID_QUEUE_PROPERTIES";
409          case -36: return "CL_INVALID_COMMAND_QUEUE";
410          case -37: return "CL_INVALID_HOST_PTR";
411          case -38: return "CL_INVALID_MEM_OBJECT";
412          case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
413          case -40: return "CL_INVALID_IMAGE_SIZE";
414          case -41: return "CL_INVALID_SAMPLER";
415          case -42: return "CL_INVALID_BINARY";
416          case -43: return "CL_INVALID_BUILD_OPTIONS";
417          case -44: return "CL_INVALID_PROGRAM";
418          case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
419          case -46: return "CL_INVALID_KERNEL_NAME";
420          case -47: return "CL_INVALID_KERNEL_DEFINITION";
421          case -48: return "CL_INVALID_KERNEL";
422          case -49: return "CL_INVALID_ARG_INDEX";
423          case -50: return "CL_INVALID_ARG_VALUE";
424          case -51: return "CL_INVALID_ARG_SIZE";
425          case -52: return "CL_INVALID_KERNEL_ARGS";
426          case -53: return "CL_INVALID_WORK_DIMENSION";
427          case -54: return "CL_INVALID_WORK_GROUP_SIZE";
428          case -55: return "CL_INVALID_WORK_ITEM_SIZE";
429          case -56: return "CL_INVALID_GLOBAL_OFFSET";
430          case -57: return "CL_INVALID_EVENT_WAIT_LIST";
431          case -58: return "CL_INVALID_EVENT";
432          case -59: return "CL_INVALID_OPERATION";
433          case -60: return "CL_INVALID_GL_OBJECT";
434          case -61: return "CL_INVALID_BUFFER_SIZE";
435          case -62: return "CL_INVALID_MIP_LEVEL";
436          case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
437          default: return "Unknown OpenCL error";
438     }
441 Device::Ptr Device::Create(DeviceType core_type, const DeviceIds& ids,
442                            const std::string& name)
444     Device::Ptr p(nullptr);
445     if (core_type == DeviceType::DSP)
446         p.reset(new DspDevice(ids, name));
447     else if (core_type == DeviceType::EVE)
448         p.reset(new EveDevice(ids, name));
450     return p;
453 // Minimum version of OpenCL required for this version of TIDL API
454 #define MIN_OCL_VERSION "01.01.18.00"
455 static bool CheckOpenCLVersion(cl_platform_id id)
457     cl_int err;
458     size_t length;
459     err = clGetPlatformInfo(id, CL_PLATFORM_VERSION, 0, nullptr, &length);
460     if (err != CL_SUCCESS) return false;
462     std::unique_ptr<char> version(new char[length]);
463     err = clGetPlatformInfo(id, CL_PLATFORM_VERSION, length, version.get(),
464                             nullptr);
465     if (err != CL_SUCCESS) return false;
467     std::string v(version.get());
469     if (v.substr(v.find("01."), sizeof(MIN_OCL_VERSION)) >= MIN_OCL_VERSION)
470         return true;
472     std::cerr << "TIDL API Error: OpenCL " << MIN_OCL_VERSION
473               << " or higher required." << std::endl;
475     return false;
478 static bool PlatformIsAM57()
480     cl_platform_id id;
481     cl_int err;
483     err = clGetPlatformIDs(1, &id, nullptr);
484     if (err != CL_SUCCESS) return false;
486     if (!CheckOpenCLVersion(id))
487        return false;
489     // Check if the device name is AM57
490     size_t length;
491     err = clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, nullptr, &length);
492     if (err != CL_SUCCESS) return false;
494     std::unique_ptr<char> name(new char[length]);
496     err = clGetPlatformInfo(id, CL_PLATFORM_NAME, length, name.get(), nullptr);
497     if (err != CL_SUCCESS) return false;
499     std::string platform_name(name.get());
501     if (platform_name.find("AM57") == std::string::npos)
502         return false;
504     return true;
507 // TI DL is supported on AM57x - EVE or C66x devices
508 bool Device::GetDevices(DeviceType device_type,
509                         cl_device_id cl_d_ids[],
510                         cl_uint *p_num_devices,
511                         cl_uint *p_num_compute_units)
513     if (!PlatformIsAM57()) return false;
515     // Convert DeviceType to OpenCL device type
516     cl_device_type t = CL_DEVICE_TYPE_CUSTOM;
518     // Find all the OpenCL custom devices available
519     cl_uint num_devices_found;
520     cl_device_id all_device_ids[MAX_DEVICES];
522     cl_int errcode = clGetDeviceIDs(0,                   // platform
523                                     t,                   // device_type
524                                     MAX_DEVICES,         // num_entries
525                                     all_device_ids,      // devices
526                                     &num_devices_found); // num_devices
529     if (errcode != CL_SUCCESS)            return false;
530     if (num_devices_found == 0)           return false;
532     // Find devices according to device_type
533     // DSP: ACCELERATOR | CUSTOM
534     // EVE: CUSTOM
535     cl_uint num_devices = 0;
536     for (cl_uint i = 0; i < num_devices_found; i++)
537     {
538         cl_device_type cl_d_type;
539         errcode = clGetDeviceInfo(all_device_ids[i], CL_DEVICE_TYPE,
540                                   sizeof(cl_device_type), &cl_d_type, nullptr);
541         if (errcode != CL_SUCCESS) return false;
543         if ((device_type == DeviceType::DSP &&
544                ((cl_d_type & CL_DEVICE_TYPE_ACCELERATOR) != 0)) ||
545             (device_type == DeviceType::EVE &&
546                ((cl_d_type & CL_DEVICE_TYPE_ACCELERATOR) == 0)))
547             cl_d_ids[num_devices++] = all_device_ids[i];
548     }
549     if (p_num_devices != nullptr)  *p_num_devices = num_devices;
551     // DSP, return the number of compute units
552     if (device_type == DeviceType::DSP &&
553         num_devices > 0 && p_num_compute_units != nullptr)
554     {
555         errcode = clGetDeviceInfo(cl_d_ids[0],
556                                 CL_DEVICE_MAX_COMPUTE_UNITS,
557                                 sizeof(cl_int),
558                                 p_num_compute_units,
559                                 nullptr);
560         if (errcode != CL_SUCCESS)  return false;
561     }
563     return true;
566 uint32_t Device::GetNumDevices(DeviceType device_type)
568     cl_device_id cl_d_ids[MAX_DEVICES];
569     cl_uint num_devices = 0;
570     cl_uint num_cus     = 0;
572     if (! GetDevices(device_type, cl_d_ids, &num_devices, &num_cus))  return 0;
574     // EVE, return the number of devices since each EVE is a device
575     // DSP, return the number of compute units since we maintain a
576     //      queue to each compute unit (i.e. C66x DSP)
577     return device_type == DeviceType::EVE ? num_devices : num_cus;