Fix unique_ptr that holds an allocated array
[tidl/tidl-api.git] / tidl_api / src / ocl_device.cpp
1 /******************************************************************************
2  * Copyright (c) 2017-2018  Texas Instruments Incorporated - http://www.ti.com/
3  *   All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *     * Redistributions of source code must retain the above copyright
8  *       notice, this list of conditions and the following disclaimer.
9  *     * Redistributions in binary form must reproduce the above copyright
10  *       notice, this list of conditions and the following disclaimer in the
11  *       documentation and/or other materials provided with the distribution.
12  *     * Neither the name of Texas Instruments Incorporated nor the
13  *       names of its contributors may be used to endorse or promote products
14  *       derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  *****************************************************************************/
30 #include <cstdlib>
31 #include <cassert>
32 using std::size_t;
34 #include <iostream>
36 #include "ocl_device.h"
37 #include "ocl_util.h"
38 #include "trace.h"
40 using namespace tidl;
42 static const char* error2string(cl_int err);
43 static void        errorCheck(cl_int ret, int line);
45 Device::Device(cl_device_type t, const DeviceIds& ids, const char* name):
46                 device_type_m(t), device_ids_m(ids)
47 {
48     TRACE::print("\tOCL Device: %s created\n",
49                  device_type_m == CL_DEVICE_TYPE_CUSTOM ? name : "Unknown");
51     for (int i = 0; i < MAX_DEVICES; i++)
52         queue_m[i] = nullptr;
54 }
56 DspDevice::DspDevice(const DeviceIds& ids, const std::string &kernel_names):
57               Device(CL_DEVICE_TYPE_CUSTOM, ids, "DSP")
58 {
59     cl_int       errcode;
60     cl_device_id device_ids[MAX_DEVICES];
61     cl_device_id out_device_ids[MAX_DEVICES];
62     cl_uint      num_compute_units;
63     cl_uint      num_out_devices;
65     if (! GetDevices(DeviceType::DSP, device_ids, nullptr, &num_compute_units))
66         throw Exception("OpenCL DSP device not found",
67                         __FILE__, __FUNCTION__, __LINE__);
69     if (num_compute_units == 1)
70     {
71         num_out_devices   = 1;
72         out_device_ids[0] = device_ids[0];
73     }
74     else
75     {
76         // Create 2 sub-device's, each consisting of a C66x DSP
77         cl_device_partition_property properties[3] =
78                                         { CL_DEVICE_PARTITION_EQUALLY, 1, 0 };
80         // Query the number of sub-devices that can be created
81         const cl_uint NUM_SUB_DEVICES = 2;
82         errcode = clCreateSubDevices(device_ids[0],      // in_device
83                                      properties,         // properties
84                                      0,                  // num_devices
85                                      NULL,               // out_devices
86                                      &num_out_devices);  // num_devices_ret
87         errorCheck(errcode, __LINE__);
89         assert(num_out_devices == NUM_SUB_DEVICES);
91         // Create the sub-devices
92         errcode = clCreateSubDevices(device_ids[0],        // in_device
93                                      properties,           // properties
94                                      num_out_devices,      // num_devices
95                                      out_device_ids,       // out_devices
96                                      nullptr);             // num_devices_ret
97         errorCheck(errcode, __LINE__);
98     }
100     // Create a context containing the out-devices
101     context_m = clCreateContext(NULL,               // properties
102                                 num_out_devices,    // num_devices
103                                 out_device_ids,     // devices
104                                 NULL,               // pfn_notify
105                                 NULL,               // user_data
106                                 &errcode);          // errcode_ret
107     errorCheck(errcode, __LINE__);
109     // Create queues to each out device
110     for (auto id : device_ids_m)
111     {
112         cl_uint index = static_cast<cl_uint>(id);
113         assert(index < num_out_devices);
114         queue_m[index] = clCreateCommandQueue(context_m,
115                                         out_device_ids[index],
116                                         CL_QUEUE_PROFILING_ENABLE|
117                                         CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
118                                         &errcode);
119         errorCheck(errcode, __LINE__);
120     }
122     // Build kernel program
123     BuildBuiltInProgram(kernel_names, out_device_ids, num_out_devices);
125     // Query device frequency
126     errcode = clGetDeviceInfo(device_ids[0],
127                               CL_DEVICE_MAX_CLOCK_FREQUENCY,
128                               sizeof(freq_in_mhz_m),
129                               &freq_in_mhz_m,
130                               nullptr);
131     errorCheck(errcode, __LINE__);
135 EveDevice::EveDevice(const DeviceIds& ids, const std::string &kernel_names):
136             Device(CL_DEVICE_TYPE_CUSTOM, ids, "EVE")
138     cl_int       errcode;
139     cl_device_id all_device_ids[MAX_DEVICES];
140     cl_uint      num_devices;
141     if (! GetDevices(DeviceType::EVE, all_device_ids, &num_devices, nullptr))
142         throw Exception("OpenCL EVE device not found",
143                         __FILE__, __FUNCTION__, __LINE__);
145     assert (num_devices >= device_ids_m.size());
147     context_m = clCreateContextFromType(0,              // properties
148                                         device_type_m,  // device_type
149                                         0,              // pfn_notify
150                                         0,              // user_data
151                                         &errcode);
152     errorCheck(errcode, __LINE__);
154     // Create command queues to OpenCL devices specified by the
155     // device_ids_m set.
156     for (auto id : device_ids_m)
157     {
158         int index = static_cast<int>(id);
159         queue_m[index] = clCreateCommandQueue(context_m,
160                                       all_device_ids[index],
161                                       CL_QUEUE_PROFILING_ENABLE|
162                                       CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
163                                       &errcode);
164         errorCheck(errcode, __LINE__);
165     }
167     BuildBuiltInProgram(kernel_names, all_device_ids, device_ids_m.size());
169     errcode = clGetDeviceInfo(all_device_ids[0],
170                                 CL_DEVICE_MAX_CLOCK_FREQUENCY,
171                                 sizeof(freq_in_mhz_m),
172                                 &freq_in_mhz_m,
173                                 nullptr);
174     errorCheck(errcode, __LINE__);
177 bool DspDevice::BuildBuiltInProgram(const std::string& kernel_names,
178                                     cl_device_id device_ids[],
179                                     int num_devices)
181     cl_int err;
182     program_m = clCreateProgramWithBuiltInKernels(context_m,
183                                           num_devices,
184                                           device_ids,  // device_list
185                                           kernel_names.c_str(),
186                                           &err);
187     errorCheck(err, __LINE__);
189     return true;
192 bool EveDevice::BuildBuiltInProgram(const std::string& kernel_names,
193                                     cl_device_id device_ids[],
194                                     int num_devices)
196     cl_int err;
197     cl_device_id executor_device_ids[MAX_DEVICES];
199     int i = 0;
200     for (auto id : device_ids_m)
201         executor_device_ids[i++] = device_ids[static_cast<int>(id)];
203     program_m = clCreateProgramWithBuiltInKernels(context_m,
204                                           num_devices,
205                                           executor_device_ids,  // device_list
206                                           kernel_names.c_str(),
207                                           &err);
208     errorCheck(err, __LINE__);
210     return true;
213 Kernel::Kernel(Device* device, const std::string& name,
214                const KernelArgs& args, uint8_t device_index):
215            name_m(name), device_m(device), device_index_m(device_index)
217     TRACE::print("Creating kernel %s\n", name.c_str());
218     cl_int err;
219     kernel_m = clCreateKernel(device_m->program_m, name_m.c_str(), &err);
220     errorCheck(err, __LINE__);
222     for (int i=0; i < tidl::internal::NUM_CONTEXTS; i++)
223         event_m[i] = nullptr;
225     int arg_index = 0;
226     for (const auto& arg : args)
227     {
228         if (!arg.isLocal())
229         {
230             if (arg.kind() == DeviceArgInfo::Kind::BUFFER)
231             {
232                 cl_mem buffer = device_m->CreateBuffer(arg);
234                 clSetKernelArg(kernel_m, arg_index, sizeof(cl_mem), &buffer);
235                 TRACE::print("  Arg[%d]: %p\n", arg_index, buffer);
237                 if (buffer)
238                     buffers_m.push_back(buffer);
239             }
240             else if (arg.kind() == DeviceArgInfo::Kind::SCALAR)
241             {
242                 clSetKernelArg(kernel_m, arg_index, arg.size(), arg.ptr());
243                 TRACE::print("  Arg[%d]: %p\n", arg_index, arg.ptr());
244             }
245             else
246             {
247                 assert ("DeviceArgInfo kind not supported");
248             }
249         }
250         else
251         {
252             clSetKernelArg(kernel_m, arg_index, arg.size(), NULL);
253             TRACE::print("  Arg[%d]: local, %d\n", arg_index, arg.size());
254         }
255         arg_index++;
257     }
260 bool Kernel::UpdateScalarArg(uint32_t index, size_t size, const void *value)
262     cl_int ret = clSetKernelArg(kernel_m, index, size, value);
263     return ret == CL_SUCCESS;
266 Kernel& Kernel::RunAsync(uint32_t context_idx)
268     // Execute kernel
269     TRACE::print("\tKernel: %s device %d executing %s, context %d\n",
270                  device_m->GetDeviceName().c_str(),
271                  device_index_m, name_m.c_str(), context_idx);
272     cl_int ret = clEnqueueTask(device_m->queue_m[device_index_m],
273                                kernel_m, 0, 0, &event_m[context_idx]);
274     errorCheck(ret, __LINE__);
276     return *this;
279 bool Kernel::Wait(uint32_t context_idx)
281     // Wait called without a corresponding RunAsync
282     if (event_m[context_idx] == nullptr)
283         return false;
285     TRACE::print("\tKernel: waiting context %d...\n", context_idx);
286     cl_int ret = clWaitForEvents(1, &event_m[context_idx]);
287     errorCheck(ret, __LINE__);
289     ret = clReleaseEvent(event_m[context_idx]);
290     errorCheck(ret, __LINE__);
291     event_m[context_idx] = nullptr;
293     TRACE::print("\tKernel: finished execution\n");
295     return true;
298 extern void CallbackWrapper(void *user_data) __attribute__((weak));
300 static
301 void EventCallback(cl_event event, cl_int exec_status, void *user_data)
303     if (exec_status != CL_SUCCESS || user_data == nullptr)  return;
304     if (CallbackWrapper)  CallbackWrapper(user_data);
307 bool Kernel::AddCallback(void *user_data, uint32_t context_idx)
309     if (event_m[context_idx] == nullptr)
310         return false;
312     return clSetEventCallback(event_m[context_idx], CL_COMPLETE, EventCallback,
313                               user_data) == CL_SUCCESS;
316 Kernel::~Kernel()
318     for (auto b : buffers_m)
319         device_m->ReleaseBuffer(b);
321     clReleaseKernel(kernel_m);
324 cl_mem Device::CreateBuffer(const DeviceArgInfo &Arg)
326     size_t  size     = Arg.size();
327     void   *host_ptr = Arg.ptr();
329     if (host_ptr == nullptr)
330     {
331         TRACE::print("\tOCL Create B:%p\n", nullptr);
332         return nullptr;
333     }
335     bool hostPtrInCMEM = __is_in_malloced_region(host_ptr);
337     // Conservative till we have sufficient information.
338     cl_mem_flags flag = CL_MEM_READ_WRITE;
340     if (hostPtrInCMEM) flag |= (cl_mem_flags)CL_MEM_USE_HOST_PTR;
341     else               flag |= (cl_mem_flags)CL_MEM_COPY_HOST_PTR;
343     cl_int       errcode;
344     cl_mem buffer = clCreateBuffer(context_m,
345                                    flag,
346                                    size,
347                                    host_ptr,
348                                    &errcode);
349     errorCheck(errcode, __LINE__);
351     TRACE::print("\tOCL Create B:%p\n", buffer);
353     return buffer;
356 void Device::ReleaseBuffer(cl_mem M)
358     TRACE::print("\tOCL Release B:%p\n", M);
359     clReleaseMemObject(M);
362 /// Release resources associated with an OpenCL device
363 Device::~Device()
365     TRACE::print("\tOCL Device: deleted\n");
366     for (unsigned int i = 0; i < device_ids_m.size(); i++)
367     {
368         clFinish(queue_m[i]);
369         clReleaseCommandQueue (queue_m[i]);
370     }
372     clReleaseProgram      (program_m);
373     clReleaseContext      (context_m);
376 void errorCheck(cl_int ret, int line)
378     if (ret != CL_SUCCESS)
379     {
380         std::cerr << "ERROR: [ Line: " << line << "] " << error2string(ret) << std::endl;
381         exit(ret);
382     }
385 /// Convert OpenCL error codes to a string
386 const char* error2string(cl_int err)
388     switch(err)
389     {
390          case   0: return "CL_SUCCESS";
391          case  -1: return "CL_DEVICE_NOT_FOUND";
392          case  -2: return "CL_DEVICE_NOT_AVAILABLE";
393          case  -3: return "CL_COMPILER_NOT_AVAILABLE";
394          case  -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
395          case  -5: return "CL_OUT_OF_RESOURCES";
396          case  -6: return "CL_OUT_OF_HOST_MEMORY";
397          case  -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
398          case  -8: return "CL_MEM_COPY_OVERLAP";
399          case  -9: return "CL_IMAGE_FORMAT_MISMATCH";
400          case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
401          case -11: return "CL_BUILD_PROGRAM_FAILURE";
402          case -12: return "CL_MAP_FAILURE";
404          case -30: return "CL_INVALID_VALUE";
405          case -31: return "CL_INVALID_DEVICE_TYPE";
406          case -32: return "CL_INVALID_PLATFORM";
407          case -33: return "CL_INVALID_DEVICE";
408          case -34: return "CL_INVALID_CONTEXT";
409          case -35: return "CL_INVALID_QUEUE_PROPERTIES";
410          case -36: return "CL_INVALID_COMMAND_QUEUE";
411          case -37: return "CL_INVALID_HOST_PTR";
412          case -38: return "CL_INVALID_MEM_OBJECT";
413          case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
414          case -40: return "CL_INVALID_IMAGE_SIZE";
415          case -41: return "CL_INVALID_SAMPLER";
416          case -42: return "CL_INVALID_BINARY";
417          case -43: return "CL_INVALID_BUILD_OPTIONS";
418          case -44: return "CL_INVALID_PROGRAM";
419          case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
420          case -46: return "CL_INVALID_KERNEL_NAME";
421          case -47: return "CL_INVALID_KERNEL_DEFINITION";
422          case -48: return "CL_INVALID_KERNEL";
423          case -49: return "CL_INVALID_ARG_INDEX";
424          case -50: return "CL_INVALID_ARG_VALUE";
425          case -51: return "CL_INVALID_ARG_SIZE";
426          case -52: return "CL_INVALID_KERNEL_ARGS";
427          case -53: return "CL_INVALID_WORK_DIMENSION";
428          case -54: return "CL_INVALID_WORK_GROUP_SIZE";
429          case -55: return "CL_INVALID_WORK_ITEM_SIZE";
430          case -56: return "CL_INVALID_GLOBAL_OFFSET";
431          case -57: return "CL_INVALID_EVENT_WAIT_LIST";
432          case -58: return "CL_INVALID_EVENT";
433          case -59: return "CL_INVALID_OPERATION";
434          case -60: return "CL_INVALID_GL_OBJECT";
435          case -61: return "CL_INVALID_BUFFER_SIZE";
436          case -62: return "CL_INVALID_MIP_LEVEL";
437          case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
438          default: return "Unknown OpenCL error";
439     }
442 Device::Ptr Device::Create(DeviceType core_type, const DeviceIds& ids,
443                            const std::string& name)
445     Device::Ptr p(nullptr);
446     if (core_type == DeviceType::DSP)
447         p.reset(new DspDevice(ids, name));
448     else if (core_type == DeviceType::EVE)
449         p.reset(new EveDevice(ids, name));
451     return p;
454 // Minimum version of OpenCL required for this version of TIDL API
455 #define MIN_OCL_VERSION "01.01.18.00"
456 static bool CheckOpenCLVersion(cl_platform_id id)
458     cl_int err;
459     size_t length;
460     err = clGetPlatformInfo(id, CL_PLATFORM_VERSION, 0, nullptr, &length);
461     if (err != CL_SUCCESS) return false;
463     std::unique_ptr<char[]> version(new char[length]);
464     err = clGetPlatformInfo(id, CL_PLATFORM_VERSION, length, version.get(),
465                             nullptr);
466     if (err != CL_SUCCESS) return false;
468     std::string v(version.get());
470     if (v.substr(v.find("01."), sizeof(MIN_OCL_VERSION)) >= MIN_OCL_VERSION)
471         return true;
473     std::cerr << "TIDL API Error: OpenCL " << MIN_OCL_VERSION
474               << " or higher required." << std::endl;
476     return false;
479 static bool PlatformIsAM57()
481     cl_platform_id id;
482     cl_int err;
484     err = clGetPlatformIDs(1, &id, nullptr);
485     if (err != CL_SUCCESS) return false;
487     if (!CheckOpenCLVersion(id))
488        return false;
490     // Check if the device name is AM57
491     size_t length;
492     err = clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, nullptr, &length);
493     if (err != CL_SUCCESS) return false;
495     std::unique_ptr<char[]> name(new char[length]);
497     err = clGetPlatformInfo(id, CL_PLATFORM_NAME, length, name.get(), nullptr);
498     if (err != CL_SUCCESS) return false;
500     std::string platform_name(name.get());
502     if (platform_name.find("AM57") == std::string::npos)
503         return false;
505     return true;
508 // TI DL is supported on AM57x - EVE or C66x devices
509 bool Device::GetDevices(DeviceType device_type,
510                         cl_device_id cl_d_ids[],
511                         cl_uint *p_num_devices,
512                         cl_uint *p_num_compute_units)
514     if (!PlatformIsAM57()) return false;
516     // Convert DeviceType to OpenCL device type
517     cl_device_type t = CL_DEVICE_TYPE_CUSTOM;
519     // Find all the OpenCL custom devices available
520     cl_uint num_devices_found;
521     cl_device_id all_device_ids[MAX_DEVICES];
523     cl_int errcode = clGetDeviceIDs(0,                   // platform
524                                     t,                   // device_type
525                                     MAX_DEVICES,         // num_entries
526                                     all_device_ids,      // devices
527                                     &num_devices_found); // num_devices
530     if (errcode != CL_SUCCESS)            return false;
531     if (num_devices_found == 0)           return false;
533     // Find devices according to device_type
534     // DSP: ACCELERATOR | CUSTOM
535     // EVE: CUSTOM
536     cl_uint num_devices = 0;
537     for (cl_uint i = 0; i < num_devices_found; i++)
538     {
539         cl_device_type cl_d_type;
540         errcode = clGetDeviceInfo(all_device_ids[i], CL_DEVICE_TYPE,
541                                   sizeof(cl_device_type), &cl_d_type, nullptr);
542         if (errcode != CL_SUCCESS) return false;
544         if ((device_type == DeviceType::DSP &&
545                ((cl_d_type & CL_DEVICE_TYPE_ACCELERATOR) != 0)) ||
546             (device_type == DeviceType::EVE &&
547                ((cl_d_type & CL_DEVICE_TYPE_ACCELERATOR) == 0)))
548             cl_d_ids[num_devices++] = all_device_ids[i];
549     }
550     if (p_num_devices != nullptr)  *p_num_devices = num_devices;
552     // DSP, return the number of compute units
553     if (device_type == DeviceType::DSP &&
554         num_devices > 0 && p_num_compute_units != nullptr)
555     {
556         errcode = clGetDeviceInfo(cl_d_ids[0],
557                                 CL_DEVICE_MAX_COMPUTE_UNITS,
558                                 sizeof(cl_int),
559                                 p_num_compute_units,
560                                 nullptr);
561         if (errcode != CL_SUCCESS)  return false;
562     }
564     return true;
567 uint32_t Device::GetNumDevices(DeviceType device_type)
569     cl_device_id cl_d_ids[MAX_DEVICES];
570     cl_uint num_devices = 0;
571     cl_uint num_cus     = 0;
573     if (! GetDevices(device_type, cl_d_ids, &num_devices, &num_cus))  return 0;
575     // EVE, return the number of devices since each EVE is a device
576     // DSP, return the number of compute units since we maintain a
577     //      queue to each compute unit (i.e. C66x DSP)
578     return device_type == DeviceType::EVE ? num_devices : num_cus;