Use DSP Built-in Kernels in TIDL-API
author Yuan Zhao <yuanzhao@ti.com>
Tue, 12 Mar 2019 17:34:56 +0000 (12:34 -0500)
committer Yuan Zhao <yuanzhao@ti.com>
Wed, 13 Mar 2019 16:42:23 +0000 (11:42 -0500)
- Replace previously used kernel wrappers
- MCT-1143, MCT-1154

tidl_api/Makefile
tidl_api/dsp/Makefile [deleted file]
tidl_api/dsp/ocl_wrapper.cl [deleted file]
tidl_api/src/execution_object.cpp
tidl_api/src/executor.cpp
tidl_api/src/ocl_device.cpp
tidl_api/src/ocl_device.h

diff --git a/tidl_api/Makefile b/tidl_api/Makefile
index abae078564733d1f36d547f274741a3a8566719c..8da13e482cc408f2f852506a2c98bd8c5131cd27 100644 (file)
@@ -26,8 +26,6 @@
 PYTHON_INCLUDE_DIR ?= $(wildcard $(TARGET_ROOTDIR)/usr/include/python3*)
 
 LIB_NAME = tidl_api.a
-DSP_SRCFILE = dsp/ocl_wrapper.cl
-DSP_OUTFILE = dsp/ocl_wrapper.dsp_h
 LIB_IMGUTIL_NAME = tidl_imgutil.a
 PY_LIB_NAME = tidl.so
 
@@ -78,12 +76,6 @@ PY_INCLUDE = -I$(PYTHON_INCLUDE_DIR) -I$(PYBIND11_INC_DIR)
 # prevent name clashed when multiple shared libraries use pybind11
 $(HOST_OBJ_PYBIND_FILES): CXXFLAGS += -fvisibility=hidden
 
-$(DSP_OUTFILE): $(DSP_SRCFILE)
-       $(MAKE) -C dsp
-
-src/ocl_device.cpp: $(DSP_OUTFILE)
-       touch $@
-
 $(HOST_OBJ_PYBIND_FILES): obj/%.o: src/%.cpp $(HEADERS) src/pybind_common.h
        @mkdir -p obj
        @echo Compiling pybind $< ...
@@ -105,8 +97,7 @@ $(LIB_IMGUTIL_NAME): $(HOST_OBJ_IMGUTIL_FILES)
        $(AR) cr $@ $(HOST_OBJ_IMGUTIL_FILES)
 
 clean::
-       $(MAKE) -C dsp clean
-       $(RM) -f $(LIB_NAME) $(PY_LIB_NAME) $(HOST_OBJ_FILES)
-       $(RM) -f $(LIB_IMGUTIL_NAME) $(HOST_OBJ_IMGUTIL_FILES)
+       $(RM) -f $(LIB_NAME) $(PY_LIB_NAME)
+       $(RM) -f $(LIB_IMGUTIL_NAME)
        $(RM) -rf obj
 
diff --git a/tidl_api/dsp/Makefile b/tidl_api/dsp/Makefile
deleted file mode 100644 (file)
index 69a8a8e..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (c) 2018 Texas Instruments Incorporated - http://www.ti.com/
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of Texas Instruments Incorporated nor the
-# names of its contributors may be used to endorse or promote products
-# derived from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-# THE POSSIBILITY OF SUCH DAMAGE.
-
-
-all: ocl_wrapper.dsp_h
-
-include ../make.inc
-
-CLOCL_FLAGS = -I../ -t
-
-
-ocl_wrapper.dsp_h: ocl_wrapper.cl
-       @echo Generating $@
-       @$(CLOCL) $(CLOCL_FLAGS) $^
-
-clean::
-       @$(RM) *.obj *.out *.dsp_h
diff --git a/tidl_api/dsp/ocl_wrapper.cl b/tidl_api/dsp/ocl_wrapper.cl
deleted file mode 100644 (file)
index e75ed1d..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-/******************************************************************************
- * Copyright (c) 2017-2018  Texas Instruments Incorporated - http://www.ti.com/
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions are met:
- *       * Redistributions of source code must retain the above copyright
- *         notice, this list of conditions and the following disclaimer.
- *       * Redistributions in binary form must reproduce the above copyright
- *         notice, this list of conditions and the following disclaimer in the
- *         documentation and/or other materials provided with the distribution.
- *       * Neither the name of Texas Instruments Incorporated nor the
- *         names of its contributors may be used to endorse or promote products
- *         derived from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- *   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- *   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- *   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- *   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- *   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- *   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- *   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- *   THE POSSIBILITY OF SUCH DAMAGE.
- *****************************************************************************/
-#include "custom.h"
-#include "dsp_c.h"
-
-kernel 
-void ocl_tidl_setup(global unsigned char*        createParams,
-                    global unsigned char*        netParamsBuffer,
-                    global unsigned char*        netParamsHeap,
-                    global OCL_TIDL_SetupParams* setupParams)
-{
-    ocl_dsp_tidl_setup(createParams, netParamsBuffer, netParamsHeap, setupParams);
-}
-
-kernel 
-void ocl_tidl_initialize(global unsigned char*            createParams,
-                         global unsigned char*            netParamsBuffer,
-                         global unsigned char*            externalMemoryHeapBase,
-                         global OCL_TIDL_InitializeParams* initializeParams,
-                         local  unsigned char*            l2HeapBase)
-{
-    // Set L1 cache to 16KB. TIDL requires 16KB of L1 scratch
-    __cache_l1d_16k();
-
-    ocl_dsp_tidl_initialize(createParams, netParamsBuffer, 
-                            externalMemoryHeapBase, initializeParams, 
-                            l2HeapBase);
-}
-
-kernel
-void ocl_tidl_process(global OCL_TIDL_ProcessParams* processParams,
-                      global unsigned char*          externalMemoryHeapBase,
-                      global unsigned char*          traceBufferParams,
-                      uint32_t                       contextIndex)
-{
-    ocl_dsp_tidl_process(processParams, externalMemoryHeapBase,
-                         traceBufferParams, contextIndex);
-}
-
-
-kernel void ocl_tidl_cleanup()
-{
-    ocl_dsp_tidl_cleanup();
-    __cache_l1d_all();
-}
diff --git a/tidl_api/src/execution_object.cpp b/tidl_api/src/execution_object.cpp
index 285dfde60e7efbac6127fa458cb685c3a6f5f1eb..009ef93530c8030019701720bc7ccdf457cca5a4 100644 (file)
@@ -683,7 +683,7 @@ uint64_t ExecutionObject::Impl::GetProcessCycles(uint32_t context_idx) const
     uint8_t factor = 1;
 
     // ARP32 running at half frequency of VCOP, multiply by 2 for VCOP cycles
-    if (device_m->type() == CL_DEVICE_TYPE_CUSTOM)
+    if (device_type_m == DeviceType::EVE)
         factor = 2;
 
     OCL_TIDL_ProcessParams *p_params = shared_process_params_m.get() +
diff --git a/tidl_api/src/executor.cpp b/tidl_api/src/executor.cpp
index d83efe864f11b95bb5eb9b9222dddb031aa6bb43..8020d4e104f4fb1444582e635b718d1c40cab1e3 100644 (file)
@@ -87,12 +87,8 @@ ExecutorImpl::ExecutorImpl(DeviceType core_type, const DeviceIds& ids,
     core_type_m(core_type),
     layers_group_id_m(layers_group_id)
 {
-    std::string name;
-    if (core_type_m == DeviceType::DSP)
-        name  = "";
-    else if (core_type_m == DeviceType::EVE)
-        name = STRING(SETUP_KERNEL) ";" STRING(INIT_KERNEL) ";" STRING(PROCESS_KERNEL) ";" STRING(CLEANUP_KERNEL);
-
+    std::string name = STRING(SETUP_KERNEL) ";" STRING(INIT_KERNEL) ";"
+                       STRING(PROCESS_KERNEL) ";" STRING(CLEANUP_KERNEL);
     device_m = Device::Create(core_type_m, ids, name);
 }
 
diff --git a/tidl_api/src/ocl_device.cpp b/tidl_api/src/ocl_device.cpp
index ab0bf26034ed20027296344542d463ecfc973cc5..864551d3dd7d230e70473a0e1eafb3f0331a09f3 100644 (file)
@@ -36,143 +36,113 @@ using std::size_t;
 #include "ocl_device.h"
 #include "ocl_util.h"
 #include "trace.h"
-#include "../dsp/ocl_wrapper.dsp_h"
 
 using namespace tidl;
 
 static const char* error2string(cl_int err);
 static void        errorCheck(cl_int ret, int line);
 
-Device::Device(cl_device_type t, const DeviceIds& ids):
+Device::Device(cl_device_type t, const DeviceIds& ids, const char* name):
                 device_type_m(t), device_ids_m(ids)
 {
     TRACE::print("\tOCL Device: %s created\n",
-              device_type_m == CL_DEVICE_TYPE_ACCELERATOR ? "DSP" :
-              device_type_m == CL_DEVICE_TYPE_CUSTOM ? "EVE" : "Unknown");
+                 device_type_m == CL_DEVICE_TYPE_CUSTOM ? name : "Unknown");
 
     for (int i = 0; i < MAX_DEVICES; i++)
         queue_m[i] = nullptr;
 
 }
 
-DspDevice::DspDevice(const DeviceIds& ids, const std::string &binary_filename):
-              Device(CL_DEVICE_TYPE_ACCELERATOR, ids)
+DspDevice::DspDevice(const DeviceIds& ids, const std::string &kernel_names):
+              Device(CL_DEVICE_TYPE_CUSTOM, ids, "DSP")
 {
-    cl_uint num_devices_found;
+    cl_int       errcode;
     cl_device_id device_ids[MAX_DEVICES];
+    cl_device_id out_device_ids[MAX_DEVICES];
+    cl_uint      num_compute_units;
+    cl_uint      num_out_devices;
 
-    cl_int errcode = clGetDeviceIDs(0,               // platform
-                             device_type_m,          // device_type
-                             MAX_DEVICES,            // num_entries
-                             device_ids,             // devices
-                             &num_devices_found);    // num_devices
-    errorCheck(errcode, __LINE__);
-
-    if (num_devices_found != 1)
+    if (! GetDevices(DeviceType::DSP, device_ids, nullptr, &num_compute_units))
         throw Exception("OpenCL DSP device not found",
                         __FILE__, __FUNCTION__, __LINE__);
 
-    cl_int num_compute_units;
-    errcode = clGetDeviceInfo(device_ids[0],
-                              CL_DEVICE_MAX_COMPUTE_UNITS,
-                              sizeof(num_compute_units),
-                              &num_compute_units,
-                              nullptr);
-
     if (num_compute_units == 1)
     {
-        context_m = clCreateContextFromType(0,              // properties
-                                            device_type_m,  // device_type
-                                            0,              // pfn_notify
-                                            0,              // user_data
-                                            &errcode);
-        errorCheck(errcode, __LINE__);
-
-        // Queue 0 on device 0
-        queue_m[0] = clCreateCommandQueue(context_m,
-                                          device_ids[0],
-                                          CL_QUEUE_PROFILING_ENABLE|
-                                         CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
-                                          &errcode);
-        errorCheck(errcode, __LINE__);
-        BuildProgramFromBinary(binary_filename, device_ids, 1);
+        num_out_devices   = 1;
+        out_device_ids[0] = device_ids[0];
     }
     else
     {
-        const cl_uint NUM_SUB_DEVICES = 2;
-
         // Create 2 sub-device's, each consisting of a C66x DSP
         cl_device_partition_property properties[3] =
                                         { CL_DEVICE_PARTITION_EQUALLY, 1, 0 };
 
         // Query the number of sub-devices that can be created
-        cl_uint n_sub_devices = 0;
+        const cl_uint NUM_SUB_DEVICES = 2;
         errcode = clCreateSubDevices(device_ids[0],      // in_device
                                      properties,         // properties
                                      0,                  // num_devices
                                      NULL,               // out_devices
-                                     &n_sub_devices);    // num_devices_ret
+                                     &num_out_devices);  // num_devices_ret
         errorCheck(errcode, __LINE__);
 
-        assert(n_sub_devices == NUM_SUB_DEVICES);
+        assert(num_out_devices == NUM_SUB_DEVICES);
 
         // Create the sub-devices
-        cl_device_id sub_devices[NUM_SUB_DEVICES] = {0, 0};
         errcode = clCreateSubDevices(device_ids[0],        // in_device
                                      properties,           // properties
-                                     n_sub_devices,        // num_devices
-                                     sub_devices,          // out_devices
+                                     num_out_devices,      // num_devices
+                                     out_device_ids,       // out_devices
                                      nullptr);             // num_devices_ret
         errorCheck(errcode, __LINE__);
+    }
 
-        // Create a context containing the sub-devices
-        context_m = clCreateContext(NULL,               // properties
-                                    NUM_SUB_DEVICES,    // num_devices
-                                    sub_devices,        // devices
-                                    NULL,               // pfn_notify
-                                    NULL,               // user_data
-                                    &errcode);          // errcode_ret
-        errorCheck(errcode, __LINE__);
-
-        // Create queues to each sub-device
-        for (auto id : device_ids_m)
-        {
-            int index = static_cast<int>(id);
-            queue_m[index] = clCreateCommandQueue(context_m,
-                                          sub_devices[index],
-                                          CL_QUEUE_PROFILING_ENABLE|
-                                         CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
-                                          &errcode);
-            errorCheck(errcode, __LINE__);
-        }
+    // Create a context containing the out-devices
+    context_m = clCreateContext(NULL,               // properties
+                                num_out_devices,    // num_devices
+                                out_device_ids,     // devices
+                                NULL,               // pfn_notify
+                                NULL,               // user_data
+                                &errcode);          // errcode_ret
+    errorCheck(errcode, __LINE__);
 
-        BuildProgramFromBinary(binary_filename, sub_devices, NUM_SUB_DEVICES);
+    // Create queues to each out device
+    for (auto id : device_ids_m)
+    {
+        cl_uint index = static_cast<cl_uint>(id);
+        assert(index < num_out_devices);
+        queue_m[index] = clCreateCommandQueue(context_m,
+                                        out_device_ids[index],
+                                        CL_QUEUE_PROFILING_ENABLE|
+                                        CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
+                                        &errcode);
+        errorCheck(errcode, __LINE__);
     }
 
+    // Build kernel program
+    BuildBuiltInProgram(kernel_names, out_device_ids, num_out_devices);
+
+    // Query device frequency
     errcode = clGetDeviceInfo(device_ids[0],
-                                CL_DEVICE_MAX_CLOCK_FREQUENCY,
-                                sizeof(freq_in_mhz_m),
-                                &freq_in_mhz_m,
-                                nullptr);
+                              CL_DEVICE_MAX_CLOCK_FREQUENCY,
+                              sizeof(freq_in_mhz_m),
+                              &freq_in_mhz_m,
+                              nullptr);
     errorCheck(errcode, __LINE__);
 }
 
 
 EveDevice::EveDevice(const DeviceIds& ids, const std::string &kernel_names):
-            Device(CL_DEVICE_TYPE_CUSTOM, ids)
+            Device(CL_DEVICE_TYPE_CUSTOM, ids, "EVE")
 {
-    cl_uint num_devices_found;
+    cl_int       errcode;
     cl_device_id all_device_ids[MAX_DEVICES];
+    cl_uint      num_devices;
+    if (! GetDevices(DeviceType::EVE, all_device_ids, &num_devices, nullptr))
+        throw Exception("OpenCL EVE device not found",
+                        __FILE__, __FUNCTION__, __LINE__);
 
-    // Find all the OpenCL devices available of the given type
-    cl_int errcode = clGetDeviceIDs(0,              // platform
-                             device_type_m,         // device_type
-                             MAX_DEVICES,           // num_entries
-                             all_device_ids,        // devices
-                             &num_devices_found);   // num_devices
-    errorCheck(errcode, __LINE__);
-
-    assert (num_devices_found >= device_ids_m.size());
+    assert (num_devices >= device_ids_m.size());
 
     context_m = clCreateContextFromType(0,              // properties
                                         device_type_m,  // device_type
@@ -181,7 +151,6 @@ EveDevice::EveDevice(const DeviceIds& ids, const std::string &kernel_names):
                                         &errcode);
     errorCheck(errcode, __LINE__);
 
-
     // Create command queues to OpenCL devices specified by the
     // device_ids_m set.
     for (auto id : device_ids_m)
@@ -195,7 +164,7 @@ EveDevice::EveDevice(const DeviceIds& ids, const std::string &kernel_names):
         errorCheck(errcode, __LINE__);
     }
 
-    BuildProgramFromBinary(kernel_names, all_device_ids, device_ids_m.size());
+    BuildBuiltInProgram(kernel_names, all_device_ids, device_ids_m.size());
 
     errcode = clGetDeviceInfo(all_device_ids[0],
                                 CL_DEVICE_MAX_CLOCK_FREQUENCY,
@@ -205,45 +174,24 @@ EveDevice::EveDevice(const DeviceIds& ids, const std::string &kernel_names):
     errorCheck(errcode, __LINE__);
 }
 
-
-bool DspDevice::BuildProgramFromBinary(const std::string &BFN,
-                                       cl_device_id device_ids[],
-                                       int num_devices)
+bool DspDevice::BuildBuiltInProgram(const std::string& kernel_names,
+                                    cl_device_id device_ids[],
+                                    int num_devices)
 {
-    size_t bin_len = ocl_wrapper_dsp_bin_len;
-
-    assert (bin_len != 0);
-
-    // Casting to make ocl_read_binary work with clCreateProgramWithBinary
-    const unsigned char *bin_arrc = reinterpret_cast <const unsigned char *>
-                                    (ocl_wrapper_dsp_bin);
-
-    size_t lengths[num_devices];
-    for (int i=0; i < num_devices; i++) lengths[i] = bin_len;
-
-    const unsigned char* binaries[num_devices];
-    for (int i=0; i < num_devices; i++) binaries[i] = bin_arrc;
-
     cl_int err;
-    program_m = clCreateProgramWithBinary(context_m,
+    program_m = clCreateProgramWithBuiltInKernels(context_m,
                                           num_devices,
-                                          device_ids,          // device_list
-                                          lengths,
-                                          binaries,
-                                          0,                   // binary_status
+                                          device_ids,  // device_list
+                                          kernel_names.c_str(),
                                           &err);
     errorCheck(err, __LINE__);
 
-    const char *options = "";
-    err = clBuildProgram(program_m, num_devices, device_ids, options, 0, 0);
-    errorCheck(err, __LINE__);
-
     return true;
 }
 
-bool EveDevice::BuildProgramFromBinary(const std::string& kernel_names,
-                                       cl_device_id device_ids[],
-                                       int num_devices)
+bool EveDevice::BuildBuiltInProgram(const std::string& kernel_names,
+                                    cl_device_id device_ids[],
+                                    int num_devices)
 {
     cl_int err;
     cl_device_id executor_device_ids[MAX_DEVICES];
@@ -557,16 +505,17 @@ static bool PlatformIsAM57()
 }
 
 // TI DL is supported on AM57x - EVE or C66x devices
-uint32_t Device::GetNumDevices(DeviceType device_type)
+bool Device::GetDevices(DeviceType device_type,
+                        cl_device_id cl_d_ids[],
+                        cl_uint *p_num_devices,
+                        cl_uint *p_num_compute_units)
 {
-    if (!PlatformIsAM57()) return 0;
+    if (!PlatformIsAM57()) return false;
 
     // Convert DeviceType to OpenCL device type
-    cl_device_type t = (device_type == DeviceType::EVE) ?
-                                    CL_DEVICE_TYPE_CUSTOM :
-                                    CL_DEVICE_TYPE_ACCELERATOR;
+    cl_device_type t = CL_DEVICE_TYPE_CUSTOM;
 
-    // Find all the OpenCL devices available
+    // Find all the OpenCL custom devices available
     cl_uint num_devices_found;
     cl_device_id all_device_ids[MAX_DEVICES];
 
@@ -577,25 +526,53 @@ uint32_t Device::GetNumDevices(DeviceType device_type)
                                     &num_devices_found); // num_devices
 
 
-    if (errcode != CL_SUCCESS)            return 0;
-    if (num_devices_found == 0)           return 0;
+    if (errcode != CL_SUCCESS)            return false;
+    if (num_devices_found == 0)           return false;
 
-    // DSP, return the number of compute units since we maintain a
-    // queue to each compute unit (i.e. C66x DSP)
-    if (t == CL_DEVICE_TYPE_ACCELERATOR)
+    // Find devices according to device_type
+    // DSP: ACCELERATOR | CUSTOM
+    // EVE: CUSTOM
+    cl_uint num_devices = 0;
+    for (cl_uint i = 0; i < num_devices_found; i++)
+    {
+        cl_device_type cl_d_type;
+        errcode = clGetDeviceInfo(all_device_ids[i], CL_DEVICE_TYPE,
+                                  sizeof(cl_device_type), &cl_d_type, nullptr);
+        if (errcode != CL_SUCCESS) return false;
+
+        if ((device_type == DeviceType::DSP &&
+               ((cl_d_type & CL_DEVICE_TYPE_ACCELERATOR) != 0)) ||
+            (device_type == DeviceType::EVE &&
+               ((cl_d_type & CL_DEVICE_TYPE_ACCELERATOR) == 0)))
+            cl_d_ids[num_devices++] = all_device_ids[i];
+    }
+    if (p_num_devices != nullptr)  *p_num_devices = num_devices;
+
+    // DSP, return the number of compute units
+    if (device_type == DeviceType::DSP &&
+        num_devices > 0 && p_num_compute_units != nullptr)
     {
-        cl_int num_compute_units;
-        errcode = clGetDeviceInfo(all_device_ids[0],
+        errcode = clGetDeviceInfo(cl_d_ids[0],
                                 CL_DEVICE_MAX_COMPUTE_UNITS,
-                                sizeof(num_compute_units),
-                                &num_compute_units,
+                                sizeof(cl_int),
+                                p_num_compute_units,
                                 nullptr);
-        if (errcode != CL_SUCCESS)
-            return 0;
-
-        return num_compute_units;
+        if (errcode != CL_SUCCESS)  return false;
     }
 
+    return true;
+}
+
+uint32_t Device::GetNumDevices(DeviceType device_type)
+{
+    cl_device_id cl_d_ids[MAX_DEVICES];
+    cl_uint num_devices = 0;
+    cl_uint num_cus     = 0;
+
+    if (! GetDevices(device_type, cl_d_ids, &num_devices, &num_cus))  return 0;
+
     // EVE, return the number of devices since each EVE is a device
-    return num_devices_found;
+    // DSP, return the number of compute units since we maintain a
+    //      queue to each compute unit (i.e. C66x DSP)
+    return device_type == DeviceType::EVE ? num_devices : num_cus;
 }
diff --git a/tidl_api/src/ocl_device.h b/tidl_api/src/ocl_device.h
index 773a27e3900fce547e9ebd42c17cd5d9987effeb..5c8f5341aa198426f700a0a884cf2effcdf7e7b1 100644 (file)
@@ -59,7 +59,7 @@ class Device
     public:
         typedef std::unique_ptr<Device> Ptr;
 
-        Device(cl_device_type t, const DeviceIds& ids);
+        Device(cl_device_type t, const DeviceIds& ids, const char *name);
         virtual ~Device();
 
 
@@ -79,10 +79,13 @@ class Device
 
     protected:
 
-        static const int MAX_DEVICES = 4;
+        static const int MAX_DEVICES = 5;  // max: 1 DSP device + 4 EVE devices
         cl_mem CreateBuffer(const DeviceArgInfo &Arg);
         void   ReleaseBuffer(cl_mem M);
-
+        static bool GetDevices(DeviceType device_type,
+                               cl_device_id cl_d_ids[],
+                               cl_uint *p_num_devices,
+                               cl_uint *p_num_compute_units);
 
               cl_context        context_m;
               cl_program        program_m;
@@ -97,7 +100,7 @@ class Device
 class DspDevice: public Device
 {
     public:
-        DspDevice(const DeviceIds& ids, const std::string &binary_filename);
+        DspDevice(const DeviceIds& ids, const std::string &kernel_names);
         virtual ~DspDevice() {}
 
         DspDevice()                            = delete;
@@ -107,9 +110,9 @@ class DspDevice: public Device
         virtual std::string GetDeviceName() { return "DSP"; }
 
     protected:
-        bool BuildProgramFromBinary(const std::string &binary_filename,
-                                    cl_device_id device_ids[],
-                                    int num_devices);
+        bool BuildBuiltInProgram(const std::string &kernel_names,
+                                 cl_device_id device_ids[],
+                                 int num_devices);
 };
 
 class EveDevice : public Device
@@ -125,10 +128,9 @@ class EveDevice : public Device
         virtual std::string GetDeviceName() { return "EVE"; }
 
     protected:
-        bool BuildProgramFromBinary(const std::string &kernel_names,
-                                    cl_device_id device_ids[],
-                                    int num_devices);
-
+        bool BuildBuiltInProgram(const std::string &kernel_names,
+                                 cl_device_id device_ids[],
+                                 int num_devices);
 };