diff options
author | Yuan Zhao | 2019-03-12 12:34:56 -0500 |
---|---|---|
committer | Yuan Zhao | 2019-03-13 11:42:23 -0500 |
commit | 923d5b51a031a723c6db60c436cd1f430333e78d (patch) | |
tree | 294c1a7e05d5275a2b9c8c3f88f15e5fab1e7a50 | |
parent | 353f8b1792b8d604d0bfc9f4897873b71af07569 (diff) | |
download | tidl-api-923d5b51a031a723c6db60c436cd1f430333e78d.tar.gz tidl-api-923d5b51a031a723c6db60c436cd1f430333e78d.tar.xz tidl-api-923d5b51a031a723c6db60c436cd1f430333e78d.zip |
Use DSP Built-in Kernels in TIDL-API
- Replace previously used kernel wrappers
- MCT-1143, MCT-1154
-rw-r--r-- | tidl_api/Makefile | 13 | ||||
-rw-r--r-- | tidl_api/dsp/Makefile | 40 | ||||
-rw-r--r-- | tidl_api/dsp/ocl_wrapper.cl | 70 | ||||
-rw-r--r-- | tidl_api/src/execution_object.cpp | 2 | ||||
-rw-r--r-- | tidl_api/src/executor.cpp | 8 | ||||
-rw-r--r-- | tidl_api/src/ocl_device.cpp | 243 | ||||
-rw-r--r-- | tidl_api/src/ocl_device.h | 24 |
7 files changed, 128 insertions, 272 deletions
diff --git a/tidl_api/Makefile b/tidl_api/Makefile index abae078..8da13e4 100644 --- a/tidl_api/Makefile +++ b/tidl_api/Makefile | |||
@@ -26,8 +26,6 @@ | |||
26 | PYTHON_INCLUDE_DIR ?= $(wildcard $(TARGET_ROOTDIR)/usr/include/python3*) | 26 | PYTHON_INCLUDE_DIR ?= $(wildcard $(TARGET_ROOTDIR)/usr/include/python3*) |
27 | 27 | ||
28 | LIB_NAME = tidl_api.a | 28 | LIB_NAME = tidl_api.a |
29 | DSP_SRCFILE = dsp/ocl_wrapper.cl | ||
30 | DSP_OUTFILE = dsp/ocl_wrapper.dsp_h | ||
31 | LIB_IMGUTIL_NAME = tidl_imgutil.a | 29 | LIB_IMGUTIL_NAME = tidl_imgutil.a |
32 | PY_LIB_NAME = tidl.so | 30 | PY_LIB_NAME = tidl.so |
33 | 31 | ||
@@ -78,12 +76,6 @@ PY_INCLUDE = -I$(PYTHON_INCLUDE_DIR) -I$(PYBIND11_INC_DIR) | |||
78 | # prevent name clashed when multiple shared libraries use pybind11 | 76 | # prevent name clashed when multiple shared libraries use pybind11 |
79 | $(HOST_OBJ_PYBIND_FILES): CXXFLAGS += -fvisibility=hidden | 77 | $(HOST_OBJ_PYBIND_FILES): CXXFLAGS += -fvisibility=hidden |
80 | 78 | ||
81 | $(DSP_OUTFILE): $(DSP_SRCFILE) | ||
82 | $(MAKE) -C dsp | ||
83 | |||
84 | src/ocl_device.cpp: $(DSP_OUTFILE) | ||
85 | touch $@ | ||
86 | |||
87 | $(HOST_OBJ_PYBIND_FILES): obj/%.o: src/%.cpp $(HEADERS) src/pybind_common.h | 79 | $(HOST_OBJ_PYBIND_FILES): obj/%.o: src/%.cpp $(HEADERS) src/pybind_common.h |
88 | @mkdir -p obj | 80 | @mkdir -p obj |
89 | @echo Compiling pybind $< ... | 81 | @echo Compiling pybind $< ... |
@@ -105,8 +97,7 @@ $(LIB_IMGUTIL_NAME): $(HOST_OBJ_IMGUTIL_FILES) | |||
105 | $(AR) cr $@ $(HOST_OBJ_IMGUTIL_FILES) | 97 | $(AR) cr $@ $(HOST_OBJ_IMGUTIL_FILES) |
106 | 98 | ||
107 | clean:: | 99 | clean:: |
108 | $(MAKE) -C dsp clean | 100 | $(RM) -f $(LIB_NAME) $(PY_LIB_NAME) |
109 | $(RM) -f $(LIB_NAME) $(PY_LIB_NAME) $(HOST_OBJ_FILES) | 101 | $(RM) -f $(LIB_IMGUTIL_NAME) |
110 | $(RM) -f $(LIB_IMGUTIL_NAME) $(HOST_OBJ_IMGUTIL_FILES) | ||
111 | $(RM) -rf obj | 102 | $(RM) -rf obj |
112 | 103 | ||
diff --git a/tidl_api/dsp/Makefile b/tidl_api/dsp/Makefile deleted file mode 100644 index 69a8a8e..0000000 --- a/tidl_api/dsp/Makefile +++ /dev/null | |||
@@ -1,40 +0,0 @@ | |||
1 | # Copyright (c) 2018 Texas Instruments Incorporated - http://www.ti.com/ | ||
2 | # All rights reserved. | ||
3 | # | ||
4 | # Redistribution and use in source and binary forms, with or without | ||
5 | # modification, are permitted provided that the following conditions are met: | ||
6 | # * Redistributions of source code must retain the above copyright | ||
7 | # notice, this list of conditions and the following disclaimer. | ||
8 | # * Redistributions in binary form must reproduce the above copyright | ||
9 | # notice, this list of conditions and the following disclaimer in the | ||
10 | # documentation and/or other materials provided with the distribution. | ||
11 | # * Neither the name of Texas Instruments Incorporated nor the | ||
12 | # names of its contributors may be used to endorse or promote products | ||
13 | # derived from this software without specific prior written permission. | ||
14 | # | ||
15 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
16 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
17 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
18 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
19 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
20 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
21 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
22 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
23 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
24 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | ||
25 | # THE POSSIBILITY OF SUCH DAMAGE. | ||
26 | |||
27 | |||
28 | all: ocl_wrapper.dsp_h | ||
29 | |||
30 | include ../make.inc | ||
31 | |||
32 | CLOCL_FLAGS = -I../ -t | ||
33 | |||
34 | |||
35 | ocl_wrapper.dsp_h: ocl_wrapper.cl | ||
36 | @echo Generating $@ | ||
37 | @$(CLOCL) $(CLOCL_FLAGS) $^ | ||
38 | |||
39 | clean:: | ||
40 | @$(RM) *.obj *.out *.dsp_h | ||
diff --git a/tidl_api/dsp/ocl_wrapper.cl b/tidl_api/dsp/ocl_wrapper.cl deleted file mode 100644 index e75ed1d..0000000 --- a/tidl_api/dsp/ocl_wrapper.cl +++ /dev/null | |||
@@ -1,70 +0,0 @@ | |||
1 | /****************************************************************************** | ||
2 | * Copyright (c) 2017-2018 Texas Instruments Incorporated - http://www.ti.com/ | ||
3 | * All rights reserved. | ||
4 | * | ||
5 | * Redistribution and use in source and binary forms, with or without | ||
6 | * modification, are permitted provided that the following conditions are met: | ||
7 | * * Redistributions of source code must retain the above copyright | ||
8 | * notice, this list of conditions and the following disclaimer. | ||
9 | * * Redistributions in binary form must reproduce the above copyright | ||
10 | * notice, this list of conditions and the following disclaimer in the | ||
11 | * documentation and/or other materials provided with the distribution. | ||
12 | * * Neither the name of Texas Instruments Incorporated nor the | ||
13 | * names of its contributors may be used to endorse or promote products | ||
14 | * derived from this software without specific prior written permission. | ||
15 | * | ||
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF | ||
26 | * THE POSSIBILITY OF SUCH DAMAGE. | ||
27 | *****************************************************************************/ | ||
28 | #include "custom.h" | ||
29 | #include "dsp_c.h" | ||
30 | |||
31 | kernel | ||
32 | void ocl_tidl_setup(global unsigned char* createParams, | ||
33 | global unsigned char* netParamsBuffer, | ||
34 | global unsigned char* netParamsHeap, | ||
35 | global OCL_TIDL_SetupParams* setupParams) | ||
36 | { | ||
37 | ocl_dsp_tidl_setup(createParams, netParamsBuffer, netParamsHeap, setupParams); | ||
38 | } | ||
39 | |||
40 | kernel | ||
41 | void ocl_tidl_initialize(global unsigned char* createParams, | ||
42 | global unsigned char* netParamsBuffer, | ||
43 | global unsigned char* externalMemoryHeapBase, | ||
44 | global OCL_TIDL_InitializeParams* initializeParams, | ||
45 | local unsigned char* l2HeapBase) | ||
46 | { | ||
47 | // Set L1 cache to 16KB. TIDL requires 16KB of L1 scratch | ||
48 | __cache_l1d_16k(); | ||
49 | |||
50 | ocl_dsp_tidl_initialize(createParams, netParamsBuffer, | ||
51 | externalMemoryHeapBase, initializeParams, | ||
52 | l2HeapBase); | ||
53 | } | ||
54 | |||
55 | kernel | ||
56 | void ocl_tidl_process(global OCL_TIDL_ProcessParams* processParams, | ||
57 | global unsigned char* externalMemoryHeapBase, | ||
58 | global unsigned char* traceBufferParams, | ||
59 | uint32_t contextIndex) | ||
60 | { | ||
61 | ocl_dsp_tidl_process(processParams, externalMemoryHeapBase, | ||
62 | traceBufferParams, contextIndex); | ||
63 | } | ||
64 | |||
65 | |||
66 | kernel void ocl_tidl_cleanup() | ||
67 | { | ||
68 | ocl_dsp_tidl_cleanup(); | ||
69 | __cache_l1d_all(); | ||
70 | } | ||
diff --git a/tidl_api/src/execution_object.cpp b/tidl_api/src/execution_object.cpp index 285dfde..009ef93 100644 --- a/tidl_api/src/execution_object.cpp +++ b/tidl_api/src/execution_object.cpp | |||
@@ -683,7 +683,7 @@ uint64_t ExecutionObject::Impl::GetProcessCycles(uint32_t context_idx) const | |||
683 | uint8_t factor = 1; | 683 | uint8_t factor = 1; |
684 | 684 | ||
685 | // ARP32 running at half frequency of VCOP, multiply by 2 for VCOP cycles | 685 | // ARP32 running at half frequency of VCOP, multiply by 2 for VCOP cycles |
686 | if (device_m->type() == CL_DEVICE_TYPE_CUSTOM) | 686 | if (device_type_m == DeviceType::EVE) |
687 | factor = 2; | 687 | factor = 2; |
688 | 688 | ||
689 | OCL_TIDL_ProcessParams *p_params = shared_process_params_m.get() + | 689 | OCL_TIDL_ProcessParams *p_params = shared_process_params_m.get() + |
diff --git a/tidl_api/src/executor.cpp b/tidl_api/src/executor.cpp index d83efe8..8020d4e 100644 --- a/tidl_api/src/executor.cpp +++ b/tidl_api/src/executor.cpp | |||
@@ -87,12 +87,8 @@ ExecutorImpl::ExecutorImpl(DeviceType core_type, const DeviceIds& ids, | |||
87 | core_type_m(core_type), | 87 | core_type_m(core_type), |
88 | layers_group_id_m(layers_group_id) | 88 | layers_group_id_m(layers_group_id) |
89 | { | 89 | { |
90 | std::string name; | 90 | std::string name = STRING(SETUP_KERNEL) ";" STRING(INIT_KERNEL) ";" |
91 | if (core_type_m == DeviceType::DSP) | 91 | STRING(PROCESS_KERNEL) ";" STRING(CLEANUP_KERNEL); |
92 | name = ""; | ||
93 | else if (core_type_m == DeviceType::EVE) | ||
94 | name = STRING(SETUP_KERNEL) ";" STRING(INIT_KERNEL) ";" STRING(PROCESS_KERNEL) ";" STRING(CLEANUP_KERNEL); | ||
95 | |||
96 | device_m = Device::Create(core_type_m, ids, name); | 92 | device_m = Device::Create(core_type_m, ids, name); |
97 | } | 93 | } |
98 | 94 | ||
diff --git a/tidl_api/src/ocl_device.cpp b/tidl_api/src/ocl_device.cpp index ab0bf26..864551d 100644 --- a/tidl_api/src/ocl_device.cpp +++ b/tidl_api/src/ocl_device.cpp | |||
@@ -36,143 +36,113 @@ using std::size_t; | |||
36 | #include "ocl_device.h" | 36 | #include "ocl_device.h" |
37 | #include "ocl_util.h" | 37 | #include "ocl_util.h" |
38 | #include "trace.h" | 38 | #include "trace.h" |
39 | #include "../dsp/ocl_wrapper.dsp_h" | ||
40 | 39 | ||
41 | using namespace tidl; | 40 | using namespace tidl; |
42 | 41 | ||
43 | static const char* error2string(cl_int err); | 42 | static const char* error2string(cl_int err); |
44 | static void errorCheck(cl_int ret, int line); | 43 | static void errorCheck(cl_int ret, int line); |
45 | 44 | ||
46 | Device::Device(cl_device_type t, const DeviceIds& ids): | 45 | Device::Device(cl_device_type t, const DeviceIds& ids, const char* name): |
47 | device_type_m(t), device_ids_m(ids) | 46 | device_type_m(t), device_ids_m(ids) |
48 | { | 47 | { |
49 | TRACE::print("\tOCL Device: %s created\n", | 48 | TRACE::print("\tOCL Device: %s created\n", |
50 | device_type_m == CL_DEVICE_TYPE_ACCELERATOR ? "DSP" : | 49 | device_type_m == CL_DEVICE_TYPE_CUSTOM ? name : "Unknown"); |
51 | device_type_m == CL_DEVICE_TYPE_CUSTOM ? "EVE" : "Unknown"); | ||
52 | 50 | ||
53 | for (int i = 0; i < MAX_DEVICES; i++) | 51 | for (int i = 0; i < MAX_DEVICES; i++) |
54 | queue_m[i] = nullptr; | 52 | queue_m[i] = nullptr; |
55 | 53 | ||
56 | } | 54 | } |
57 | 55 | ||
58 | DspDevice::DspDevice(const DeviceIds& ids, const std::string &binary_filename): | 56 | DspDevice::DspDevice(const DeviceIds& ids, const std::string &kernel_names): |
59 | Device(CL_DEVICE_TYPE_ACCELERATOR, ids) | 57 | Device(CL_DEVICE_TYPE_CUSTOM, ids, "DSP") |
60 | { | 58 | { |
61 | cl_uint num_devices_found; | 59 | cl_int errcode; |
62 | cl_device_id device_ids[MAX_DEVICES]; | 60 | cl_device_id device_ids[MAX_DEVICES]; |
61 | cl_device_id out_device_ids[MAX_DEVICES]; | ||
62 | cl_uint num_compute_units; | ||
63 | cl_uint num_out_devices; | ||
63 | 64 | ||
64 | cl_int errcode = clGetDeviceIDs(0, // platform | 65 | if (! GetDevices(DeviceType::DSP, device_ids, nullptr, &num_compute_units)) |
65 | device_type_m, // device_type | ||
66 | MAX_DEVICES, // num_entries | ||
67 | device_ids, // devices | ||
68 | &num_devices_found); // num_devices | ||
69 | errorCheck(errcode, __LINE__); | ||
70 | |||
71 | if (num_devices_found != 1) | ||
72 | throw Exception("OpenCL DSP device not found", | 66 | throw Exception("OpenCL DSP device not found", |
73 | __FILE__, __FUNCTION__, __LINE__); | 67 | __FILE__, __FUNCTION__, __LINE__); |
74 | 68 | ||
75 | cl_int num_compute_units; | ||
76 | errcode = clGetDeviceInfo(device_ids[0], | ||
77 | CL_DEVICE_MAX_COMPUTE_UNITS, | ||
78 | sizeof(num_compute_units), | ||
79 | &num_compute_units, | ||
80 | nullptr); | ||
81 | |||
82 | if (num_compute_units == 1) | 69 | if (num_compute_units == 1) |
83 | { | 70 | { |
84 | context_m = clCreateContextFromType(0, // properties | 71 | num_out_devices = 1; |
85 | device_type_m, // device_type | 72 | out_device_ids[0] = device_ids[0]; |
86 | 0, // pfn_notify | ||
87 | 0, // user_data | ||
88 | &errcode); | ||
89 | errorCheck(errcode, __LINE__); | ||
90 | |||
91 | // Queue 0 on device 0 | ||
92 | queue_m[0] = clCreateCommandQueue(context_m, | ||
93 | device_ids[0], | ||
94 | CL_QUEUE_PROFILING_ENABLE| | ||
95 | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, | ||
96 | &errcode); | ||
97 | errorCheck(errcode, __LINE__); | ||
98 | BuildProgramFromBinary(binary_filename, device_ids, 1); | ||
99 | } | 73 | } |
100 | else | 74 | else |
101 | { | 75 | { |
102 | const cl_uint NUM_SUB_DEVICES = 2; | ||
103 | |||
104 | // Create 2 sub-device's, each consisting of a C66x DSP | 76 | // Create 2 sub-device's, each consisting of a C66x DSP |
105 | cl_device_partition_property properties[3] = | 77 | cl_device_partition_property properties[3] = |
106 | { CL_DEVICE_PARTITION_EQUALLY, 1, 0 }; | 78 | { CL_DEVICE_PARTITION_EQUALLY, 1, 0 }; |
107 | 79 | ||
108 | // Query the number of sub-devices that can be created | 80 | // Query the number of sub-devices that can be created |
109 | cl_uint n_sub_devices = 0; | 81 | const cl_uint NUM_SUB_DEVICES = 2; |
110 | errcode = clCreateSubDevices(device_ids[0], // in_device | 82 | errcode = clCreateSubDevices(device_ids[0], // in_device |
111 | properties, // properties | 83 | properties, // properties |
112 | 0, // num_devices | 84 | 0, // num_devices |
113 | NULL, // out_devices | 85 | NULL, // out_devices |
114 | &n_sub_devices); // num_devices_ret | 86 | &num_out_devices); // num_devices_ret |
115 | errorCheck(errcode, __LINE__); | 87 | errorCheck(errcode, __LINE__); |
116 | 88 | ||
117 | assert(n_sub_devices == NUM_SUB_DEVICES); | 89 | assert(num_out_devices == NUM_SUB_DEVICES); |
118 | 90 | ||
119 | // Create the sub-devices | 91 | // Create the sub-devices |
120 | cl_device_id sub_devices[NUM_SUB_DEVICES] = {0, 0}; | ||
121 | errcode = clCreateSubDevices(device_ids[0], // in_device | 92 | errcode = clCreateSubDevices(device_ids[0], // in_device |
122 | properties, // properties | 93 | properties, // properties |
123 | n_sub_devices, // num_devices | 94 | num_out_devices, // num_devices |
124 | sub_devices, // out_devices | 95 | out_device_ids, // out_devices |
125 | nullptr); // num_devices_ret | 96 | nullptr); // num_devices_ret |
126 | errorCheck(errcode, __LINE__); | 97 | errorCheck(errcode, __LINE__); |
98 | } | ||
127 | 99 | ||
128 | // Create a context containing the sub-devices | 100 | // Create a context containing the out-devices |
129 | context_m = clCreateContext(NULL, // properties | 101 | context_m = clCreateContext(NULL, // properties |
130 | NUM_SUB_DEVICES, // num_devices | 102 | num_out_devices, // num_devices |
131 | sub_devices, // devices | 103 | out_device_ids, // devices |
132 | NULL, // pfn_notify | 104 | NULL, // pfn_notify |
133 | NULL, // user_data | 105 | NULL, // user_data |
134 | &errcode); // errcode_ret | 106 | &errcode); // errcode_ret |
135 | errorCheck(errcode, __LINE__); | 107 | errorCheck(errcode, __LINE__); |
136 | |||
137 | // Create queues to each sub-device | ||
138 | for (auto id : device_ids_m) | ||
139 | { | ||
140 | int index = static_cast<int>(id); | ||
141 | queue_m[index] = clCreateCommandQueue(context_m, | ||
142 | sub_devices[index], | ||
143 | CL_QUEUE_PROFILING_ENABLE| | ||
144 | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, | ||
145 | &errcode); | ||
146 | errorCheck(errcode, __LINE__); | ||
147 | } | ||
148 | 108 | ||
149 | BuildProgramFromBinary(binary_filename, sub_devices, NUM_SUB_DEVICES); | 109 | // Create queues to each out device |
110 | for (auto id : device_ids_m) | ||
111 | { | ||
112 | cl_uint index = static_cast<cl_uint>(id); | ||
113 | assert(index < num_out_devices); | ||
114 | queue_m[index] = clCreateCommandQueue(context_m, | ||
115 | out_device_ids[index], | ||
116 | CL_QUEUE_PROFILING_ENABLE| | ||
117 | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, | ||
118 | &errcode); | ||
119 | errorCheck(errcode, __LINE__); | ||
150 | } | 120 | } |
151 | 121 | ||
122 | // Build kernel program | ||
123 | BuildBuiltInProgram(kernel_names, out_device_ids, num_out_devices); | ||
124 | |||
125 | // Query device frequency | ||
152 | errcode = clGetDeviceInfo(device_ids[0], | 126 | errcode = clGetDeviceInfo(device_ids[0], |
153 | CL_DEVICE_MAX_CLOCK_FREQUENCY, | 127 | CL_DEVICE_MAX_CLOCK_FREQUENCY, |
154 | sizeof(freq_in_mhz_m), | 128 | sizeof(freq_in_mhz_m), |
155 | &freq_in_mhz_m, | 129 | &freq_in_mhz_m, |
156 | nullptr); | 130 | nullptr); |
157 | errorCheck(errcode, __LINE__); | 131 | errorCheck(errcode, __LINE__); |
158 | } | 132 | } |
159 | 133 | ||
160 | 134 | ||
161 | EveDevice::EveDevice(const DeviceIds& ids, const std::string &kernel_names): | 135 | EveDevice::EveDevice(const DeviceIds& ids, const std::string &kernel_names): |
162 | Device(CL_DEVICE_TYPE_CUSTOM, ids) | 136 | Device(CL_DEVICE_TYPE_CUSTOM, ids, "EVE") |
163 | { | 137 | { |
164 | cl_uint num_devices_found; | 138 | cl_int errcode; |
165 | cl_device_id all_device_ids[MAX_DEVICES]; | 139 | cl_device_id all_device_ids[MAX_DEVICES]; |
140 | cl_uint num_devices; | ||
141 | if (! GetDevices(DeviceType::EVE, all_device_ids, &num_devices, nullptr)) | ||
142 | throw Exception("OpenCL EVE device not found", | ||
143 | __FILE__, __FUNCTION__, __LINE__); | ||
166 | 144 | ||
167 | // Find all the OpenCL devices available of the given type | 145 | assert (num_devices >= device_ids_m.size()); |
168 | cl_int errcode = clGetDeviceIDs(0, // platform | ||
169 | device_type_m, // device_type | ||
170 | MAX_DEVICES, // num_entries | ||
171 | all_device_ids, // devices | ||
172 | &num_devices_found); // num_devices | ||
173 | errorCheck(errcode, __LINE__); | ||
174 | |||
175 | assert (num_devices_found >= device_ids_m.size()); | ||
176 | 146 | ||
177 | context_m = clCreateContextFromType(0, // properties | 147 | context_m = clCreateContextFromType(0, // properties |
178 | device_type_m, // device_type | 148 | device_type_m, // device_type |
@@ -181,7 +151,6 @@ EveDevice::EveDevice(const DeviceIds& ids, const std::string &kernel_names): | |||
181 | &errcode); | 151 | &errcode); |
182 | errorCheck(errcode, __LINE__); | 152 | errorCheck(errcode, __LINE__); |
183 | 153 | ||
184 | |||
185 | // Create command queues to OpenCL devices specified by the | 154 | // Create command queues to OpenCL devices specified by the |
186 | // device_ids_m set. | 155 | // device_ids_m set. |
187 | for (auto id : device_ids_m) | 156 | for (auto id : device_ids_m) |
@@ -195,7 +164,7 @@ EveDevice::EveDevice(const DeviceIds& ids, const std::string &kernel_names): | |||
195 | errorCheck(errcode, __LINE__); | 164 | errorCheck(errcode, __LINE__); |
196 | } | 165 | } |
197 | 166 | ||
198 | BuildProgramFromBinary(kernel_names, all_device_ids, device_ids_m.size()); | 167 | BuildBuiltInProgram(kernel_names, all_device_ids, device_ids_m.size()); |
199 | 168 | ||
200 | errcode = clGetDeviceInfo(all_device_ids[0], | 169 | errcode = clGetDeviceInfo(all_device_ids[0], |
201 | CL_DEVICE_MAX_CLOCK_FREQUENCY, | 170 | CL_DEVICE_MAX_CLOCK_FREQUENCY, |
@@ -205,45 +174,24 @@ EveDevice::EveDevice(const DeviceIds& ids, const std::string &kernel_names): | |||
205 | errorCheck(errcode, __LINE__); | 174 | errorCheck(errcode, __LINE__); |
206 | } | 175 | } |
207 | 176 | ||
208 | 177 | bool DspDevice::BuildBuiltInProgram(const std::string& kernel_names, | |
209 | bool DspDevice::BuildProgramFromBinary(const std::string &BFN, | 178 | cl_device_id device_ids[], |
210 | cl_device_id device_ids[], | 179 | int num_devices) |
211 | int num_devices) | ||
212 | { | 180 | { |
213 | size_t bin_len = ocl_wrapper_dsp_bin_len; | ||
214 | |||
215 | assert (bin_len != 0); | ||
216 | |||
217 | // Casting to make ocl_read_binary work with clCreateProgramWithBinary | ||
218 | const unsigned char *bin_arrc = reinterpret_cast <const unsigned char *> | ||
219 | (ocl_wrapper_dsp_bin); | ||
220 | |||
221 | size_t lengths[num_devices]; | ||
222 | for (int i=0; i < num_devices; i++) lengths[i] = bin_len; | ||
223 | |||
224 | const unsigned char* binaries[num_devices]; | ||
225 | for (int i=0; i < num_devices; i++) binaries[i] = bin_arrc; | ||
226 | |||
227 | cl_int err; | 181 | cl_int err; |
228 | program_m = clCreateProgramWithBinary(context_m, | 182 | program_m = clCreateProgramWithBuiltInKernels(context_m, |
229 | num_devices, | 183 | num_devices, |
230 | device_ids, // device_list | 184 | device_ids, // device_list |
231 | lengths, | 185 | kernel_names.c_str(), |
232 | binaries, | ||
233 | 0, // binary_status | ||
234 | &err); | 186 | &err); |
235 | errorCheck(err, __LINE__); | 187 | errorCheck(err, __LINE__); |
236 | 188 | ||
237 | const char *options = ""; | ||
238 | err = clBuildProgram(program_m, num_devices, device_ids, options, 0, 0); | ||
239 | errorCheck(err, __LINE__); | ||
240 | |||
241 | return true; | 189 | return true; |
242 | } | 190 | } |
243 | 191 | ||
244 | bool EveDevice::BuildProgramFromBinary(const std::string& kernel_names, | 192 | bool EveDevice::BuildBuiltInProgram(const std::string& kernel_names, |
245 | cl_device_id device_ids[], | 193 | cl_device_id device_ids[], |
246 | int num_devices) | 194 | int num_devices) |
247 | { | 195 | { |
248 | cl_int err; | 196 | cl_int err; |
249 | cl_device_id executor_device_ids[MAX_DEVICES]; | 197 | cl_device_id executor_device_ids[MAX_DEVICES]; |
@@ -557,16 +505,17 @@ static bool PlatformIsAM57() | |||
557 | } | 505 | } |
558 | 506 | ||
559 | // TI DL is supported on AM57x - EVE or C66x devices | 507 | // TI DL is supported on AM57x - EVE or C66x devices |
560 | uint32_t Device::GetNumDevices(DeviceType device_type) | 508 | bool Device::GetDevices(DeviceType device_type, |
509 | cl_device_id cl_d_ids[], | ||
510 | cl_uint *p_num_devices, | ||
511 | cl_uint *p_num_compute_units) | ||
561 | { | 512 | { |
562 | if (!PlatformIsAM57()) return 0; | 513 | if (!PlatformIsAM57()) return false; |
563 | 514 | ||
564 | // Convert DeviceType to OpenCL device type | 515 | // Convert DeviceType to OpenCL device type |
565 | cl_device_type t = (device_type == DeviceType::EVE) ? | 516 | cl_device_type t = CL_DEVICE_TYPE_CUSTOM; |
566 | CL_DEVICE_TYPE_CUSTOM : | ||
567 | CL_DEVICE_TYPE_ACCELERATOR; | ||
568 | 517 | ||
569 | // Find all the OpenCL devices available | 518 | // Find all the OpenCL custom devices available |
570 | cl_uint num_devices_found; | 519 | cl_uint num_devices_found; |
571 | cl_device_id all_device_ids[MAX_DEVICES]; | 520 | cl_device_id all_device_ids[MAX_DEVICES]; |
572 | 521 | ||
@@ -577,25 +526,53 @@ uint32_t Device::GetNumDevices(DeviceType device_type) | |||
577 | &num_devices_found); // num_devices | 526 | &num_devices_found); // num_devices |
578 | 527 | ||
579 | 528 | ||
580 | if (errcode != CL_SUCCESS) return 0; | 529 | if (errcode != CL_SUCCESS) return false; |
581 | if (num_devices_found == 0) return 0; | 530 | if (num_devices_found == 0) return false; |
582 | 531 | ||
583 | // DSP, return the number of compute units since we maintain a | 532 | // Find devices according to device_type |
584 | // queue to each compute unit (i.e. C66x DSP) | 533 | // DSP: ACCELERATOR | CUSTOM |
585 | if (t == CL_DEVICE_TYPE_ACCELERATOR) | 534 | // EVE: CUSTOM |
535 | cl_uint num_devices = 0; | ||
536 | for (cl_uint i = 0; i < num_devices_found; i++) | ||
537 | { | ||
538 | cl_device_type cl_d_type; | ||
539 | errcode = clGetDeviceInfo(all_device_ids[i], CL_DEVICE_TYPE, | ||
540 | sizeof(cl_device_type), &cl_d_type, nullptr); | ||
541 | if (errcode != CL_SUCCESS) return false; | ||
542 | |||
543 | if ((device_type == DeviceType::DSP && | ||
544 | ((cl_d_type & CL_DEVICE_TYPE_ACCELERATOR) != 0)) || | ||
545 | (device_type == DeviceType::EVE && | ||
546 | ((cl_d_type & CL_DEVICE_TYPE_ACCELERATOR) == 0))) | ||
547 | cl_d_ids[num_devices++] = all_device_ids[i]; | ||
548 | } | ||
549 | if (p_num_devices != nullptr) *p_num_devices = num_devices; | ||
550 | |||
551 | // DSP, return the number of compute units | ||
552 | if (device_type == DeviceType::DSP && | ||
553 | num_devices > 0 && p_num_compute_units != nullptr) | ||
586 | { | 554 | { |
587 | cl_int num_compute_units; | 555 | errcode = clGetDeviceInfo(cl_d_ids[0], |
588 | errcode = clGetDeviceInfo(all_device_ids[0], | ||
589 | CL_DEVICE_MAX_COMPUTE_UNITS, | 556 | CL_DEVICE_MAX_COMPUTE_UNITS, |
590 | sizeof(num_compute_units), | 557 | sizeof(cl_int), |
591 | &num_compute_units, | 558 | p_num_compute_units, |
592 | nullptr); | 559 | nullptr); |
593 | if (errcode != CL_SUCCESS) | 560 | if (errcode != CL_SUCCESS) return false; |
594 | return 0; | ||
595 | |||
596 | return num_compute_units; | ||
597 | } | 561 | } |
598 | 562 | ||
563 | return true; | ||
564 | } | ||
565 | |||
566 | uint32_t Device::GetNumDevices(DeviceType device_type) | ||
567 | { | ||
568 | cl_device_id cl_d_ids[MAX_DEVICES]; | ||
569 | cl_uint num_devices = 0; | ||
570 | cl_uint num_cus = 0; | ||
571 | |||
572 | if (! GetDevices(device_type, cl_d_ids, &num_devices, &num_cus)) return 0; | ||
573 | |||
599 | // EVE, return the number of devices since each EVE is a device | 574 | // EVE, return the number of devices since each EVE is a device |
600 | return num_devices_found; | 575 | // DSP, return the number of compute units since we maintain a |
576 | // queue to each compute unit (i.e. C66x DSP) | ||
577 | return device_type == DeviceType::EVE ? num_devices : num_cus; | ||
601 | } | 578 | } |
diff --git a/tidl_api/src/ocl_device.h b/tidl_api/src/ocl_device.h index 773a27e..5c8f534 100644 --- a/tidl_api/src/ocl_device.h +++ b/tidl_api/src/ocl_device.h | |||
@@ -59,7 +59,7 @@ class Device | |||
59 | public: | 59 | public: |
60 | typedef std::unique_ptr<Device> Ptr; | 60 | typedef std::unique_ptr<Device> Ptr; |
61 | 61 | ||
62 | Device(cl_device_type t, const DeviceIds& ids); | 62 | Device(cl_device_type t, const DeviceIds& ids, const char *name); |
63 | virtual ~Device(); | 63 | virtual ~Device(); |
64 | 64 | ||
65 | 65 | ||
@@ -79,10 +79,13 @@ class Device | |||
79 | 79 | ||
80 | protected: | 80 | protected: |
81 | 81 | ||
82 | static const int MAX_DEVICES = 4; | 82 | static const int MAX_DEVICES = 5; // max: 1 DSP device + 4 EVE devices |
83 | cl_mem CreateBuffer(const DeviceArgInfo &Arg); | 83 | cl_mem CreateBuffer(const DeviceArgInfo &Arg); |
84 | void ReleaseBuffer(cl_mem M); | 84 | void ReleaseBuffer(cl_mem M); |
85 | 85 | static bool GetDevices(DeviceType device_type, | |
86 | cl_device_id cl_d_ids[], | ||
87 | cl_uint *p_num_devices, | ||
88 | cl_uint *p_num_compute_units); | ||
86 | 89 | ||
87 | cl_context context_m; | 90 | cl_context context_m; |
88 | cl_program program_m; | 91 | cl_program program_m; |
@@ -97,7 +100,7 @@ class Device | |||
97 | class DspDevice: public Device | 100 | class DspDevice: public Device |
98 | { | 101 | { |
99 | public: | 102 | public: |
100 | DspDevice(const DeviceIds& ids, const std::string &binary_filename); | 103 | DspDevice(const DeviceIds& ids, const std::string &kernel_names); |
101 | virtual ~DspDevice() {} | 104 | virtual ~DspDevice() {} |
102 | 105 | ||
103 | DspDevice() = delete; | 106 | DspDevice() = delete; |
@@ -107,9 +110,9 @@ class DspDevice: public Device | |||
107 | virtual std::string GetDeviceName() { return "DSP"; } | 110 | virtual std::string GetDeviceName() { return "DSP"; } |
108 | 111 | ||
109 | protected: | 112 | protected: |
110 | bool BuildProgramFromBinary(const std::string &binary_filename, | 113 | bool BuildBuiltInProgram(const std::string &kernel_names, |
111 | cl_device_id device_ids[], | 114 | cl_device_id device_ids[], |
112 | int num_devices); | 115 | int num_devices); |
113 | }; | 116 | }; |
114 | 117 | ||
115 | class EveDevice : public Device | 118 | class EveDevice : public Device |
@@ -125,10 +128,9 @@ class EveDevice : public Device | |||
125 | virtual std::string GetDeviceName() { return "EVE"; } | 128 | virtual std::string GetDeviceName() { return "EVE"; } |
126 | 129 | ||
127 | protected: | 130 | protected: |
128 | bool BuildProgramFromBinary(const std::string &kernel_names, | 131 | bool BuildBuiltInProgram(const std::string &kernel_names, |
129 | cl_device_id device_ids[], | 132 | cl_device_id device_ids[], |
130 | int num_devices); | 133 | int num_devices); |
131 | |||
132 | }; | 134 | }; |
133 | 135 | ||
134 | 136 | ||