index fba4f94305da4659de311f604a320ebec8219159..508c5498a352d43ab99c24390ffbd9010d3d1d88 100644 (file)
// Queue 0 on device 0
queue_m[0] = clCreateCommandQueue(context_m,
device_ids[0],
- 0,
+ CL_QUEUE_PROFILING_ENABLE,
&errcode);
errorCheck(errcode, __LINE__);
BuildProgramFromBinary(binary_filename, device_ids, 1);
int index = static_cast<int>(id);
queue_m[index] = clCreateCommandQueue(context_m,
sub_devices[index],
- 0,
+ CL_QUEUE_PROFILING_ENABLE,
&errcode);
errorCheck(errcode, __LINE__);
}
int index = static_cast<int>(id);
queue_m[index] = clCreateCommandQueue(context_m,
all_device_ids[index],
- 0,
+ CL_QUEUE_PROFILING_ENABLE,
&errcode);
errorCheck(errcode, __LINE__);
}
Kernel::Kernel(Device* device, const std::string& name,
const KernelArgs& args, uint8_t device_index):
name_m(name), device_m(device), device_index_m(device_index),
- is_running_m(false)
+ num_running_contexts_m(0)
{
TRACE::print("Creating kernel %s\n", name.c_str());
cl_int err;
clSetKernelArg(kernel_m, arg_index, sizeof(cl_mem), &buffer);
TRACE::print(" Arg[%d]: %p\n", arg_index, buffer);
- buffers_m.push_back(buffer);
+ if (buffer)
+ buffers_m.push_back(buffer);
}
else if (arg.kind() == DeviceArgInfo::Kind::SCALAR)
{
}
}
-Kernel& Kernel::RunAsync()
+// Rebind the kernel argument at position `index` to the `size` bytes pointed
+// to by `value`, by forwarding all three to clSetKernelArg on kernel_m.
+// Returns true when clSetKernelArg reports CL_SUCCESS, false otherwise.
+bool Kernel::UpdateScalarArg(uint32_t index, size_t size, const void *value)
+{
+    return clSetKernelArg(kernel_m, index, size, value) == CL_SUCCESS;
+}
+
+// Enqueue this kernel as a task on the command queue selected by
+// device_index_m and store the resulting event in event_m[context_idx].
+// Returns *this so calls can be chained.
+// NOTE(review): context_idx indexes event_m without a range check here;
+// presumably callers guarantee it is in range -- confirm.
+Kernel& Kernel::RunAsync(uint32_t context_idx)
 {
     // Execute kernel
-    TRACE::print("\tKernel: device %d executing %s\n", device_index_m,
-                 name_m.c_str());
+    // context_idx is unsigned, so it is printed with %u rather than %d
+    TRACE::print("\tKernel: device %d executing %s, context %u\n",
+                 device_index_m, name_m.c_str(), context_idx);
     cl_int ret = clEnqueueTask(device_m->queue_m[device_index_m],
-                               kernel_m, 0, 0, &event_m);
+                               kernel_m, 0, 0, &event_m[context_idx]);
     errorCheck(ret, __LINE__);
-    is_running_m = true;
+    // One more context in flight; Wait() performs the matching decrement
+    __sync_fetch_and_add(&num_running_contexts_m, 1);
     return *this;
 }
-bool Kernel::Wait()
+// Block until the event stored in event_m[context_idx] has completed, then
+// release that event and decrement the running-context counter.
+//
+// host_elapsed_ms: optional out-parameter. When non-null it receives the
+//                  time from CL_PROFILING_COMMAND_QUEUED to
+//                  CL_PROFILING_COMMAND_END in milliseconds, or 0 when either
+//                  profiling counter cannot be read.
+// context_idx:     index of the event to wait on.
+//
+// Returns false, without waiting, when no context is running; true otherwise.
+bool Kernel::Wait(float *host_elapsed_ms, uint32_t context_idx)
 {
     // Wait called without a corresponding RunAsync
-    if (!is_running_m)
+    if (num_running_contexts_m == 0)
         return false;
-    TRACE::print("\tKernel: waiting...\n");
-    cl_int ret = clWaitForEvents(1, &event_m);
+    // context_idx is unsigned, so it is printed with %u rather than %d
+    TRACE::print("\tKernel: waiting context %u...\n", context_idx);
+    cl_int ret = clWaitForEvents(1, &event_m[context_idx]);
     errorCheck(ret, __LINE__);
-    ret = clReleaseEvent(event_m);
+
+    if (host_elapsed_ms != nullptr)
+    {
+        // Zero-initialised so a failed query can never leak stack garbage
+        // into the caller's result.
+        cl_ulong t_que = 0;
+        cl_ulong t_end = 0;
+        const cl_int que_ret =
+            clGetEventProfilingInfo(event_m[context_idx],
+                                    CL_PROFILING_COMMAND_QUEUED,
+                                    sizeof(cl_ulong), &t_que, nullptr);
+        const cl_int end_ret =
+            clGetEventProfilingInfo(event_m[context_idx],
+                                    CL_PROFILING_COMMAND_END,
+                                    sizeof(cl_ulong), &t_end, nullptr);
+
+        // Profiling is best-effort: report 0 instead of aborting when the
+        // counters are unavailable.
+        if (que_ret == CL_SUCCESS && end_ret == CL_SUCCESS)
+            *host_elapsed_ms =
+                static_cast<float>((t_end - t_que) / 1.0e6); // nano to milli seconds
+        else
+            *host_elapsed_ms = 0.0f;
+    }
+
+    ret = clReleaseEvent(event_m[context_idx]);
     errorCheck(ret, __LINE__);
     TRACE::print("\tKernel: finished execution\n");
-    is_running_m = false;
+    // Matches the increment performed by RunAsync()
+    __sync_fetch_and_sub(&num_running_contexts_m, 1);
     return true;
 }
+// Hook invoked from EventCallback. Declared weak, so the symbol may be left
+// undefined; EventCallback tests its address before calling it.
+extern void CallbackWrapper(void *user_data) __attribute__((weak));
+
+// Event callback registered by Kernel::AddCallback. Forwards user_data to
+// CallbackWrapper only when the event status is CL_SUCCESS, user_data is
+// non-null and a CallbackWrapper definition is present. The event handle
+// itself is not used.
+static
+void EventCallback(cl_event event, cl_int exec_status, void *user_data)
+{
+    const bool finished_ok = (exec_status == CL_SUCCESS);
+    const bool has_payload = (user_data != nullptr);
+
+    if (finished_ok && has_payload && CallbackWrapper)
+        CallbackWrapper(user_data);
+}
+
+// Attach EventCallback to event_m[context_idx] for the CL_COMPLETE status,
+// passing user_data through to it.
+// Returns false when no context is running or when clSetEventCallback does
+// not report CL_SUCCESS; true otherwise.
+bool Kernel::AddCallback(void *user_data, uint32_t context_idx)
+{
+    // No context is running, so no callback is registered
+    if (num_running_contexts_m == 0)
+        return false;
+
+    const cl_int status = clSetEventCallback(event_m[context_idx], CL_COMPLETE,
+                                             EventCallback, user_data);
+    return status == CL_SUCCESS;
+}
+
Kernel::~Kernel()
{
for (auto b : buffers_m)
return p;
}
+// Minimum version of OpenCL required for this version of TIDL API
+#define MIN_OCL_VERSION "01.01.16.00"
+
+// Query CL_PLATFORM_VERSION for platform `id` and check that the version
+// embedded in it is at least MIN_OCL_VERSION.
+//
+// The version is located by searching for "01." and compared to
+// MIN_OCL_VERSION as text. NOTE(review): this assumes the platform reports
+// versions in the same zero-padded NN.NN.NN.NN layout as MIN_OCL_VERSION,
+// with major version 01 -- confirm.
+//
+// Returns true when the version is sufficient. Returns false when a query
+// fails or returns an empty string. Returns false and prints an error to
+// std::cerr when the version is too old or cannot be found in the string.
+static bool CheckOpenCLVersion(cl_platform_id id)
+{
+    cl_int err;
+    size_t length = 0;
+    err = clGetPlatformInfo(id, CL_PLATFORM_VERSION, 0, nullptr, &length);
+    if (err != CL_SUCCESS || length == 0) return false;
+
+    // Array form of unique_ptr so the new[] buffer is released with delete[]
+    std::unique_ptr<char[]> version(new char[length]);
+    err = clGetPlatformInfo(id, CL_PLATFORM_VERSION, length, version.get(),
+                            nullptr);
+    if (err != CL_SUCCESS) return false;
+
+    const std::string v(version.get());
+    const std::string min_version(MIN_OCL_VERSION);
+
+    // find() returns npos when the marker is missing; passing that to
+    // substr() would throw, so treat it as "version not sufficient" instead.
+    const std::string::size_type pos = v.find("01.");
+    if (pos != std::string::npos &&
+        v.substr(pos, min_version.size()) >= min_version)
+        return true;
+
+    std::cerr << "TIDL API Error: OpenCL " << MIN_OCL_VERSION
+              << " or higher required." << std::endl;
+
+    return false;
+}
+
static bool PlatformIsAM57()
{
cl_platform_id id;
err = clGetPlatformIDs(1, &id, nullptr);
if (err != CL_SUCCESS) return false;
+ if (!CheckOpenCLVersion(id))
+ return false;
+
// Check if the device name is AM57
size_t length;
err = clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, nullptr, &length);