index f20305d1cba21547f84066455207968401f959e0..b3eaf36d4894a8c2f0b15f60d0d24889a9dcc4fa 100644 (file)
// Queue 0 on device 0
queue_m[0] = clCreateCommandQueue(context_m,
device_ids[0],
- 0,
+ CL_QUEUE_PROFILING_ENABLE,
&errcode);
errorCheck(errcode, __LINE__);
BuildProgramFromBinary(binary_filename, device_ids, 1);
int index = static_cast<int>(id);
queue_m[index] = clCreateCommandQueue(context_m,
sub_devices[index],
- 0,
+ CL_QUEUE_PROFILING_ENABLE,
&errcode);
errorCheck(errcode, __LINE__);
}
int index = static_cast<int>(id);
queue_m[index] = clCreateCommandQueue(context_m,
all_device_ids[index],
- 0,
+ CL_QUEUE_PROFILING_ENABLE,
&errcode);
errorCheck(errcode, __LINE__);
}
errorCheck(err, __LINE__);
int arg_index = 0;
- for (auto arg : args)
+ for (const auto& arg : args)
{
if (!arg.isLocal())
{
- if (arg.kind() == ArgInfo::Kind::BUFFER)
+ if (arg.kind() == DeviceArgInfo::Kind::BUFFER)
{
cl_mem buffer = device_m->CreateBuffer(arg);
buffers_m.push_back(buffer);
}
- else if (arg.kind() == ArgInfo::Kind::SCALAR)
+ else if (arg.kind() == DeviceArgInfo::Kind::SCALAR)
{
clSetKernelArg(kernel_m, arg_index, arg.size(), arg.ptr());
TRACE::print(" Arg[%d]: %p\n", arg_index, arg.ptr());
}
else
{
- assert ("ArgInfo kind not supported");
+ assert ("DeviceArgInfo kind not supported");
}
}
else
}
-bool Kernel::Wait()
+// Waits on the kernel's OpenCL event (event_m) and then releases it.
+//
+// host_elapsed_ms: optional out-parameter. When non-null it receives the
+//   interval between the event's CL_PROFILING_COMMAND_QUEUED and
+//   CL_PROFILING_COMMAND_END timestamps, converted to milliseconds.
+//   Both profiling queries are checked with errorCheck, like the other
+//   OpenCL calls in this function, so a failed query is reported instead of
+//   silently producing a value computed from uninitialised timestamps.
+//
+// Returns true at the end of the path visible in this hunk.
+bool Kernel::Wait(float *host_elapsed_ms)
{
// Wait called without a corresponding RunAsync
if (!is_running_m)
TRACE::print("\tKernel: waiting...\n");
cl_int ret = clWaitForEvents(1, &event_m);
errorCheck(ret, __LINE__);
+
+    if (host_elapsed_ms != nullptr)
+    {
+        // Zero-initialise so the timestamps are never indeterminate.
+        cl_ulong t_que = 0;
+        cl_ulong t_end = 0;
+
+        ret = clGetEventProfilingInfo(event_m, CL_PROFILING_COMMAND_QUEUED,
+                                      sizeof(cl_ulong), &t_que, nullptr);
+        errorCheck(ret, __LINE__);
+
+        ret = clGetEventProfilingInfo(event_m, CL_PROFILING_COMMAND_END,
+                                      sizeof(cl_ulong), &t_end, nullptr);
+        errorCheck(ret, __LINE__);
+
+        // Profiling counters are in nanoseconds; report milliseconds.
+        *host_elapsed_ms = static_cast<float>((t_end - t_que) / 1.0e6);
+    }
+
ret = clReleaseEvent(event_m);
errorCheck(ret, __LINE__);
TRACE::print("\tKernel: finished execution\n");
return true;
}
+// Declared weak so the symbol may be absent at link time; EventCallback
+// tests it for null before calling.
+extern void CallbackWrapper(void *user_data) __attribute__((weak));
+
+// Event callback handed to clSetEventCallback by Kernel::AddCallback.
+// Forwards user_data to CallbackWrapper only when the reported status is
+// CL_SUCCESS, user_data is non-null and CallbackWrapper is available;
+// otherwise it does nothing.
+static
+void EventCallback(cl_event event, cl_int exec_status, void *user_data)
+{
+    const bool finished_ok = (exec_status == CL_SUCCESS);
+    const bool has_payload = (user_data != nullptr);
+
+    if (finished_ok && has_payload && CallbackWrapper)
+    {
+        CallbackWrapper(user_data);
+    }
+}
+
+// Attaches EventCallback to this kernel's event for the CL_COMPLETE state,
+// passing user_data through to it.
+// Returns false when the kernel is not running (is_running_m is false) or
+// when clSetEventCallback does not return CL_SUCCESS; true otherwise.
+bool Kernel::AddCallback(void *user_data)
+{
+    if (!is_running_m)
+    {
+        return false;
+    }
+
+    const cl_int status = clSetEventCallback(event_m, CL_COMPLETE,
+                                             EventCallback, user_data);
+    return status == CL_SUCCESS;
+}
+
Kernel::~Kernel()
{
for (auto b : buffers_m)
clReleaseKernel(kernel_m);
}
-cl_mem Device::CreateBuffer(const ArgInfo &Arg)
+cl_mem Device::CreateBuffer(const DeviceArgInfo &Arg)
{
size_t size = Arg.size();
void *host_ptr = Arg.ptr();