index 2d49bf14347998a00229ce088f39a265ad4c2a58..d722ebb196669019fbc3071338397d62a9a79ba9 100644 (file)
/*! \file execution_object.cpp */
+#include <string.h>
+#include <fstream>
+#include <climits>
#include "executor.h"
#include "execution_object.h"
#include "trace.h"
#include "parameters.h"
#include "configuration.h"
#include "common_defines.h"
-#include <string.h>
+#include "tidl_create_params.h"
+#include "device_arginfo.h"
using namespace tidl;
{
public:
Impl(Device* d, uint8_t device_index,
- const ArgInfo& create_arg,
- const ArgInfo& param_heap_arg,
+ const DeviceArgInfo& create_arg,
+ const DeviceArgInfo& param_heap_arg,
size_t extmem_heap_size,
bool internal_input);
~Impl() {}
bool RunAsync(CallType ct);
bool Wait (CallType ct);
- bool SetupProcessKernel(const ArgInfo& in, const ArgInfo& out);
- void HostWriteNetInput();
- void HostReadNetOutput();
- void ComputeInputOutputSizes();
-
Device* device_m;
- std::unique_ptr<Kernel> k_initialize_m;
- std::unique_ptr<Kernel> k_process_m;
- std::unique_ptr<Kernel> k_cleanup_m;
up_malloc_ddr<char> tidl_extmem_heap_m;
up_malloc_ddr<OCL_TIDL_InitializeParams> shared_initialize_params_m;
size_t in_size_m;
size_t out_size_m;
- ArgInfo in_m;
- ArgInfo out_m;
+ IODeviceArgInfo in_m;
+ IODeviceArgInfo out_m;
+
+ // Frame being processed by the EO
+ int current_frame_idx_m;
+
+ // Trace related
+ void WriteLayerOutputsToFile (const std::string& filename_prefix) const;
+
+ const LayerOutput* GetOutputFromLayer (uint32_t layer_index,
+ uint32_t output_index) const;
+ const LayerOutputs* GetOutputsFromAllLayers() const;
+
+ uint32_t num_network_layers_m;
+ up_malloc_ddr<OCL_TIDL_BufParams> trace_buf_params_m;
+ size_t trace_buf_params_sz_m;
+
+ private:
+ void SetupInitializeKernel(const DeviceArgInfo& create_arg,
+ const DeviceArgInfo& param_heap_arg,
+ size_t extmem_heap_size,
+ bool internal_input);
+ void SetupProcessKernel();
+
+ void HostWriteNetInput();
+ void HostReadNetOutput();
+ void ComputeInputOutputSizes();
// Index of the OpenCL device/queue used by this EO
uint8_t device_index_m;
- // Frame being processed by the EO
- int current_frame_idx_m;
+ std::unique_ptr<Kernel> k_initialize_m;
+ std::unique_ptr<Kernel> k_process_m;
+ std::unique_ptr<Kernel> k_cleanup_m;
+
+
};
size_t extmem_heap_size,
bool internal_input)
{
+ DeviceArgInfo create_arg_d(create_arg, DeviceArgInfo::Kind::BUFFER);
+ DeviceArgInfo param_heap_arg_d(param_heap_arg, DeviceArgInfo::Kind::BUFFER);
+
pimpl_m = std::unique_ptr<ExecutionObject::Impl>
{ new ExecutionObject::Impl(d, device_index,
- create_arg,
- param_heap_arg,
+ create_arg_d,
+ param_heap_arg_d,
extmem_heap_size,
internal_input) };
}
ExecutionObject::Impl::Impl(Device* d,
uint8_t device_index,
- const ArgInfo& create_arg,
- const ArgInfo& param_heap_arg,
+ const DeviceArgInfo& create_arg,
+ const DeviceArgInfo& param_heap_arg,
size_t extmem_heap_size,
bool internal_input):
device_m(d),
- k_initialize_m(nullptr),
- k_process_m(nullptr),
- k_cleanup_m(nullptr),
tidl_extmem_heap_m (nullptr, &__free_ddr),
shared_initialize_params_m(nullptr, &__free_ddr),
shared_process_params_m(nullptr, &__free_ddr),
in_size_m(0),
out_size_m(0),
- in_m(nullptr, 0),
- out_m(nullptr, 0),
+ in_m(),
+ out_m(),
+ current_frame_idx_m(0),
+ num_network_layers_m(0),
+ trace_buf_params_m(nullptr, &__free_ddr), // no trace buffer until EnableOutputBufferTrace() allocates one
+ trace_buf_params_sz_m(0),
device_index_m(device_index),
- current_frame_idx_m(0)
+ k_initialize_m(nullptr),
+ k_process_m(nullptr),
+ k_cleanup_m(nullptr)
{
- // Allocate a heap for TI DL to use on the device
- tidl_extmem_heap_m.reset(malloc_ddr<char>(extmem_heap_size));
+ SetupInitializeKernel(create_arg, param_heap_arg, extmem_heap_size,
+ internal_input); // allocates the TIDL heap and builds the initialize and cleanup kernels
- // Create a kernel for cleanup
- KernelArgs cleanup_args;
- k_cleanup_m.reset(new Kernel(device_m,
- STRING(CLEANUP_KERNEL),
- cleanup_args, device_index_m));
-
- // Set up parameter struct for the initialize kernel
- shared_initialize_params_m.reset(malloc_ddr<OCL_TIDL_InitializeParams>());
- memset(shared_initialize_params_m.get(), 0,
- sizeof(OCL_TIDL_InitializeParams));
-
- shared_initialize_params_m->tidlHeapSize = extmem_heap_size;
- shared_initialize_params_m->l2HeapSize = tidl::internal::DMEM1_SIZE;
- shared_initialize_params_m->l1HeapSize = tidl::internal::DMEM0_SIZE;
- shared_initialize_params_m->enableTrace = OCL_TIDL_TRACE_OFF;
- shared_initialize_params_m->enableInternalInput = internal_input ? 1 : 0;
-
- // Setup kernel arguments for initialize
- KernelArgs args = { create_arg,
- param_heap_arg,
- ArgInfo(tidl_extmem_heap_m.get(),
- extmem_heap_size),
- ArgInfo(shared_initialize_params_m.get(),
- sizeof(OCL_TIDL_InitializeParams)),
- device_m->type() == CL_DEVICE_TYPE_ACCELERATOR ?
- ArgInfo(nullptr, tidl::internal::DMEM1_SIZE):
- ArgInfo(nullptr, 4) };
+ SetupProcessKernel(); // builds the process kernel; it reads the heap and initialize params set up above
- k_initialize_m.reset(new Kernel(device_m,
- STRING(INIT_KERNEL), args, device_index_m));
+ // Cache the network's layer count from the create params; the trace code uses it to size and walk its metadata table
+ const TIDL_CreateParams* cp =
+ static_cast<const TIDL_CreateParams *>(create_arg.ptr()); // NOTE(review): dereferenced below without a null check - confirm callers always pass a valid create_arg
+ num_network_layers_m = cp->net.numLayers;
}
// Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
char* ExecutionObject::GetInputBufferPtr() const
{
- return static_cast<char *>(pimpl_m->in_m.ptr());
+ return static_cast<char *>(pimpl_m->in_m.GetArg().ptr()); // pointer held by the argument stored in in_m, viewed as char*
}
size_t ExecutionObject::GetInputBufferSizeInBytes() const
{
- if (pimpl_m->in_m.ptr() == nullptr) return pimpl_m->in_size_m;
- else return pimpl_m->in_m.size();
+ const DeviceArgInfo& arg = pimpl_m->in_m.GetArg();
+ if (arg.ptr() == nullptr) return pimpl_m->in_size_m; // no input buffer attached: report in_size_m instead
+ else return arg.size(); // buffer attached: report the size recorded in the argument
}
char* ExecutionObject::GetOutputBufferPtr() const
{
- return static_cast<char *>(pimpl_m->out_m.ptr());
+ return static_cast<char *>(pimpl_m->out_m.GetArg().ptr()); // pointer held by the argument stored in out_m, viewed as char*
}
size_t ExecutionObject::GetOutputBufferSizeInBytes() const
{
- if (pimpl_m->out_m.ptr() == nullptr) return pimpl_m->out_size_m;
- else return pimpl_m->shared_process_params_m.get()->bytesWritten;
+ const DeviceArgInfo& arg = pimpl_m->out_m.GetArg();
+ if (arg.ptr() == nullptr) // no output buffer attached: report out_size_m instead
+ return pimpl_m->out_size_m;
+ else // buffer attached: report bytesWritten from the shared process params, not the buffer's capacity
+ return pimpl_m->shared_process_params_m.get()->bytesWritten;
}
void ExecutionObject::SetFrameIndex(int idx)
void ExecutionObject::SetInputOutputBuffer(const ArgInfo& in, const ArgInfo& out)
{
- pimpl_m->SetupProcessKernel(in, out);
+ assert(in.ptr() != nullptr && in.size() > 0); // both buffers must be non-null and non-empty
+ assert(out.ptr() != nullptr && out.size() > 0);
+
+ pimpl_m->in_m = IODeviceArgInfo(in); // wrap each ArgInfo in an IODeviceArgInfo and store it on the Impl
+ pimpl_m->out_m = IODeviceArgInfo(out);
+}
+
+//
+// Attach already-built input/output device arguments to this EO.
+// Both pointers must be non-null; the pointed-to objects are copied into
+// the Impl, so the caller keeps ownership of *in and *out.
+//
+void ExecutionObject::SetInputOutputBuffer(const IODeviceArgInfo* in,
+                                           const IODeviceArgInfo* out)
+{
+    // Dereferencing a null pointer below would be undefined behaviour, so
+    // check the inputs the same way the ArgInfo overload asserts on its own.
+    assert(in != nullptr && out != nullptr);
+
+    pimpl_m->in_m  = *in;
+    pimpl_m->out_m = *out;
}
bool ExecutionObject::ProcessFrameStartAsync()
return ((float)GetProcessCycles())/frequency * 1000;
}
+//
+// Fetch the traced output identified by (layer_index, output_index).
+// Forwards to the Impl and hands its result (possibly nullptr) back
+// unchanged.
+//
+const LayerOutput* ExecutionObject::GetOutputFromLayer(
+                         uint32_t layer_index, uint32_t output_index) const
+{
+    const auto& impl = *pimpl_m;
+    const LayerOutput* const layer_output =
+                         impl.GetOutputFromLayer(layer_index, output_index);
+    return layer_output;
+}
+
+//
+// Fetch the traced outputs of every layer.
+// Forwards to the Impl and hands its result back unchanged.
+//
+const LayerOutputs* ExecutionObject::GetOutputsFromAllLayers() const
+{
+    const auto& impl = *pimpl_m;
+    const LayerOutputs* const all_outputs = impl.GetOutputsFromAllLayers();
+    return all_outputs;
+}
+
//
-// Create a kernel to call the "process" function
+// Allocate an OpenCL buffer holding the TIDL layer output buffer metadata:
+// one OCL_TIDL_BufParams entry for each of the TIDL_NUM_OUT_BUFS output
+// slots of every network layer. Each entry starts with bufferId set to
+// UINT_MAX; the device will update bufferId if there is valid data for the
+// entry.
+//
+void ExecutionObject::EnableOutputBufferTrace()
+{
+    Impl& impl = *pimpl_m;
+
+    // Table size in bytes; kept on the Impl so other code can tell that
+    // tracing has been enabled (non-zero size).
+    impl.trace_buf_params_sz_m = sizeof(OCL_TIDL_BufParams) *
+                                 impl.num_network_layers_m *
+                                 TIDL_NUM_OUT_BUFS;
+
+    impl.trace_buf_params_m.reset(
+        malloc_ddr<OCL_TIDL_BufParams>(impl.trace_buf_params_sz_m));
+
+    // Walk the table as a flat array and mark every entry as "not written".
+    OCL_TIDL_BufParams* const entries = impl.trace_buf_params_m.get();
+    const size_t num_entries =
+        static_cast<size_t>(impl.num_network_layers_m) * TIDL_NUM_OUT_BUFS;
+
+    for (size_t idx = 0; idx < num_entries; ++idx)
+        entries[idx].bufferId = UINT_MAX;
+}
+
+//
+// Dump the traced layer outputs to files whose names start with
+// filename_prefix. Forwards to the Impl.
+//
+void
+ExecutionObject::WriteLayerOutputsToFile(const std::string& filename_prefix) const
+{
+    const auto& impl = *pimpl_m;
+    impl.WriteLayerOutputsToFile(filename_prefix);
+}
+
+
//
-bool
-ExecutionObject::Impl::SetupProcessKernel(const ArgInfo& in, const ArgInfo& out)
+// Create the kernels that call the "initialize" and "cleanup" functions, plus the device heap and parameter block they use
+//
+void
+ExecutionObject::Impl::SetupInitializeKernel(const DeviceArgInfo& create_arg,
+ const DeviceArgInfo& param_heap_arg,
+ size_t extmem_heap_size,
+ bool internal_input)
{
- in_m = in;
- out_m = out;
+ // Allocate a heap of extmem_heap_size bytes for TI DL to use on the device
+ tidl_extmem_heap_m.reset(malloc_ddr<char>(extmem_heap_size));
+
+ // Create a kernel for cleanup; it takes no arguments
+ KernelArgs cleanup_args;
+ k_cleanup_m.reset(new Kernel(device_m,
+ STRING(CLEANUP_KERNEL),
+ cleanup_args, device_index_m));
+
+ // Set up parameter struct for the initialize kernel: allocate, zero, then fill in the fields below
+ shared_initialize_params_m.reset(malloc_ddr<OCL_TIDL_InitializeParams>());
+ memset(shared_initialize_params_m.get(), 0,
+ sizeof(OCL_TIDL_InitializeParams));
+
+ shared_initialize_params_m->tidlHeapSize = extmem_heap_size;
+ shared_initialize_params_m->l2HeapSize = tidl::internal::DMEM1_SIZE;
+ shared_initialize_params_m->l1HeapSize = tidl::internal::DMEM0_SIZE;
+ shared_initialize_params_m->enableTrace = OCL_TIDL_TRACE_OFF; // tracing starts disabled
+ shared_initialize_params_m->enableInternalInput = internal_input ? 1 : 0;
+
+ // Setup kernel arguments for initialize: create params, param heap, TIDL heap, init params, then a LOCAL-kind argument
+ KernelArgs args = { create_arg,
+ param_heap_arg,
+ DeviceArgInfo(tidl_extmem_heap_m.get(),
+ extmem_heap_size,
+ DeviceArgInfo::Kind::BUFFER),
+ DeviceArgInfo(shared_initialize_params_m.get(),
+ sizeof(OCL_TIDL_InitializeParams),
+ DeviceArgInfo::Kind::BUFFER),
+ device_m->type() == CL_DEVICE_TYPE_ACCELERATOR ? // LOCAL argument size depends on the device type
+ DeviceArgInfo(nullptr, tidl::internal::DMEM1_SIZE, // accelerator: DMEM1_SIZE bytes
+ DeviceArgInfo::Kind::LOCAL):
+ DeviceArgInfo(nullptr, 4, // any other device type: 4 bytes
+ DeviceArgInfo::Kind::LOCAL) };
+
+ k_initialize_m.reset(new Kernel(device_m,
+ STRING(INIT_KERNEL), args,
+ device_index_m));
+}
+//
+// Create a kernel to call the "process" function. Its arguments are the process params, the TIDL heap and the trace metadata buffer.
+//
+void
+ExecutionObject::Impl::SetupProcessKernel()
+{
shared_process_params_m.reset(malloc_ddr<OCL_TIDL_ProcessParams>());
shared_process_params_m->enableTrace = OCL_TIDL_TRACE_OFF;
shared_process_params_m->enableInternalInput =
shared_initialize_params_m->enableInternalInput;
shared_process_params_m->cycles = 0;
- if (shared_process_params_m->enableInternalInput == 0)
- assert(in.ptr() != nullptr && in.size() > 0);
+ KernelArgs args = { DeviceArgInfo(shared_process_params_m.get(),
+ sizeof(OCL_TIDL_ProcessParams),
+ DeviceArgInfo::Kind::BUFFER),
+ DeviceArgInfo(tidl_extmem_heap_m.get(), // same heap the initialize kernel was given
+ shared_initialize_params_m->tidlHeapSize,
+ DeviceArgInfo::Kind::BUFFER),
+ DeviceArgInfo(trace_buf_params_m.get(), // NOTE(review): still nullptr / size 0 when the constructor calls this - confirm the argument is refreshed after EnableOutputBufferTrace()
+ trace_buf_params_sz_m,
+ DeviceArgInfo::Kind::BUFFER)
- KernelArgs args = { ArgInfo(shared_process_params_m.get(),
- sizeof(OCL_TIDL_ProcessParams)),
- in,
- out,
- ArgInfo(tidl_extmem_heap_m.get(),
- shared_initialize_params_m->tidlHeapSize)
};
k_process_m.reset(new Kernel(device_m,
- STRING(PROCESS_KERNEL), args, device_index_m));
-
- return true;
+ STRING(PROCESS_KERNEL), args,
+ device_index_m));
}
return width*height*n;
}
+//
+// Copy from host buffer to TIDL device buffer
+//
void ExecutionObject::Impl::HostWriteNetInput()
{
- char* readPtr = (char *) in_m.ptr();
- PipeInfo *pipe = in_m.GetPipe();
+ const char* readPtr = (const char *) in_m.GetArg().ptr();
+ const PipeInfo& pipe = in_m.GetPipe();
for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
{
}
else
{
- shared_process_params_m->inBufAddr[i] = pipe->bufAddr_m[i];
+ shared_process_params_m->inBufAddr[i] = pipe.bufAddr_m[i];
}
- shared_process_params_m->inDataQ[i] = pipe->dataQ_m[i];
+ shared_process_params_m->inDataQ[i] = pipe.dataQ_m[i];
}
}
+//
+// Copy from TIDL device buffer into host buffer
+//
void ExecutionObject::Impl::HostReadNetOutput()
{
- char* writePtr = (char *) out_m.ptr();
- PipeInfo *pipe = out_m.GetPipe();
+ char* writePtr = (char *) out_m.GetArg().ptr();
+ PipeInfo& pipe = out_m.GetPipe();
for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
{
outBuf->numChannels));
}
- pipe->dataQ_m[i] = shared_process_params_m->outDataQ[i];
- pipe->bufAddr_m[i] = shared_initialize_params_m->bufAddrBase
+ pipe.dataQ_m[i] = shared_process_params_m->outDataQ[i];
+ pipe.bufAddr_m[i] = shared_initialize_params_m->bufAddrBase
+ outBuf->bufPlaneBufOffset;
}
- shared_process_params_m->bytesWritten = writePtr - (char *) out_m.ptr();
+ shared_process_params_m->bytesWritten = writePtr -
+ (char *) out_m.GetArg().ptr();
}
void ExecutionObject::Impl::ComputeInputOutputSizes()
return false;
}
+
+//
+// Write the trace data to output files.
+//
+// Walks the trace metadata table and, for every entry whose bufferId is no
+// longer the UINT_MAX sentinel, copies the buffer's ROI out of the TIDL heap
+// with writeDataS8 and writes it to
+//     <filename_prefix><bufferId>_<ROIWidth>x<ROIHeight>.bin
+// Returns immediately if no trace buffer has been allocated.
+// File errors are not reported: a file that cannot be opened is skipped
+// silently, as before.
+//
+void
+ExecutionObject::Impl::WriteLayerOutputsToFile(const std::string& filename_prefix) const
+{
+    if (trace_buf_params_sz_m == 0)
+        return;
+
+    const OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
+
+    for (uint32_t i = 0; i < num_network_layers_m; i++)
+        for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
+        {
+            const OCL_TIDL_BufParams* buf =
+                                    &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
+
+            // Entry still carries the sentinel: nothing was recorded for it
+            if (buf->bufferId == UINT_MAX)
+                continue;
+
+            size_t buffer_size = buf->numChannels * buf->ROIHeight *
+                                 buf->ROIWidth;
+
+            // operator new[] reports failure by throwing std::bad_alloc, so
+            // there is no null result to test for. Owning the block through
+            // unique_ptr releases it on every exit path, including an
+            // exception from the copy or the string/stream code below.
+            std::unique_ptr<char[]> tmp(new char[buffer_size]);
+
+            writeDataS8(
+                tmp.get(),
+                (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
+                + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
+                + OCL_TIDL_MAX_PAD_SIZE,
+                buf->numChannels,
+                buf->ROIWidth,
+                buf->ROIHeight,
+                buf->bufPlaneWidth,
+                ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
+                 buf->numChannels));
+
+            std::string filename(filename_prefix);
+            filename += std::to_string(buf->bufferId) + "_";
+            filename += std::to_string(buf->ROIWidth) + "x";
+            filename += std::to_string(buf->ROIHeight) + ".bin";
+
+            // Raw bytes: open in binary mode so no newline translation is
+            // applied on platforms that distinguish text from binary streams.
+            std::ofstream ofs;
+            ofs.open(filename, std::ofstream::out | std::ofstream::binary);
+            ofs.write(tmp.get(), buffer_size);
+            ofs.close();
+        }
+}
+
+
+//
+// Return a heap-allocated copy of one traced layer output.
+//
+// layer_index  must be in [0, num_network_layers_m)
+// output_index must be in [0, TIDL_NUM_OUT_BUFS)
+//
+// Returns nullptr when no trace buffer has been allocated, when an index is
+// out of range, or when the selected metadata entry still carries the
+// UINT_MAX sentinel in bufferId. Otherwise the returned LayerOutput is
+// created with new and owns its copy of the data; the caller is responsible
+// for deleting it.
+//
+const LayerOutput* ExecutionObject::Impl::GetOutputFromLayer(
+                         uint32_t layer_index, uint32_t output_index) const
+{
+    if (trace_buf_params_sz_m == 0)
+        return nullptr;
+
+    // The table holds num_network_layers_m * TIDL_NUM_OUT_BUFS entries, so an
+    // index equal to either bound is already one past the end.
+    if (layer_index >= num_network_layers_m ||
+        output_index >= TIDL_NUM_OUT_BUFS)
+        return nullptr;
+
+    const OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
+    const OCL_TIDL_BufParams* buf =
+                      &bufferParams[layer_index*TIDL_NUM_OUT_BUFS+
+                                    output_index];
+
+    if (buf->bufferId == UINT_MAX)
+        return nullptr;
+
+    size_t buffer_size = buf->numChannels * buf->ROIHeight *
+                         buf->ROIWidth;
+
+    // operator new[] throws std::bad_alloc on failure, so no null test is
+    // needed. Hold the block in a unique_ptr until the LayerOutput has taken
+    // it over, so it is not leaked if anything below throws.
+    std::unique_ptr<char[]> data(new char[buffer_size]);
+
+    writeDataS8(data.get(),
+                (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
+                + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
+                + OCL_TIDL_MAX_PAD_SIZE,
+                buf->numChannels,
+                buf->ROIWidth,
+                buf->ROIHeight,
+                buf->bufPlaneWidth,
+                ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
+                 buf->numChannels));
+
+    const LayerOutput* layer_output =
+        new LayerOutput(layer_index, output_index, buf->bufferId,
+                        buf->numROIs, buf->numChannels, buf->ROIHeight,
+                        buf->ROIWidth, data.get());
+
+    // The LayerOutput now owns the block (its destructor delete[]s it)
+    data.release();
+
+    return layer_output;
+}
+
+//
+// Collect the traced outputs of every layer into a new LayerOutputs
+// container. Each (layer, output buffer) slot is queried with
+// GetOutputFromLayer and added only when that call returns non-null.
+// The container is created with new; the caller is responsible for
+// deleting it.
+//
+const LayerOutputs* ExecutionObject::Impl::GetOutputsFromAllLayers() const
+{
+    // Owned by a unique_ptr while it is being filled, so the container and
+    // everything already in it are freed if a later allocation throws.
+    std::unique_ptr<LayerOutputs> result(new LayerOutputs);
+
+    for (uint32_t i=0; i < num_network_layers_m; i++)
+        for (int j=0; j < TIDL_NUM_OUT_BUFS; j++)
+        {
+            const LayerOutput* lo = GetOutputFromLayer(i, j);
+            if (lo)
+                result->push_back(std::unique_ptr<const LayerOutput>{ lo });
+        }
+
+    return result.release();
+}
+
+//
+// Build a LayerOutput from the metadata of one traced buffer.
+// The object stores the data pointer as-is; its destructor releases it
+// with delete[]. output_index is accepted but not stored by this
+// constructor.
+//
+LayerOutput::LayerOutput(int layer_index, int output_index, int buffer_id,
+                         int num_roi, int num_channels, size_t height,
+                         size_t width, const char* data)
+    : layer_index_m(layer_index),
+      buffer_id_m(buffer_id),
+      num_roi_m(num_roi),
+      num_channels_m(num_channels),
+      height_m(height),
+      width_m(width),
+      data_m(data)
+{
+}
+
+LayerOutput::~LayerOutput()
+{
+ delete[] data_m; // data_m is the pointer passed to the constructor; it is released here with delete[]
+}