Remove implementation details from ArgInfo
[tidl/tidl-api.git] / tidl_api / src / execution_object.cpp
1 /******************************************************************************
2  * Copyright (c) 2017-2018 Texas Instruments Incorporated - http://www.ti.com/
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *      * Redistributions of source code must retain the above copyright
8  *        notice, this list of conditions and the following disclaimer.
9  *      * Redistributions in binary form must reproduce the above copyright
10  *        notice, this list of conditions and the following disclaimer in the
11  *        documentation and/or other materials provided with the distribution.
12  *      * Neither the name of Texas Instruments Incorporated nor the
13  *        names of its contributors may be used to endorse or promote products
14  *        derived from this software without specific prior written permission.
15  *
16  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  *  THE POSSIBILITY OF SUCH DAMAGE.
27  *****************************************************************************/
29 /*! \file execution_object.cpp */
31 #include "executor.h"
32 #include "execution_object.h"
33 #include "trace.h"
34 #include "ocl_device.h"
35 #include "parameters.h"
36 #include "configuration.h"
37 #include "common_defines.h"
38 #include <string.h>
39 #include "tidl_create_params.h"
40 #include <fstream>
41 #include <climits>
43 using namespace tidl;
45 /*! @class PipeInfo
46  *  @brief Describe input and output required by piping output and input
47  *         between Execution Objects
48  */
49 class PipeInfo
50 {
51     public:
52         uint32_t dataQ_m[OCL_TIDL_MAX_IN_BUFS];
53         uint32_t bufAddr_m[OCL_TIDL_MAX_IN_BUFS];
54 };
57 class IODeviceArgInfo
58 {
59     public:
60         IODeviceArgInfo(const ArgInfo& arg):
61                         arg_m(arg, DeviceArgInfo::Kind::BUFFER) {} explicit
63         IODeviceArgInfo(): arg_m(nullptr, 0, DeviceArgInfo::Kind::BUFFER) {}
65         PipeInfo&            GetPipe()      { return pipe_m; }
66         const DeviceArgInfo& GetArg() const { return arg_m; }
68     private:
69         DeviceArgInfo arg_m;
70         PipeInfo      pipe_m;
71 };
73 class ExecutionObject::Impl
74 {
75     public:
76         Impl(Device* d, uint8_t device_index,
77              const DeviceArgInfo& create_arg,
78              const DeviceArgInfo& param_heap_arg,
79              size_t extmem_heap_size,
80              bool   internal_input);
81         ~Impl() {}
83         bool RunAsync(CallType ct);
84         bool Wait    (CallType ct);
86         Device*                         device_m;
88         up_malloc_ddr<char>             tidl_extmem_heap_m;
89         up_malloc_ddr<OCL_TIDL_InitializeParams> shared_initialize_params_m;
90         up_malloc_ddr<OCL_TIDL_ProcessParams>    shared_process_params_m;
92         size_t                          in_size_m;
93         size_t                          out_size_m;
94         IODeviceArgInfo                 in_m;
95         IODeviceArgInfo                 out_m;
97         // Frame being processed by the EO
98         int                             current_frame_idx_m;
100         // Trace related
101         void WriteLayerOutputsToFile (const std::string& filename_prefix) const;
103         const LayerOutput* GetOutputFromLayer (uint32_t layer_index,
104                                                uint32_t output_index) const;
105         const LayerOutputs* GetOutputsFromAllLayers() const;
107         uint32_t                          num_network_layers_m;
108         up_malloc_ddr<OCL_TIDL_BufParams> trace_buf_params_m;
109         size_t                            trace_buf_params_sz_m;
111     private:
112         void SetupInitializeKernel(const DeviceArgInfo& create_arg,
113                                    const DeviceArgInfo& param_heap_arg,
114                                    size_t extmem_heap_size,
115                                    bool   internal_input);
116         void SetupProcessKernel();
118         void HostWriteNetInput();
119         void HostReadNetOutput();
120         void ComputeInputOutputSizes();
122         // Index of the OpenCL device/queue used by this EO
123         uint8_t                         device_index_m;
125         std::unique_ptr<Kernel>         k_initialize_m;
126         std::unique_ptr<Kernel>         k_process_m;
127         std::unique_ptr<Kernel>         k_cleanup_m;
130 };
133 ExecutionObject::ExecutionObject(Device* d,
134                                  uint8_t device_index,
135                                  const ArgInfo& create_arg,
136                                  const ArgInfo& param_heap_arg,
137                                  size_t extmem_heap_size,
138                                  bool   internal_input)
140     DeviceArgInfo create_arg_d(create_arg, DeviceArgInfo::Kind::BUFFER);
141     DeviceArgInfo param_heap_arg_d(param_heap_arg, DeviceArgInfo::Kind::BUFFER);
143     pimpl_m = std::unique_ptr<ExecutionObject::Impl>
144               { new ExecutionObject::Impl(d, device_index,
145                                           create_arg_d,
146                                           param_heap_arg_d,
147                                           extmem_heap_size,
148                                           internal_input) };
152 ExecutionObject::Impl::Impl(Device* d,
153                                  uint8_t device_index,
154                                  const DeviceArgInfo& create_arg,
155                                  const DeviceArgInfo& param_heap_arg,
156                                  size_t extmem_heap_size,
157                                  bool   internal_input):
158     device_m(d),
159     tidl_extmem_heap_m (nullptr, &__free_ddr),
160     shared_initialize_params_m(nullptr, &__free_ddr),
161     shared_process_params_m(nullptr, &__free_ddr),
162     in_size_m(0),
163     out_size_m(0),
164     in_m(),
165     out_m(),
166     current_frame_idx_m(0),
167     num_network_layers_m(0),
168     trace_buf_params_m(nullptr, &__free_ddr),
169     trace_buf_params_sz_m(0),
170     device_index_m(device_index),
171     k_initialize_m(nullptr),
172     k_process_m(nullptr),
173     k_cleanup_m(nullptr)
175     SetupInitializeKernel(create_arg, param_heap_arg, extmem_heap_size,
176                           internal_input);
178     SetupProcessKernel();
180     // Save number of layers in the network
181     const TIDL_CreateParams* cp =
182                 static_cast<const TIDL_CreateParams *>(create_arg.ptr());
183     num_network_layers_m = cp->net.numLayers;
186 // Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
187 // Both unique_ptr and shared_ptr can be instantiated with an incomplete type
188 // unique_ptr's destructor requires a complete type in order to invoke delete
189 ExecutionObject::~ExecutionObject() = default;
191 char* ExecutionObject::GetInputBufferPtr() const
193     return static_cast<char *>(pimpl_m->in_m.GetArg().ptr());
196 size_t ExecutionObject::GetInputBufferSizeInBytes() const
198     const DeviceArgInfo& arg = pimpl_m->in_m.GetArg();
199     if    (arg.ptr() == nullptr)  return pimpl_m->in_size_m;
200     else                          return arg.size();
203 char* ExecutionObject::GetOutputBufferPtr() const
205     return static_cast<char *>(pimpl_m->out_m.GetArg().ptr());
208 size_t ExecutionObject::GetOutputBufferSizeInBytes() const
210     const DeviceArgInfo& arg = pimpl_m->out_m.GetArg();
211     if   (arg.ptr() == nullptr)
212         return pimpl_m->out_size_m;
213     else
214         return pimpl_m->shared_process_params_m.get()->bytesWritten;
217 void  ExecutionObject::SetFrameIndex(int idx)
219     pimpl_m->current_frame_idx_m = idx;
222 int ExecutionObject::GetFrameIndex() const
224     return pimpl_m->current_frame_idx_m;
227 void ExecutionObject::SetInputOutputBuffer(const ArgInfo& in, const ArgInfo& out)
229     assert(in.ptr() != nullptr && in.size() > 0);
230     assert(out.ptr() != nullptr && out.size() > 0);
232     pimpl_m->in_m  = IODeviceArgInfo(in);
233     pimpl_m->out_m = IODeviceArgInfo(out);
236 bool ExecutionObject::ProcessFrameStartAsync()
238     return pimpl_m->RunAsync(ExecutionObject::CallType::PROCESS);
241 bool ExecutionObject::ProcessFrameWait()
243     return pimpl_m->Wait(ExecutionObject::CallType::PROCESS);
246 bool ExecutionObject::RunAsync (CallType ct)
248     return pimpl_m->RunAsync(ct);
251 bool ExecutionObject::Wait (CallType ct)
253     return pimpl_m->Wait(ct);
256 uint64_t ExecutionObject::GetProcessCycles() const
258     uint8_t factor = 1;
260     // ARP32 running at half frequency of VCOP, multiply by 2 for VCOP cycles
261     if (pimpl_m->device_m->type() == CL_DEVICE_TYPE_CUSTOM)
262         factor = 2;
264     return pimpl_m->shared_process_params_m.get()->cycles * factor;
267 float ExecutionObject::GetProcessTimeInMilliSeconds() const
269     float frequency = pimpl_m->device_m->GetFrequencyInMhz() * 1000000;
270     return ((float)GetProcessCycles())/frequency * 1000;
273 const LayerOutput* ExecutionObject::GetOutputFromLayer(
274                          uint32_t layer_index, uint32_t output_index) const
276     return pimpl_m->GetOutputFromLayer(layer_index, output_index);
279 const LayerOutputs* ExecutionObject::GetOutputsFromAllLayers() const
281     return pimpl_m->GetOutputsFromAllLayers();
284 //
285 // Allocate an OpenCL buffer for TIDL layer output buffer metadata.
286 // The device will populate metadata for every buffer that is used as an
287 // output buffer by a layer.
288 //
289 void ExecutionObject::EnableOutputBufferTrace()
291     pimpl_m->trace_buf_params_sz_m = (sizeof(OCL_TIDL_BufParams)*
292                                        pimpl_m->num_network_layers_m*
293                                        TIDL_NUM_OUT_BUFS);
295     pimpl_m->trace_buf_params_m.reset(malloc_ddr<OCL_TIDL_BufParams>
296                                       (pimpl_m->trace_buf_params_sz_m));
298     // Device will update bufferId if there is valid data for the entry
299     OCL_TIDL_BufParams* bufferParams = pimpl_m->trace_buf_params_m.get();
300     for (uint32_t i = 0; i < pimpl_m->num_network_layers_m; i++)
301         for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
302         {
303             OCL_TIDL_BufParams *bufP =
304                                 &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
305             bufP->bufferId = UINT_MAX;
306         }
309 void
310 ExecutionObject::WriteLayerOutputsToFile(const std::string& filename_prefix) const
312     pimpl_m->WriteLayerOutputsToFile(filename_prefix);
316 //
317 // Create a kernel to call the "initialize" function
318 //
319 void
320 ExecutionObject::Impl::SetupInitializeKernel(const DeviceArgInfo& create_arg,
321                                              const DeviceArgInfo& param_heap_arg,
322                                              size_t extmem_heap_size,
323                                              bool   internal_input)
325     // Allocate a heap for TI DL to use on the device
326     tidl_extmem_heap_m.reset(malloc_ddr<char>(extmem_heap_size));
328     // Create a kernel for cleanup
329     KernelArgs cleanup_args;
330     k_cleanup_m.reset(new Kernel(device_m,
331                                  STRING(CLEANUP_KERNEL),
332                                  cleanup_args, device_index_m));
334     // Set up parameter struct for the initialize kernel
335     shared_initialize_params_m.reset(malloc_ddr<OCL_TIDL_InitializeParams>());
336     memset(shared_initialize_params_m.get(), 0,
337            sizeof(OCL_TIDL_InitializeParams));
339     shared_initialize_params_m->tidlHeapSize = extmem_heap_size;
340     shared_initialize_params_m->l2HeapSize   = tidl::internal::DMEM1_SIZE;
341     shared_initialize_params_m->l1HeapSize   = tidl::internal::DMEM0_SIZE;
342     shared_initialize_params_m->enableTrace  = OCL_TIDL_TRACE_OFF;
343     shared_initialize_params_m->enableInternalInput = internal_input ? 1 : 0;
345     // Setup kernel arguments for initialize
346     KernelArgs args = { create_arg,
347                         param_heap_arg,
348                         DeviceArgInfo(tidl_extmem_heap_m.get(),
349                                       extmem_heap_size,
350                                       DeviceArgInfo::Kind::BUFFER),
351                         DeviceArgInfo(shared_initialize_params_m.get(),
352                                       sizeof(OCL_TIDL_InitializeParams),
353                                       DeviceArgInfo::Kind::BUFFER),
354                         device_m->type() == CL_DEVICE_TYPE_ACCELERATOR ?
355                             DeviceArgInfo(nullptr, tidl::internal::DMEM1_SIZE,
356                                           DeviceArgInfo::Kind::LOCAL):
357                             DeviceArgInfo(nullptr, 4,
358                                           DeviceArgInfo::Kind::LOCAL) };
360     k_initialize_m.reset(new Kernel(device_m,
361                                     STRING(INIT_KERNEL), args,
362                                     device_index_m));
365 //
366 // Create a kernel to call the "process" function
367 //
368 void
369 ExecutionObject::Impl::SetupProcessKernel()
371     shared_process_params_m.reset(malloc_ddr<OCL_TIDL_ProcessParams>());
372     shared_process_params_m->enableTrace = OCL_TIDL_TRACE_OFF;
373     shared_process_params_m->enableInternalInput =
374                                shared_initialize_params_m->enableInternalInput;
375     shared_process_params_m->cycles = 0;
377     KernelArgs args = { DeviceArgInfo(shared_process_params_m.get(),
378                                       sizeof(OCL_TIDL_ProcessParams),
379                                       DeviceArgInfo::Kind::BUFFER),
380                         DeviceArgInfo(tidl_extmem_heap_m.get(),
381                                       shared_initialize_params_m->tidlHeapSize,
382                                       DeviceArgInfo::Kind::BUFFER),
383                         DeviceArgInfo(trace_buf_params_m.get(),
384                                       trace_buf_params_sz_m,
385                                       DeviceArgInfo::Kind::BUFFER)
387                       };
389     k_process_m.reset(new Kernel(device_m,
390                                  STRING(PROCESS_KERNEL), args,
391                                  device_index_m));
//
// Copy densely packed 8-bit data into a padded (pitched) destination.
//
//   readPtr  - packed source: roi * n planes of height rows of width bytes
//   ptr      - destination; plane p starts at p * chOffset and row y of a
//              plane starts y * pitch bytes into that plane
//   roi, n   - number of ROIs and of channels per ROI
//   width    - bytes copied per row
//   height   - rows per plane
//
// Returns the number of source bytes consumed, or 0 if readPtr is null or
// any dimension is not positive. Sizes and offsets are computed in wide
// types so large buffers cannot overflow int arithmetic.
//
static size_t readDataS8(const char *readPtr, char *ptr, int roi, int n,
                         int width, int height, int pitch,
                         int chOffset)
{
    if (!readPtr)  return 0;

    if (roi <= 0 || n <= 0 || width <= 0 || height <= 0)  return 0;

    const size_t row_bytes = static_cast<size_t>(width);
    const char*  src       = readPtr;

    for (int r = 0; r < roi; r++)
        for (int c = 0; c < n; c++)
        {
            const long long plane = static_cast<long long>(r) * n + c;
            char* dst_plane = ptr + plane * chOffset;

            // The source is contiguous, so it simply advances row by row
            for (int y = 0; y < height; y++, src += row_bytes)
                memcpy(dst_plane + static_cast<long long>(y) * pitch,
                       src, row_bytes);
        }

    return row_bytes * static_cast<size_t>(height) *
           static_cast<size_t>(n) * static_cast<size_t>(roi);
}
//
// Copy 8-bit data from a padded (pitched) source into a densely packed
// destination.
//
//   writePtr - packed destination: n planes of height rows of width bytes
//   ptr      - source; plane c starts at c * chOffset and row y of a plane
//              starts y * pitch bytes into that plane
//   n        - number of channels (planes)
//   width    - bytes copied per row
//   height   - rows per plane
//
// Returns the number of bytes written, or 0 if writePtr is null or any
// dimension is not positive. Sizes and offsets are computed in wide types
// so large buffers cannot overflow int arithmetic.
//
static size_t writeDataS8(char *writePtr, const char *ptr, int n, int width,
                          int height, int pitch, int chOffset)
{
    if (!writePtr)  return 0;

    if (n <= 0 || width <= 0 || height <= 0)  return 0;

    const size_t row_bytes = static_cast<size_t>(width);
    char*        dst       = writePtr;

    for (int c = 0; c < n; c++)
    {
        const char* src_plane = ptr + static_cast<long long>(c) * chOffset;

        // The destination is contiguous, so it simply advances row by row
        for (int y = 0; y < height; y++, dst += row_bytes)
            memcpy(dst,
                   src_plane + static_cast<long long>(y) * pitch,
                   row_bytes);
    }

    return row_bytes * static_cast<size_t>(height) * static_cast<size_t>(n);
}
425 //
426 // Copy from host buffer to TIDL device buffer
427 //
428 void ExecutionObject::Impl::HostWriteNetInput()
430     const char*     readPtr  = (const char *) in_m.GetArg().ptr();
431     const PipeInfo& pipe     = in_m.GetPipe();
433     for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
434     {
435         OCL_TIDL_BufParams *inBuf = &shared_initialize_params_m->inBufs[i];
437         if (shared_process_params_m->enableInternalInput == 0)
438         {
439             readPtr += readDataS8(
440                 readPtr,
441                 (char *) tidl_extmem_heap_m.get() + inBuf->bufPlaneBufOffset
442                     + inBuf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
443                     + OCL_TIDL_MAX_PAD_SIZE,
444                 inBuf->numROIs,
445                 inBuf->numChannels,
446                 inBuf->ROIWidth,
447                 inBuf->ROIHeight,
448                 inBuf->bufPlaneWidth,
449                 ((inBuf->bufPlaneWidth * inBuf->bufPlaneHeight) /
450                  inBuf->numChannels));
451         }
452         else
453         {
454             shared_process_params_m->inBufAddr[i] = pipe.bufAddr_m[i];
455         }
457         shared_process_params_m->inDataQ[i]   = pipe.dataQ_m[i];
458     }
461 //
462 // Copy from TIDL device buffer into host buffer
463 //
464 void ExecutionObject::Impl::HostReadNetOutput()
466     char* writePtr = (char *) out_m.GetArg().ptr();
467     PipeInfo& pipe = out_m.GetPipe();
469     for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
470     {
471         OCL_TIDL_BufParams *outBuf = &shared_initialize_params_m->outBufs[i];
472         if (writePtr != nullptr)
473         {
474             writePtr += writeDataS8(
475                 writePtr,
476                 (char *) tidl_extmem_heap_m.get() + outBuf->bufPlaneBufOffset
477                     + outBuf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
478                     + OCL_TIDL_MAX_PAD_SIZE,
479                 outBuf->numChannels,
480                 outBuf->ROIWidth,
481                 outBuf->ROIHeight,
482                 outBuf->bufPlaneWidth,
483                 ((outBuf->bufPlaneWidth * outBuf->bufPlaneHeight)/
484                  outBuf->numChannels));
485         }
487         pipe.dataQ_m[i]   = shared_process_params_m->outDataQ[i];
488         pipe.bufAddr_m[i] = shared_initialize_params_m->bufAddrBase
489                            + outBuf->bufPlaneBufOffset;
490     }
491     shared_process_params_m->bytesWritten = writePtr -
492                                             (char *) out_m.GetArg().ptr();
495 void ExecutionObject::Impl::ComputeInputOutputSizes()
497     if (shared_initialize_params_m->errorCode != OCL_TIDL_SUCCESS)  return;
499     if (shared_initialize_params_m->numInBufs > OCL_TIDL_MAX_IN_BUFS ||
500         shared_initialize_params_m->numOutBufs > OCL_TIDL_MAX_OUT_BUFS)
501     {
502         std::cout << "Num input/output bufs ("
503                   << shared_initialize_params_m->numInBufs << ", "
504                   << shared_initialize_params_m->numOutBufs
505                   << ") exceeded limit!" << std::endl;
506         shared_initialize_params_m->errorCode = OCL_TIDL_INIT_FAIL;
507         return;
508     }
510     in_size_m  = 0;
511     out_size_m = 0;
512     for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
513     {
514         OCL_TIDL_BufParams *inBuf = &shared_initialize_params_m->inBufs[i];
515         in_size_m += inBuf->numROIs * inBuf->numChannels * inBuf->ROIWidth *
516                      inBuf->ROIHeight;
517     }
518     for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
519     {
520         OCL_TIDL_BufParams *outBuf = &shared_initialize_params_m->outBufs[i];
521         out_size_m += outBuf->numChannels * outBuf->ROIWidth *outBuf->ROIHeight;
522     }
526 bool ExecutionObject::Impl::RunAsync(CallType ct)
528     switch (ct)
529     {
530         case CallType::INIT:
531         {
532             k_initialize_m->RunAsync();
533             break;
534         }
535         case CallType::PROCESS:
536         {
537             shared_process_params_m->frameIdx = current_frame_idx_m;
538             shared_process_params_m->bytesWritten = 0;
539             HostWriteNetInput();
540             k_process_m->RunAsync();
541             break;
542         }
543         case CallType::CLEANUP:
544         {
545             k_cleanup_m->RunAsync();
546             break;
547         }
548         default:
549             return false;
550     }
552     return true;
555 bool ExecutionObject::Impl::Wait(CallType ct)
557     switch (ct)
558     {
559         case CallType::INIT:
560         {
561             bool has_work = k_initialize_m->Wait();
563             if (has_work)
564             {
565                 ComputeInputOutputSizes();
566                 if (shared_initialize_params_m->errorCode != OCL_TIDL_SUCCESS)
567                     throw Exception(shared_initialize_params_m->errorCode,
568                                     __FILE__, __FUNCTION__, __LINE__);
569             }
570             return has_work;
571         }
572         case CallType::PROCESS:
573         {
574             bool has_work = k_process_m->Wait();
575             if (has_work)
576             {
577                 if (shared_process_params_m->errorCode != OCL_TIDL_SUCCESS)
578                     throw Exception(shared_process_params_m->errorCode,
579                                     __FILE__, __FUNCTION__, __LINE__);
580                 HostReadNetOutput();
581             }
583             return has_work;
584         }
585         case CallType::CLEANUP:
586         {
587             return k_cleanup_m->Wait();
588             break;
589         }
590         default:
591             return false;
592     }
594     return false;
597 //
598 // Write the trace data to output files
599 //
600 void
601 ExecutionObject::Impl::WriteLayerOutputsToFile(const std::string& filename_prefix) const
603     if (trace_buf_params_sz_m == 0)
604         return;
606     OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
608     for (uint32_t i = 0; i < num_network_layers_m; i++)
609         for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
610         {
611             OCL_TIDL_BufParams* buf = &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
613             if (buf->bufferId == UINT_MAX)
614                 continue;
616             size_t buffer_size = buf->numChannels * buf->ROIHeight *
617                                  buf->ROIWidth;
619             char *tmp = new char[buffer_size];
621             if (tmp == nullptr)
622                 throw Exception("Out of memory, new failed",
623                         __FILE__, __FUNCTION__, __LINE__);
625             writeDataS8(
626                 tmp,
627                 (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
628                 + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
629                 + OCL_TIDL_MAX_PAD_SIZE,
630                 buf->numChannels,
631                 buf->ROIWidth,
632                 buf->ROIHeight,
633                 buf->bufPlaneWidth,
634                 ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
635                  buf->numChannels));
637             std::string filename(filename_prefix);
638             filename += std::to_string(buf->bufferId) + "_";
639             filename += std::to_string(buf->ROIWidth) + "x";
640             filename += std::to_string(buf->ROIHeight) + ".bin";
642             std::ofstream ofs;
643             ofs.open(filename, std::ofstream::out);
644             ofs.write(tmp, buffer_size);
645             ofs.close();
647             delete[] tmp;
648         }
652 const LayerOutput* ExecutionObject::Impl::GetOutputFromLayer(
653                             uint32_t layer_index, uint32_t output_index) const
655     if (trace_buf_params_sz_m == 0)
656         return nullptr;
658     if (layer_index > num_network_layers_m || output_index > TIDL_NUM_OUT_BUFS)
659         return nullptr;
661     OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
662     OCL_TIDL_BufParams* buf = &bufferParams[layer_index*TIDL_NUM_OUT_BUFS+
663                                             output_index];
665     if (buf->bufferId == UINT_MAX)
666         return nullptr;
668     size_t buffer_size = buf->numChannels * buf->ROIHeight *
669                          buf->ROIWidth;
671     char *data = new char[buffer_size];
673     if (data == nullptr)
674         throw Exception("Out of memory, new failed",
675                 __FILE__, __FUNCTION__, __LINE__);
677     writeDataS8(data,
678                 (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
679                 + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
680                 + OCL_TIDL_MAX_PAD_SIZE,
681                 buf->numChannels,
682                 buf->ROIWidth,
683                 buf->ROIHeight,
684                 buf->bufPlaneWidth,
685                 ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
686                  buf->numChannels));
688     return new LayerOutput(layer_index, output_index, buf->bufferId,
689                            buf->numROIs, buf->numChannels, buf->ROIHeight,
690                            buf->ROIWidth, data);
693 const LayerOutputs* ExecutionObject::Impl::GetOutputsFromAllLayers() const
695     LayerOutputs* result = new LayerOutputs;
697     for (uint32_t i=0; i < num_network_layers_m; i++)
698         for (int j=0; j < TIDL_NUM_OUT_BUFS; j++)
699         {
700             const LayerOutput* lo = GetOutputFromLayer(i, j);
701             if (lo)
702                 result->push_back(std::unique_ptr<const LayerOutput>{ lo });
703         }
705     return result;
708 LayerOutput::LayerOutput(int layer_index, int output_index, int buffer_id,
709                          int num_roi, int num_channels, size_t height,
710                          size_t width, const char* data):
711                         layer_index_m(layer_index), buffer_id_m(buffer_id),
712                         num_roi_m(num_roi), num_channels_m(num_channels),
713                         height_m(height), width_m(width), data_m(data)
714 { }
716 LayerOutput::~LayerOutput()
718     delete[] data_m;