d722ebb196669019fbc3071338397d62a9a79ba9
[tidl/tidl-api.git] / tidl_api / src / execution_object.cpp
1 /******************************************************************************
2  * Copyright (c) 2017-2018 Texas Instruments Incorporated - http://www.ti.com/
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *      * Redistributions of source code must retain the above copyright
8  *        notice, this list of conditions and the following disclaimer.
9  *      * Redistributions in binary form must reproduce the above copyright
10  *        notice, this list of conditions and the following disclaimer in the
11  *        documentation and/or other materials provided with the distribution.
12  *      * Neither the name of Texas Instruments Incorporated nor the
13  *        names of its contributors may be used to endorse or promote products
14  *        derived from this software without specific prior written permission.
15  *
16  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  *  THE POSSIBILITY OF SUCH DAMAGE.
27  *****************************************************************************/
29 /*! \file execution_object.cpp */
31 #include <string.h>
32 #include <fstream>
33 #include <climits>
34 #include "executor.h"
35 #include "execution_object.h"
36 #include "trace.h"
37 #include "ocl_device.h"
38 #include "parameters.h"
39 #include "configuration.h"
40 #include "common_defines.h"
41 #include "tidl_create_params.h"
42 #include "device_arginfo.h"
44 using namespace tidl;
46 class ExecutionObject::Impl
47 {
48     public:
49         Impl(Device* d, uint8_t device_index,
50              const DeviceArgInfo& create_arg,
51              const DeviceArgInfo& param_heap_arg,
52              size_t extmem_heap_size,
53              bool   internal_input);
54         ~Impl() {}
56         bool RunAsync(CallType ct);
57         bool Wait    (CallType ct);
59         Device*                         device_m;
61         up_malloc_ddr<char>             tidl_extmem_heap_m;
62         up_malloc_ddr<OCL_TIDL_InitializeParams> shared_initialize_params_m;
63         up_malloc_ddr<OCL_TIDL_ProcessParams>    shared_process_params_m;
65         size_t                          in_size_m;
66         size_t                          out_size_m;
67         IODeviceArgInfo                 in_m;
68         IODeviceArgInfo                 out_m;
70         // Frame being processed by the EO
71         int                             current_frame_idx_m;
73         // Trace related
74         void WriteLayerOutputsToFile (const std::string& filename_prefix) const;
76         const LayerOutput* GetOutputFromLayer (uint32_t layer_index,
77                                                uint32_t output_index) const;
78         const LayerOutputs* GetOutputsFromAllLayers() const;
80         uint32_t                          num_network_layers_m;
81         up_malloc_ddr<OCL_TIDL_BufParams> trace_buf_params_m;
82         size_t                            trace_buf_params_sz_m;
84     private:
85         void SetupInitializeKernel(const DeviceArgInfo& create_arg,
86                                    const DeviceArgInfo& param_heap_arg,
87                                    size_t extmem_heap_size,
88                                    bool   internal_input);
89         void SetupProcessKernel();
91         void HostWriteNetInput();
92         void HostReadNetOutput();
93         void ComputeInputOutputSizes();
95         // Index of the OpenCL device/queue used by this EO
96         uint8_t                         device_index_m;
98         std::unique_ptr<Kernel>         k_initialize_m;
99         std::unique_ptr<Kernel>         k_process_m;
100         std::unique_ptr<Kernel>         k_cleanup_m;
103 };
106 ExecutionObject::ExecutionObject(Device* d,
107                                  uint8_t device_index,
108                                  const ArgInfo& create_arg,
109                                  const ArgInfo& param_heap_arg,
110                                  size_t extmem_heap_size,
111                                  bool   internal_input)
113     DeviceArgInfo create_arg_d(create_arg, DeviceArgInfo::Kind::BUFFER);
114     DeviceArgInfo param_heap_arg_d(param_heap_arg, DeviceArgInfo::Kind::BUFFER);
116     pimpl_m = std::unique_ptr<ExecutionObject::Impl>
117               { new ExecutionObject::Impl(d, device_index,
118                                           create_arg_d,
119                                           param_heap_arg_d,
120                                           extmem_heap_size,
121                                           internal_input) };
125 ExecutionObject::Impl::Impl(Device* d,
126                                  uint8_t device_index,
127                                  const DeviceArgInfo& create_arg,
128                                  const DeviceArgInfo& param_heap_arg,
129                                  size_t extmem_heap_size,
130                                  bool   internal_input):
131     device_m(d),
132     tidl_extmem_heap_m (nullptr, &__free_ddr),
133     shared_initialize_params_m(nullptr, &__free_ddr),
134     shared_process_params_m(nullptr, &__free_ddr),
135     in_size_m(0),
136     out_size_m(0),
137     in_m(),
138     out_m(),
139     current_frame_idx_m(0),
140     num_network_layers_m(0),
141     trace_buf_params_m(nullptr, &__free_ddr),
142     trace_buf_params_sz_m(0),
143     device_index_m(device_index),
144     k_initialize_m(nullptr),
145     k_process_m(nullptr),
146     k_cleanup_m(nullptr)
148     SetupInitializeKernel(create_arg, param_heap_arg, extmem_heap_size,
149                           internal_input);
151     SetupProcessKernel();
153     // Save number of layers in the network
154     const TIDL_CreateParams* cp =
155                 static_cast<const TIDL_CreateParams *>(create_arg.ptr());
156     num_network_layers_m = cp->net.numLayers;
159 // Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
160 // Both unique_ptr and shared_ptr can be instantiated with an incomplete type
161 // unique_ptr's destructor requires a complete type in order to invoke delete
162 ExecutionObject::~ExecutionObject() = default;
164 char* ExecutionObject::GetInputBufferPtr() const
166     return static_cast<char *>(pimpl_m->in_m.GetArg().ptr());
169 size_t ExecutionObject::GetInputBufferSizeInBytes() const
171     const DeviceArgInfo& arg = pimpl_m->in_m.GetArg();
172     if    (arg.ptr() == nullptr)  return pimpl_m->in_size_m;
173     else                          return arg.size();
176 char* ExecutionObject::GetOutputBufferPtr() const
178     return static_cast<char *>(pimpl_m->out_m.GetArg().ptr());
181 size_t ExecutionObject::GetOutputBufferSizeInBytes() const
183     const DeviceArgInfo& arg = pimpl_m->out_m.GetArg();
184     if   (arg.ptr() == nullptr)
185         return pimpl_m->out_size_m;
186     else
187         return pimpl_m->shared_process_params_m.get()->bytesWritten;
190 void  ExecutionObject::SetFrameIndex(int idx)
192     pimpl_m->current_frame_idx_m = idx;
195 int ExecutionObject::GetFrameIndex() const
197     return pimpl_m->current_frame_idx_m;
200 void ExecutionObject::SetInputOutputBuffer(const ArgInfo& in, const ArgInfo& out)
202     assert(in.ptr() != nullptr && in.size() > 0);
203     assert(out.ptr() != nullptr && out.size() > 0);
205     pimpl_m->in_m  = IODeviceArgInfo(in);
206     pimpl_m->out_m = IODeviceArgInfo(out);
209 void ExecutionObject::SetInputOutputBuffer(const IODeviceArgInfo* in,
210                                            const IODeviceArgInfo* out)
212     pimpl_m->in_m  = *in;
213     pimpl_m->out_m = *out;
216 bool ExecutionObject::ProcessFrameStartAsync()
218     return pimpl_m->RunAsync(ExecutionObject::CallType::PROCESS);
221 bool ExecutionObject::ProcessFrameWait()
223     return pimpl_m->Wait(ExecutionObject::CallType::PROCESS);
226 bool ExecutionObject::RunAsync (CallType ct)
228     return pimpl_m->RunAsync(ct);
231 bool ExecutionObject::Wait (CallType ct)
233     return pimpl_m->Wait(ct);
236 uint64_t ExecutionObject::GetProcessCycles() const
238     uint8_t factor = 1;
240     // ARP32 running at half frequency of VCOP, multiply by 2 for VCOP cycles
241     if (pimpl_m->device_m->type() == CL_DEVICE_TYPE_CUSTOM)
242         factor = 2;
244     return pimpl_m->shared_process_params_m.get()->cycles * factor;
247 float ExecutionObject::GetProcessTimeInMilliSeconds() const
249     float frequency = pimpl_m->device_m->GetFrequencyInMhz() * 1000000;
250     return ((float)GetProcessCycles())/frequency * 1000;
253 const LayerOutput* ExecutionObject::GetOutputFromLayer(
254                          uint32_t layer_index, uint32_t output_index) const
256     return pimpl_m->GetOutputFromLayer(layer_index, output_index);
259 const LayerOutputs* ExecutionObject::GetOutputsFromAllLayers() const
261     return pimpl_m->GetOutputsFromAllLayers();
264 //
265 // Allocate an OpenCL buffer for TIDL layer output buffer metadata.
266 // The device will populate metadata for every buffer that is used as an
267 // output buffer by a layer.
268 //
269 void ExecutionObject::EnableOutputBufferTrace()
271     pimpl_m->trace_buf_params_sz_m = (sizeof(OCL_TIDL_BufParams)*
272                                        pimpl_m->num_network_layers_m*
273                                        TIDL_NUM_OUT_BUFS);
275     pimpl_m->trace_buf_params_m.reset(malloc_ddr<OCL_TIDL_BufParams>
276                                       (pimpl_m->trace_buf_params_sz_m));
278     // Device will update bufferId if there is valid data for the entry
279     OCL_TIDL_BufParams* bufferParams = pimpl_m->trace_buf_params_m.get();
280     for (uint32_t i = 0; i < pimpl_m->num_network_layers_m; i++)
281         for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
282         {
283             OCL_TIDL_BufParams *bufP =
284                                 &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
285             bufP->bufferId = UINT_MAX;
286         }
289 void
290 ExecutionObject::WriteLayerOutputsToFile(const std::string& filename_prefix) const
292     pimpl_m->WriteLayerOutputsToFile(filename_prefix);
296 //
297 // Create a kernel to call the "initialize" function
298 //
299 void
300 ExecutionObject::Impl::SetupInitializeKernel(const DeviceArgInfo& create_arg,
301                                              const DeviceArgInfo& param_heap_arg,
302                                              size_t extmem_heap_size,
303                                              bool   internal_input)
305     // Allocate a heap for TI DL to use on the device
306     tidl_extmem_heap_m.reset(malloc_ddr<char>(extmem_heap_size));
308     // Create a kernel for cleanup
309     KernelArgs cleanup_args;
310     k_cleanup_m.reset(new Kernel(device_m,
311                                  STRING(CLEANUP_KERNEL),
312                                  cleanup_args, device_index_m));
314     // Set up parameter struct for the initialize kernel
315     shared_initialize_params_m.reset(malloc_ddr<OCL_TIDL_InitializeParams>());
316     memset(shared_initialize_params_m.get(), 0,
317            sizeof(OCL_TIDL_InitializeParams));
319     shared_initialize_params_m->tidlHeapSize = extmem_heap_size;
320     shared_initialize_params_m->l2HeapSize   = tidl::internal::DMEM1_SIZE;
321     shared_initialize_params_m->l1HeapSize   = tidl::internal::DMEM0_SIZE;
322     shared_initialize_params_m->enableTrace  = OCL_TIDL_TRACE_OFF;
323     shared_initialize_params_m->enableInternalInput = internal_input ? 1 : 0;
325     // Setup kernel arguments for initialize
326     KernelArgs args = { create_arg,
327                         param_heap_arg,
328                         DeviceArgInfo(tidl_extmem_heap_m.get(),
329                                       extmem_heap_size,
330                                       DeviceArgInfo::Kind::BUFFER),
331                         DeviceArgInfo(shared_initialize_params_m.get(),
332                                       sizeof(OCL_TIDL_InitializeParams),
333                                       DeviceArgInfo::Kind::BUFFER),
334                         device_m->type() == CL_DEVICE_TYPE_ACCELERATOR ?
335                             DeviceArgInfo(nullptr, tidl::internal::DMEM1_SIZE,
336                                           DeviceArgInfo::Kind::LOCAL):
337                             DeviceArgInfo(nullptr, 4,
338                                           DeviceArgInfo::Kind::LOCAL) };
340     k_initialize_m.reset(new Kernel(device_m,
341                                     STRING(INIT_KERNEL), args,
342                                     device_index_m));
345 //
346 // Create a kernel to call the "process" function
347 //
348 void
349 ExecutionObject::Impl::SetupProcessKernel()
351     shared_process_params_m.reset(malloc_ddr<OCL_TIDL_ProcessParams>());
352     shared_process_params_m->enableTrace = OCL_TIDL_TRACE_OFF;
353     shared_process_params_m->enableInternalInput =
354                                shared_initialize_params_m->enableInternalInput;
355     shared_process_params_m->cycles = 0;
357     KernelArgs args = { DeviceArgInfo(shared_process_params_m.get(),
358                                       sizeof(OCL_TIDL_ProcessParams),
359                                       DeviceArgInfo::Kind::BUFFER),
360                         DeviceArgInfo(tidl_extmem_heap_m.get(),
361                                       shared_initialize_params_m->tidlHeapSize,
362                                       DeviceArgInfo::Kind::BUFFER),
363                         DeviceArgInfo(trace_buf_params_m.get(),
364                                       trace_buf_params_sz_m,
365                                       DeviceArgInfo::Kind::BUFFER)
367                       };
369     k_process_m.reset(new Kernel(device_m,
370                                  STRING(PROCESS_KERNEL), args,
371                                  device_index_m));
//
// Copy 8-bit data from a densely packed buffer into a strided buffer.
//
//   readPtr  : packed source, laid out as roi x n x height x width bytes
//   ptr      : destination; rows of one channel are 'pitch' bytes apart
//              and consecutive channels are 'chOffset' bytes apart
//   roi, n   : number of ROIs and number of channels per ROI
//   width, height : size of one channel plane in the source, in bytes/rows
//
// Returns the number of source bytes consumed (width*height*n*roi), or 0
// when readPtr is null or any extent is not positive.
//
static size_t readDataS8(const char *readPtr, char *ptr, int roi, int n,
                         int width, int height, int pitch,
                         int chOffset)
{
    if (!readPtr)  return 0;

    // A negative width would turn into a huge memcpy length and a negative
    // product into a huge return value, so reject such extents up front.
    if (roi <= 0 || n <= 0 || width <= 0 || height <= 0)  return 0;

    const size_t row_bytes = static_cast<size_t>(width);
    const char*  src       = readPtr;   // source rows are contiguous

    for (int r = 0; r < roi; r++)
        for (int c = 0; c < n; c++)
        {
            // Offsets are computed in 64 bits: the int products of the
            // original form can overflow for large planes.
            const long long plane =
                        (static_cast<long long>(r) * n + c) * chOffset;

            for (int y = 0; y < height; y++)
            {
                memcpy(ptr + (plane + static_cast<long long>(y) * pitch),
                       src, row_bytes);
                src += row_bytes;
            }
        }

    return row_bytes * static_cast<size_t>(height) *
           static_cast<size_t>(n) * static_cast<size_t>(roi);
}
//
// Copy 8-bit data from a strided buffer into a densely packed buffer.
//
//   writePtr : packed destination, laid out as n x height x width bytes
//   ptr      : source; rows of one channel are 'pitch' bytes apart and
//              consecutive channels are 'chOffset' bytes apart
//   n        : number of channels
//   width, height : size of one channel plane in the destination
//
// Returns the number of bytes written (width*height*n), or 0 when writePtr
// is null or any extent is not positive.
//
static size_t writeDataS8(char *writePtr, const char *ptr, int n, int width,
                          int height, int pitch, int chOffset)
{
    if (!writePtr)  return 0;

    // A negative width would turn into a huge memcpy length and a negative
    // product into a huge return value, so reject such extents up front.
    if (n <= 0 || width <= 0 || height <= 0)  return 0;

    const size_t row_bytes = static_cast<size_t>(width);
    char*        dst       = writePtr;  // destination rows are contiguous

    for (int c = 0; c < n; c++)
    {
        // Offsets are computed in 64 bits: the int products of the
        // original form can overflow for large planes.
        const long long plane = static_cast<long long>(c) * chOffset;

        for (int y = 0; y < height; y++)
        {
            memcpy(dst,
                   ptr + (plane + static_cast<long long>(y) * pitch),
                   row_bytes);
            dst += row_bytes;
        }
    }

    return row_bytes * static_cast<size_t>(height) * static_cast<size_t>(n);
}
405 //
406 // Copy from host buffer to TIDL device buffer
407 //
408 void ExecutionObject::Impl::HostWriteNetInput()
410     const char*     readPtr  = (const char *) in_m.GetArg().ptr();
411     const PipeInfo& pipe     = in_m.GetPipe();
413     for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
414     {
415         OCL_TIDL_BufParams *inBuf = &shared_initialize_params_m->inBufs[i];
417         if (shared_process_params_m->enableInternalInput == 0)
418         {
419             readPtr += readDataS8(
420                 readPtr,
421                 (char *) tidl_extmem_heap_m.get() + inBuf->bufPlaneBufOffset
422                     + inBuf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
423                     + OCL_TIDL_MAX_PAD_SIZE,
424                 inBuf->numROIs,
425                 inBuf->numChannels,
426                 inBuf->ROIWidth,
427                 inBuf->ROIHeight,
428                 inBuf->bufPlaneWidth,
429                 ((inBuf->bufPlaneWidth * inBuf->bufPlaneHeight) /
430                  inBuf->numChannels));
431         }
432         else
433         {
434             shared_process_params_m->inBufAddr[i] = pipe.bufAddr_m[i];
435         }
437         shared_process_params_m->inDataQ[i]   = pipe.dataQ_m[i];
438     }
441 //
442 // Copy from TIDL device buffer into host buffer
443 //
444 void ExecutionObject::Impl::HostReadNetOutput()
446     char* writePtr = (char *) out_m.GetArg().ptr();
447     PipeInfo& pipe = out_m.GetPipe();
449     for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
450     {
451         OCL_TIDL_BufParams *outBuf = &shared_initialize_params_m->outBufs[i];
452         if (writePtr != nullptr)
453         {
454             writePtr += writeDataS8(
455                 writePtr,
456                 (char *) tidl_extmem_heap_m.get() + outBuf->bufPlaneBufOffset
457                     + outBuf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
458                     + OCL_TIDL_MAX_PAD_SIZE,
459                 outBuf->numChannels,
460                 outBuf->ROIWidth,
461                 outBuf->ROIHeight,
462                 outBuf->bufPlaneWidth,
463                 ((outBuf->bufPlaneWidth * outBuf->bufPlaneHeight)/
464                  outBuf->numChannels));
465         }
467         pipe.dataQ_m[i]   = shared_process_params_m->outDataQ[i];
468         pipe.bufAddr_m[i] = shared_initialize_params_m->bufAddrBase
469                            + outBuf->bufPlaneBufOffset;
470     }
471     shared_process_params_m->bytesWritten = writePtr -
472                                             (char *) out_m.GetArg().ptr();
475 void ExecutionObject::Impl::ComputeInputOutputSizes()
477     if (shared_initialize_params_m->errorCode != OCL_TIDL_SUCCESS)  return;
479     if (shared_initialize_params_m->numInBufs > OCL_TIDL_MAX_IN_BUFS ||
480         shared_initialize_params_m->numOutBufs > OCL_TIDL_MAX_OUT_BUFS)
481     {
482         std::cout << "Num input/output bufs ("
483                   << shared_initialize_params_m->numInBufs << ", "
484                   << shared_initialize_params_m->numOutBufs
485                   << ") exceeded limit!" << std::endl;
486         shared_initialize_params_m->errorCode = OCL_TIDL_INIT_FAIL;
487         return;
488     }
490     in_size_m  = 0;
491     out_size_m = 0;
492     for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
493     {
494         OCL_TIDL_BufParams *inBuf = &shared_initialize_params_m->inBufs[i];
495         in_size_m += inBuf->numROIs * inBuf->numChannels * inBuf->ROIWidth *
496                      inBuf->ROIHeight;
497     }
498     for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
499     {
500         OCL_TIDL_BufParams *outBuf = &shared_initialize_params_m->outBufs[i];
501         out_size_m += outBuf->numChannels * outBuf->ROIWidth *outBuf->ROIHeight;
502     }
506 bool ExecutionObject::Impl::RunAsync(CallType ct)
508     switch (ct)
509     {
510         case CallType::INIT:
511         {
512             k_initialize_m->RunAsync();
513             break;
514         }
515         case CallType::PROCESS:
516         {
517             shared_process_params_m->frameIdx = current_frame_idx_m;
518             shared_process_params_m->bytesWritten = 0;
519             HostWriteNetInput();
520             k_process_m->RunAsync();
521             break;
522         }
523         case CallType::CLEANUP:
524         {
525             k_cleanup_m->RunAsync();
526             break;
527         }
528         default:
529             return false;
530     }
532     return true;
535 bool ExecutionObject::Impl::Wait(CallType ct)
537     switch (ct)
538     {
539         case CallType::INIT:
540         {
541             bool has_work = k_initialize_m->Wait();
543             if (has_work)
544             {
545                 ComputeInputOutputSizes();
546                 if (shared_initialize_params_m->errorCode != OCL_TIDL_SUCCESS)
547                     throw Exception(shared_initialize_params_m->errorCode,
548                                     __FILE__, __FUNCTION__, __LINE__);
549             }
550             return has_work;
551         }
552         case CallType::PROCESS:
553         {
554             bool has_work = k_process_m->Wait();
555             if (has_work)
556             {
557                 if (shared_process_params_m->errorCode != OCL_TIDL_SUCCESS)
558                     throw Exception(shared_process_params_m->errorCode,
559                                     __FILE__, __FUNCTION__, __LINE__);
560                 HostReadNetOutput();
561             }
563             return has_work;
564         }
565         case CallType::CLEANUP:
566         {
567             return k_cleanup_m->Wait();
568             break;
569         }
570         default:
571             return false;
572     }
574     return false;
577 //
578 // Write the trace data to output files
579 //
580 void
581 ExecutionObject::Impl::WriteLayerOutputsToFile(const std::string& filename_prefix) const
583     if (trace_buf_params_sz_m == 0)
584         return;
586     OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
588     for (uint32_t i = 0; i < num_network_layers_m; i++)
589         for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
590         {
591             OCL_TIDL_BufParams* buf = &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
593             if (buf->bufferId == UINT_MAX)
594                 continue;
596             size_t buffer_size = buf->numChannels * buf->ROIHeight *
597                                  buf->ROIWidth;
599             char *tmp = new char[buffer_size];
601             if (tmp == nullptr)
602                 throw Exception("Out of memory, new failed",
603                         __FILE__, __FUNCTION__, __LINE__);
605             writeDataS8(
606                 tmp,
607                 (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
608                 + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
609                 + OCL_TIDL_MAX_PAD_SIZE,
610                 buf->numChannels,
611                 buf->ROIWidth,
612                 buf->ROIHeight,
613                 buf->bufPlaneWidth,
614                 ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
615                  buf->numChannels));
617             std::string filename(filename_prefix);
618             filename += std::to_string(buf->bufferId) + "_";
619             filename += std::to_string(buf->ROIWidth) + "x";
620             filename += std::to_string(buf->ROIHeight) + ".bin";
622             std::ofstream ofs;
623             ofs.open(filename, std::ofstream::out);
624             ofs.write(tmp, buffer_size);
625             ofs.close();
627             delete[] tmp;
628         }
632 const LayerOutput* ExecutionObject::Impl::GetOutputFromLayer(
633                             uint32_t layer_index, uint32_t output_index) const
635     if (trace_buf_params_sz_m == 0)
636         return nullptr;
638     if (layer_index > num_network_layers_m || output_index > TIDL_NUM_OUT_BUFS)
639         return nullptr;
641     OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
642     OCL_TIDL_BufParams* buf = &bufferParams[layer_index*TIDL_NUM_OUT_BUFS+
643                                             output_index];
645     if (buf->bufferId == UINT_MAX)
646         return nullptr;
648     size_t buffer_size = buf->numChannels * buf->ROIHeight *
649                          buf->ROIWidth;
651     char *data = new char[buffer_size];
653     if (data == nullptr)
654         throw Exception("Out of memory, new failed",
655                 __FILE__, __FUNCTION__, __LINE__);
657     writeDataS8(data,
658                 (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
659                 + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
660                 + OCL_TIDL_MAX_PAD_SIZE,
661                 buf->numChannels,
662                 buf->ROIWidth,
663                 buf->ROIHeight,
664                 buf->bufPlaneWidth,
665                 ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
666                  buf->numChannels));
668     return new LayerOutput(layer_index, output_index, buf->bufferId,
669                            buf->numROIs, buf->numChannels, buf->ROIHeight,
670                            buf->ROIWidth, data);
673 const LayerOutputs* ExecutionObject::Impl::GetOutputsFromAllLayers() const
675     LayerOutputs* result = new LayerOutputs;
677     for (uint32_t i=0; i < num_network_layers_m; i++)
678         for (int j=0; j < TIDL_NUM_OUT_BUFS; j++)
679         {
680             const LayerOutput* lo = GetOutputFromLayer(i, j);
681             if (lo)
682                 result->push_back(std::unique_ptr<const LayerOutput>{ lo });
683         }
685     return result;
688 LayerOutput::LayerOutput(int layer_index, int output_index, int buffer_id,
689                          int num_roi, int num_channels, size_t height,
690                          size_t width, const char* data):
691                         layer_index_m(layer_index), buffer_id_m(buffer_id),
692                         num_roi_m(num_roi), num_channels_m(num_channels),
693                         height_m(height), width_m(width), data_m(data)
694 { }
696 LayerOutput::~LayerOutput()
698     delete[] data_m;