6ebf653d46710e9e9828986da257ee90f3053ebf
[tidl/tidl-api.git] / tidl_api / src / execution_object.cpp
1 /******************************************************************************
2  * Copyright (c) 2017-2018 Texas Instruments Incorporated - http://www.ti.com/
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *      * Redistributions of source code must retain the above copyright
8  *        notice, this list of conditions and the following disclaimer.
9  *      * Redistributions in binary form must reproduce the above copyright
10  *        notice, this list of conditions and the following disclaimer in the
11  *        documentation and/or other materials provided with the distribution.
12  *      * Neither the name of Texas Instruments Incorporated nor the
13  *        names of its contributors may be used to endorse or promote products
14  *        derived from this software without specific prior written permission.
15  *
16  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  *  THE POSSIBILITY OF SUCH DAMAGE.
27  *****************************************************************************/
29 /*! \file execution_object.cpp */
31 #include "executor.h"
32 #include "execution_object.h"
33 #include "trace.h"
34 #include "ocl_device.h"
35 #include "parameters.h"
36 #include "configuration.h"
37 #include "common_defines.h"
38 #include <string.h>
39 #include "tidl_create_params.h"
40 #include <fstream>
41 #include <climits>
43 using namespace tidl;
45 class ExecutionObject::Impl
46 {
47     public:
48         Impl(Device* d, uint8_t device_index,
49              const ArgInfo& create_arg,
50              const ArgInfo& param_heap_arg,
51              size_t extmem_heap_size,
52              bool   internal_input);
53         ~Impl() {}
55         bool RunAsync(CallType ct);
56         bool Wait    (CallType ct);
58         Device*                         device_m;
60         up_malloc_ddr<char>             tidl_extmem_heap_m;
61         up_malloc_ddr<OCL_TIDL_InitializeParams> shared_initialize_params_m;
62         up_malloc_ddr<OCL_TIDL_ProcessParams>    shared_process_params_m;
64         size_t                          in_size_m;
65         size_t                          out_size_m;
66         ArgInfo                         in_m;
67         ArgInfo                         out_m;
69         // Frame being processed by the EO
70         int                             current_frame_idx_m;
72         // Trace related
73         void WriteLayerOutputsToFile (const std::string& filename_prefix) const;
75         const LayerOutput* GetOutputFromLayer (uint32_t layer_index,
76                                                uint32_t output_index) const;
77         const LayerOutputs* GetOutputsFromAllLayers() const;
79         uint32_t                          num_network_layers_m;
80         up_malloc_ddr<OCL_TIDL_BufParams> trace_buf_params_m;
81         size_t                            trace_buf_params_sz_m;
83     private:
84         void SetupInitializeKernel(const ArgInfo& create_arg,
85                                    const ArgInfo& param_heap_arg,
86                                    size_t extmem_heap_size,
87                                    bool   internal_input);
88         void SetupProcessKernel();
90         void HostWriteNetInput();
91         void HostReadNetOutput();
92         void ComputeInputOutputSizes();
94         // Index of the OpenCL device/queue used by this EO
95         uint8_t                         device_index_m;
97         std::unique_ptr<Kernel>         k_initialize_m;
98         std::unique_ptr<Kernel>         k_process_m;
99         std::unique_ptr<Kernel>         k_cleanup_m;
102 };
105 ExecutionObject::ExecutionObject(Device* d,
106                                  uint8_t device_index,
107                                  const ArgInfo& create_arg,
108                                  const ArgInfo& param_heap_arg,
109                                  size_t extmem_heap_size,
110                                  bool   internal_input)
112     pimpl_m = std::unique_ptr<ExecutionObject::Impl>
113               { new ExecutionObject::Impl(d, device_index,
114                                           create_arg,
115                                           param_heap_arg,
116                                           extmem_heap_size,
117                                           internal_input) };
121 ExecutionObject::Impl::Impl(Device* d,
122                                  uint8_t device_index,
123                                  const ArgInfo& create_arg,
124                                  const ArgInfo& param_heap_arg,
125                                  size_t extmem_heap_size,
126                                  bool   internal_input):
127     device_m(d),
128     tidl_extmem_heap_m (nullptr, &__free_ddr),
129     shared_initialize_params_m(nullptr, &__free_ddr),
130     shared_process_params_m(nullptr, &__free_ddr),
131     in_size_m(0),
132     out_size_m(0),
133     in_m(nullptr, 0),
134     out_m(nullptr, 0),
135     current_frame_idx_m(0),
136     num_network_layers_m(0),
137     trace_buf_params_m(nullptr, &__free_ddr),
138     trace_buf_params_sz_m(0),
139     device_index_m(device_index),
140     k_initialize_m(nullptr),
141     k_process_m(nullptr),
142     k_cleanup_m(nullptr)
144     SetupInitializeKernel(create_arg, param_heap_arg, extmem_heap_size,
145                           internal_input);
147     SetupProcessKernel();
149     // Save number of layers in the network
150     const TIDL_CreateParams* cp =
151                 static_cast<const TIDL_CreateParams *>(create_arg.ptr());
152     num_network_layers_m = cp->net.numLayers;
// Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
// Both unique_ptr and shared_ptr can be instantiated with an incomplete type.
// unique_ptr's destructor requires a complete type in order to invoke delete,
// so the destructor is defaulted here, after ExecutionObject::Impl has been
// fully defined in this file.
ExecutionObject::~ExecutionObject() = default;
160 char* ExecutionObject::GetInputBufferPtr() const
162     return static_cast<char *>(pimpl_m->in_m.ptr());
165 size_t ExecutionObject::GetInputBufferSizeInBytes() const
167     if (pimpl_m->in_m.ptr() == nullptr)  return pimpl_m->in_size_m;
168     else                                 return pimpl_m->in_m.size();
171 char* ExecutionObject::GetOutputBufferPtr() const
173     return static_cast<char *>(pimpl_m->out_m.ptr());
176 size_t ExecutionObject::GetOutputBufferSizeInBytes() const
178     if (pimpl_m->out_m.ptr() == nullptr)  return pimpl_m->out_size_m;
179     else           return pimpl_m->shared_process_params_m.get()->bytesWritten;
182 void  ExecutionObject::SetFrameIndex(int idx)
184     pimpl_m->current_frame_idx_m = idx;
187 int ExecutionObject::GetFrameIndex() const
189     return pimpl_m->current_frame_idx_m;
192 void ExecutionObject::SetInputOutputBuffer(const ArgInfo& in, const ArgInfo& out)
194     assert(in.ptr() != nullptr && in.size() > 0);
195     assert(out.ptr() != nullptr && out.size() > 0);
197     pimpl_m->in_m  = in;
198     pimpl_m->out_m = out;
201 bool ExecutionObject::ProcessFrameStartAsync()
203     return pimpl_m->RunAsync(ExecutionObject::CallType::PROCESS);
206 bool ExecutionObject::ProcessFrameWait()
208     return pimpl_m->Wait(ExecutionObject::CallType::PROCESS);
211 bool ExecutionObject::RunAsync (CallType ct)
213     return pimpl_m->RunAsync(ct);
216 bool ExecutionObject::Wait (CallType ct)
218     return pimpl_m->Wait(ct);
221 uint64_t ExecutionObject::GetProcessCycles() const
223     uint8_t factor = 1;
225     // ARP32 running at half frequency of VCOP, multiply by 2 for VCOP cycles
226     if (pimpl_m->device_m->type() == CL_DEVICE_TYPE_CUSTOM)
227         factor = 2;
229     return pimpl_m->shared_process_params_m.get()->cycles * factor;
232 float ExecutionObject::GetProcessTimeInMilliSeconds() const
234     float frequency = pimpl_m->device_m->GetFrequencyInMhz() * 1000000;
235     return ((float)GetProcessCycles())/frequency * 1000;
238 const LayerOutput* ExecutionObject::GetOutputFromLayer(
239                          uint32_t layer_index, uint32_t output_index) const
241     return pimpl_m->GetOutputFromLayer(layer_index, output_index);
244 const LayerOutputs* ExecutionObject::GetOutputsFromAllLayers() const
246     return pimpl_m->GetOutputsFromAllLayers();
249 //
250 // Allocate an OpenCL buffer for TIDL layer output buffer metadata.
251 // The device will populate metadata for every buffer that is used as an
252 // output buffer by a layer.
253 //
254 void ExecutionObject::EnableOutputBufferTrace()
256     pimpl_m->trace_buf_params_sz_m = (sizeof(OCL_TIDL_BufParams)*
257                                        pimpl_m->num_network_layers_m*
258                                        TIDL_NUM_OUT_BUFS);
260     pimpl_m->trace_buf_params_m.reset(malloc_ddr<OCL_TIDL_BufParams>
261                                       (pimpl_m->trace_buf_params_sz_m));
263     // Device will update bufferId if there is valid data for the entry
264     OCL_TIDL_BufParams* bufferParams = pimpl_m->trace_buf_params_m.get();
265     for (uint32_t i = 0; i < pimpl_m->num_network_layers_m; i++)
266         for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
267         {
268             OCL_TIDL_BufParams *bufP =
269                                 &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
270             bufP->bufferId = UINT_MAX;
271         }
274 void
275 ExecutionObject::WriteLayerOutputsToFile(const std::string& filename_prefix) const
277     pimpl_m->WriteLayerOutputsToFile(filename_prefix);
281 //
282 // Create a kernel to call the "initialize" function
283 //
284 void
285 ExecutionObject::Impl::SetupInitializeKernel(const ArgInfo& create_arg,
286                                              const ArgInfo& param_heap_arg,
287                                              size_t extmem_heap_size,
288                                              bool   internal_input)
290     // Allocate a heap for TI DL to use on the device
291     tidl_extmem_heap_m.reset(malloc_ddr<char>(extmem_heap_size));
293     // Create a kernel for cleanup
294     KernelArgs cleanup_args;
295     k_cleanup_m.reset(new Kernel(device_m,
296                                  STRING(CLEANUP_KERNEL),
297                                  cleanup_args, device_index_m));
299     // Set up parameter struct for the initialize kernel
300     shared_initialize_params_m.reset(malloc_ddr<OCL_TIDL_InitializeParams>());
301     memset(shared_initialize_params_m.get(), 0,
302            sizeof(OCL_TIDL_InitializeParams));
304     shared_initialize_params_m->tidlHeapSize = extmem_heap_size;
305     shared_initialize_params_m->l2HeapSize   = tidl::internal::DMEM1_SIZE;
306     shared_initialize_params_m->l1HeapSize   = tidl::internal::DMEM0_SIZE;
307     shared_initialize_params_m->enableTrace  = OCL_TIDL_TRACE_OFF;
308     shared_initialize_params_m->enableInternalInput = internal_input ? 1 : 0;
310     // Setup kernel arguments for initialize
311     KernelArgs args = { create_arg,
312                         param_heap_arg,
313                         ArgInfo(tidl_extmem_heap_m.get(),
314                                 extmem_heap_size),
315                         ArgInfo(shared_initialize_params_m.get(),
316                                 sizeof(OCL_TIDL_InitializeParams)),
317                         device_m->type() == CL_DEVICE_TYPE_ACCELERATOR ?
318                             ArgInfo(nullptr, tidl::internal::DMEM1_SIZE):
319                             ArgInfo(nullptr, 4)                       };
321     k_initialize_m.reset(new Kernel(device_m,
322                                     STRING(INIT_KERNEL), args,
323                                     device_index_m));
326 //
327 // Create a kernel to call the "process" function
328 //
329 void
330 ExecutionObject::Impl::SetupProcessKernel()
332     shared_process_params_m.reset(malloc_ddr<OCL_TIDL_ProcessParams>());
333     shared_process_params_m->enableTrace = OCL_TIDL_TRACE_OFF;
334     shared_process_params_m->enableInternalInput =
335                                shared_initialize_params_m->enableInternalInput;
336     shared_process_params_m->cycles = 0;
338     KernelArgs args = { ArgInfo(shared_process_params_m.get(),
339                                 sizeof(OCL_TIDL_ProcessParams)),
340                         ArgInfo(tidl_extmem_heap_m.get(),
341                                 shared_initialize_params_m->tidlHeapSize),
342                         ArgInfo(trace_buf_params_m.get(),
343                                 trace_buf_params_sz_m)
345                       };
347     k_process_m.reset(new Kernel(device_m,
348                                  STRING(PROCESS_KERNEL), args,
349                                  device_index_m));
//
// Copy densely packed 8-bit data from readPtr into a strided buffer at ptr.
// The source holds roi * n planes of height rows, each width bytes long.
// In the destination, rows are pitch bytes apart and planes chOffset bytes
// apart. Returns the number of bytes consumed from readPtr, or 0 when
// readPtr is null.
//
static size_t readDataS8(const char *readPtr, char *ptr, int roi, int n,
                         int width, int height, int pitch,
                         int chOffset)
{
    if (readPtr == nullptr)
        return 0;

    for (int r = 0; r < roi; ++r)
    {
        for (int plane = 0; plane < n; ++plane)
        {
            for (int row = 0; row < height; ++row)
            {
                char*       dst = ptr     + (r*n*chOffset + plane*chOffset
                                             + row*pitch);
                const char* src = readPtr + (r*n*width*height
                                             + plane*width*height+ row*width);
                memcpy(dst, src, width);
            }
        }
    }

    return width*height*n*roi;
}
//
// Copy 8-bit data from a strided buffer at ptr into a densely packed buffer
// at writePtr. The source holds n planes chOffset bytes apart, with rows
// pitch bytes apart; width bytes are taken from each of the height rows.
// Returns the number of bytes produced, or 0 when writePtr is null.
//
static size_t writeDataS8(char *writePtr, const char *ptr, int n, int width,
                          int height, int pitch, int chOffset)
{
    if (writePtr == nullptr)
        return 0;

    for (int plane = 0; plane < n; ++plane)
    {
        for (int row = 0; row < height; ++row)
        {
            char*       dst = writePtr + (plane*width*height + row*width);
            const char* src = ptr      + (plane*chOffset + row*pitch);
            memcpy(dst, src, width);
        }
    }

    return width*height*n;
}
383 //
384 // Copy from host buffer to TIDL device buffer
385 //
386 void ExecutionObject::Impl::HostWriteNetInput()
388     const char*     readPtr  = (const char *) in_m.ptr();
389     const PipeInfo* pipe     = in_m.GetPipe();
391     for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
392     {
393         OCL_TIDL_BufParams *inBuf = &shared_initialize_params_m->inBufs[i];
395         if (shared_process_params_m->enableInternalInput == 0)
396         {
397             readPtr += readDataS8(
398                 readPtr,
399                 (char *) tidl_extmem_heap_m.get() + inBuf->bufPlaneBufOffset
400                     + inBuf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
401                     + OCL_TIDL_MAX_PAD_SIZE,
402                 inBuf->numROIs,
403                 inBuf->numChannels,
404                 inBuf->ROIWidth,
405                 inBuf->ROIHeight,
406                 inBuf->bufPlaneWidth,
407                 ((inBuf->bufPlaneWidth * inBuf->bufPlaneHeight) /
408                  inBuf->numChannels));
409         }
410         else
411         {
412             shared_process_params_m->inBufAddr[i] = pipe->bufAddr_m[i];
413         }
415         shared_process_params_m->inDataQ[i]   = pipe->dataQ_m[i];
416     }
419 //
420 // Copy from TIDL device buffer into host buffer
421 //
422 void ExecutionObject::Impl::HostReadNetOutput()
424     char* writePtr = (char *) out_m.ptr();
425     PipeInfo* pipe = out_m.GetPipe();
427     for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
428     {
429         OCL_TIDL_BufParams *outBuf = &shared_initialize_params_m->outBufs[i];
430         if (writePtr != nullptr)
431         {
432             writePtr += writeDataS8(
433                 writePtr,
434                 (char *) tidl_extmem_heap_m.get() + outBuf->bufPlaneBufOffset
435                     + outBuf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
436                     + OCL_TIDL_MAX_PAD_SIZE,
437                 outBuf->numChannels,
438                 outBuf->ROIWidth,
439                 outBuf->ROIHeight,
440                 outBuf->bufPlaneWidth,
441                 ((outBuf->bufPlaneWidth * outBuf->bufPlaneHeight)/
442                  outBuf->numChannels));
443         }
445         pipe->dataQ_m[i]   = shared_process_params_m->outDataQ[i];
446         pipe->bufAddr_m[i] = shared_initialize_params_m->bufAddrBase
447                            + outBuf->bufPlaneBufOffset;
448     }
449     shared_process_params_m->bytesWritten = writePtr - (char *) out_m.ptr();
452 void ExecutionObject::Impl::ComputeInputOutputSizes()
454     if (shared_initialize_params_m->errorCode != OCL_TIDL_SUCCESS)  return;
456     if (shared_initialize_params_m->numInBufs > OCL_TIDL_MAX_IN_BUFS ||
457         shared_initialize_params_m->numOutBufs > OCL_TIDL_MAX_OUT_BUFS)
458     {
459         std::cout << "Num input/output bufs ("
460                   << shared_initialize_params_m->numInBufs << ", "
461                   << shared_initialize_params_m->numOutBufs
462                   << ") exceeded limit!" << std::endl;
463         shared_initialize_params_m->errorCode = OCL_TIDL_INIT_FAIL;
464         return;
465     }
467     in_size_m  = 0;
468     out_size_m = 0;
469     for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
470     {
471         OCL_TIDL_BufParams *inBuf = &shared_initialize_params_m->inBufs[i];
472         in_size_m += inBuf->numROIs * inBuf->numChannels * inBuf->ROIWidth *
473                      inBuf->ROIHeight;
474     }
475     for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
476     {
477         OCL_TIDL_BufParams *outBuf = &shared_initialize_params_m->outBufs[i];
478         out_size_m += outBuf->numChannels * outBuf->ROIWidth *outBuf->ROIHeight;
479     }
483 bool ExecutionObject::Impl::RunAsync(CallType ct)
485     switch (ct)
486     {
487         case CallType::INIT:
488         {
489             k_initialize_m->RunAsync();
490             break;
491         }
492         case CallType::PROCESS:
493         {
494             shared_process_params_m->frameIdx = current_frame_idx_m;
495             shared_process_params_m->bytesWritten = 0;
496             HostWriteNetInput();
497             k_process_m->RunAsync();
498             break;
499         }
500         case CallType::CLEANUP:
501         {
502             k_cleanup_m->RunAsync();
503             break;
504         }
505         default:
506             return false;
507     }
509     return true;
512 bool ExecutionObject::Impl::Wait(CallType ct)
514     switch (ct)
515     {
516         case CallType::INIT:
517         {
518             bool has_work = k_initialize_m->Wait();
520             if (has_work)
521             {
522                 ComputeInputOutputSizes();
523                 if (shared_initialize_params_m->errorCode != OCL_TIDL_SUCCESS)
524                     throw Exception(shared_initialize_params_m->errorCode,
525                                     __FILE__, __FUNCTION__, __LINE__);
526             }
527             return has_work;
528         }
529         case CallType::PROCESS:
530         {
531             bool has_work = k_process_m->Wait();
532             if (has_work)
533             {
534                 if (shared_process_params_m->errorCode != OCL_TIDL_SUCCESS)
535                     throw Exception(shared_process_params_m->errorCode,
536                                     __FILE__, __FUNCTION__, __LINE__);
537                 HostReadNetOutput();
538             }
540             return has_work;
541         }
542         case CallType::CLEANUP:
543         {
544             return k_cleanup_m->Wait();
545             break;
546         }
547         default:
548             return false;
549     }
551     return false;
554 //
555 // Write the trace data to output files
556 //
557 void
558 ExecutionObject::Impl::WriteLayerOutputsToFile(const std::string& filename_prefix) const
560     if (trace_buf_params_sz_m == 0)
561         return;
563     OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
565     for (uint32_t i = 0; i < num_network_layers_m; i++)
566         for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
567         {
568             OCL_TIDL_BufParams* buf = &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
570             if (buf->bufferId == UINT_MAX)
571                 continue;
573             size_t buffer_size = buf->numChannels * buf->ROIHeight *
574                                  buf->ROIWidth;
576             char *tmp = new char[buffer_size];
578             if (tmp == nullptr)
579                 throw Exception("Out of memory, new failed",
580                         __FILE__, __FUNCTION__, __LINE__);
582             writeDataS8(
583                 tmp,
584                 (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
585                 + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
586                 + OCL_TIDL_MAX_PAD_SIZE,
587                 buf->numChannels,
588                 buf->ROIWidth,
589                 buf->ROIHeight,
590                 buf->bufPlaneWidth,
591                 ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
592                  buf->numChannels));
594             std::string filename(filename_prefix);
595             filename += std::to_string(buf->bufferId) + "_";
596             filename += std::to_string(buf->ROIWidth) + "x";
597             filename += std::to_string(buf->ROIHeight) + ".bin";
599             std::ofstream ofs;
600             ofs.open(filename, std::ofstream::out);
601             ofs.write(tmp, buffer_size);
602             ofs.close();
604             delete[] tmp;
605         }
609 const LayerOutput* ExecutionObject::Impl::GetOutputFromLayer(
610                             uint32_t layer_index, uint32_t output_index) const
612     if (trace_buf_params_sz_m == 0)
613         return nullptr;
615     if (layer_index > num_network_layers_m || output_index > TIDL_NUM_OUT_BUFS)
616         return nullptr;
618     OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
619     OCL_TIDL_BufParams* buf = &bufferParams[layer_index*TIDL_NUM_OUT_BUFS+
620                                             output_index];
622     if (buf->bufferId == UINT_MAX)
623         return nullptr;
625     size_t buffer_size = buf->numChannels * buf->ROIHeight *
626                          buf->ROIWidth;
628     char *data = new char[buffer_size];
630     if (data == nullptr)
631         throw Exception("Out of memory, new failed",
632                 __FILE__, __FUNCTION__, __LINE__);
634     writeDataS8(data,
635                 (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
636                 + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
637                 + OCL_TIDL_MAX_PAD_SIZE,
638                 buf->numChannels,
639                 buf->ROIWidth,
640                 buf->ROIHeight,
641                 buf->bufPlaneWidth,
642                 ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
643                  buf->numChannels));
645     return new LayerOutput(layer_index, output_index, buf->bufferId,
646                            buf->numROIs, buf->numChannels, buf->ROIHeight,
647                            buf->ROIWidth, data);
650 const LayerOutputs* ExecutionObject::Impl::GetOutputsFromAllLayers() const
652     LayerOutputs* result = new LayerOutputs;
654     for (uint32_t i=0; i < num_network_layers_m; i++)
655         for (int j=0; j < TIDL_NUM_OUT_BUFS; j++)
656         {
657             const LayerOutput* lo = GetOutputFromLayer(i, j);
658             if (lo)
659                 result->push_back(std::unique_ptr<const LayerOutput>{ lo });
660         }
662     return result;
665 LayerOutput::LayerOutput(int layer_index, int output_index, int buffer_id,
666                          int num_roi, int num_channels, size_t height,
667                          size_t width, const char* data):
668                         layer_index_m(layer_index), buffer_id_m(buffer_id),
669                         num_roi_m(num_roi), num_channels_m(num_channels),
670                         height_m(height), width_m(width), data_m(data)
671 { }
673 LayerOutput::~LayerOutput()
675     delete[] data_m;