]> Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - tidl/tidl-api.git/blob - tidl_api/src/execution_object.cpp
Updated version to 1.2.0
[tidl/tidl-api.git] / tidl_api / src / execution_object.cpp
1 /******************************************************************************
2  * Copyright (c) 2017-2018 Texas Instruments Incorporated - http://www.ti.com/
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *      * Redistributions of source code must retain the above copyright
8  *        notice, this list of conditions and the following disclaimer.
9  *      * Redistributions in binary form must reproduce the above copyright
10  *        notice, this list of conditions and the following disclaimer in the
11  *        documentation and/or other materials provided with the distribution.
12  *      * Neither the name of Texas Instruments Incorporated nor the
13  *        names of its contributors may be used to endorse or promote products
14  *        derived from this software without specific prior written permission.
15  *
16  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  *  THE POSSIBILITY OF SUCH DAMAGE.
27  *****************************************************************************/
29 /*! \file execution_object.cpp */
31 #include <string.h>
32 #include <fstream>
33 #include <climits>
34 #include <mutex>
35 #include <condition_variable>
36 #include <chrono>
37 #include "executor.h"
38 #include "execution_object.h"
39 #include "trace.h"
40 #include "ocl_device.h"
41 #include "parameters.h"
42 #include "common_defines.h"
43 #include "tidl_create_params.h"
44 #include "device_arginfo.h"
46 using namespace tidl;
48 class ExecutionObject::Impl
49 {
50     public:
51         Impl(Device* d, uint8_t device_index,
52              const DeviceArgInfo& create_arg,
53              const DeviceArgInfo& param_heap_arg,
54              const Configuration& configuration,
55              int    layers_group_id);
56         ~Impl() {}
58         bool RunAsync(CallType ct);
59         bool Wait    (CallType ct);
60         bool AddCallback(CallType ct, void *user_data);
62         uint64_t GetProcessCycles() const;
63         int  GetLayersGroupId() const;
64         void AcquireLock();
65         void ReleaseLock();
67         Device*                         device_m;
68         // Index of the OpenCL device/queue used by this EO
69         uint8_t                         device_index_m;
70         std::string                     device_name_m;
72         up_malloc_ddr<char>             tidl_extmem_heap_m;
73         up_malloc_ddr<OCL_TIDL_InitializeParams> shared_initialize_params_m;
74         up_malloc_ddr<OCL_TIDL_ProcessParams>    shared_process_params_m;
76         size_t                          in_size_m;
77         size_t                          out_size_m;
78         IODeviceArgInfo                 in_m;
79         IODeviceArgInfo                 out_m;
81         // Frame being processed by the EO
82         int                             current_frame_idx_m;
84         // LayersGroupId being processed by the EO
85         int                             layers_group_id_m;
87         // Trace related
88         void WriteLayerOutputsToFile (const std::string& filename_prefix) const;
90         const LayerOutput* GetOutputFromLayer (uint32_t layer_index,
91                                                uint32_t output_index) const;
92         const LayerOutputs* GetOutputsFromAllLayers() const;
94         uint32_t                          num_network_layers_m;
95         up_malloc_ddr<OCL_TIDL_BufParams> trace_buf_params_m;
96         size_t                            trace_buf_params_sz_m;
98         // host time tracking: eo start to finish
99         float host_time_m;
101     private:
102         void SetupInitializeKernel(const DeviceArgInfo& create_arg,
103                                    const DeviceArgInfo& param_heap_arg);
104         void EnableOutputBufferTrace();
105         void SetupProcessKernel();
107         void HostWriteNetInput();
108         void HostReadNetOutput();
109         void ComputeInputOutputSizes();
111         std::unique_ptr<Kernel>         k_initialize_m;
112         std::unique_ptr<Kernel>         k_process_m;
113         std::unique_ptr<Kernel>         k_cleanup_m;
115         // Guarding sole access to input/output for one frame during execution
116         bool                            is_idle_m;
117         std::mutex                      mutex_access_m;
118         std::condition_variable         cv_access_m;
120         const Configuration             configuration_m;
121 };
124 ExecutionObject::ExecutionObject(Device* d,
125                                  uint8_t device_index,
126                                  const   ArgInfo& create_arg,
127                                  const   ArgInfo& param_heap_arg,
128                                  const   Configuration& configuration,
129                                  int     layers_group_id)
131     TRACE::print("-> ExecutionObject::ExecutionObject()\n");
133     DeviceArgInfo create_arg_d(create_arg, DeviceArgInfo::Kind::BUFFER);
134     DeviceArgInfo param_heap_arg_d(param_heap_arg, DeviceArgInfo::Kind::BUFFER);
136     pimpl_m = std::unique_ptr<ExecutionObject::Impl>
137               { new ExecutionObject::Impl(d, device_index,
138                                           create_arg_d,
139                                           param_heap_arg_d,
140                                           configuration,
141                                           layers_group_id) };
142     TRACE::print("<- ExecutionObject::ExecutionObject()\n");
146 ExecutionObject::Impl::Impl(Device* d, uint8_t device_index,
147                             const DeviceArgInfo& create_arg,
148                             const DeviceArgInfo& param_heap_arg,
149                             const Configuration& configuration,
150                             int    layers_group_id):
151     device_m(d),
152     device_index_m(device_index),
153     tidl_extmem_heap_m (nullptr, &__free_ddr),
154     shared_initialize_params_m(nullptr, &__free_ddr),
155     shared_process_params_m(nullptr, &__free_ddr),
156     in_size_m(0),
157     out_size_m(0),
158     in_m(),
159     out_m(),
160     current_frame_idx_m(0),
161     layers_group_id_m(layers_group_id),
162     num_network_layers_m(0),
163     trace_buf_params_m(nullptr, &__free_ddr),
164     trace_buf_params_sz_m(0),
165     k_initialize_m(nullptr),
166     k_process_m(nullptr),
167     k_cleanup_m(nullptr),
168     is_idle_m(true),
169     configuration_m(configuration)
171     device_name_m = device_m->GetDeviceName() + std::to_string(device_index_m);
172     // Save number of layers in the network
173     const TIDL_CreateParams* cp =
174                 static_cast<const TIDL_CreateParams *>(create_arg.ptr());
175     num_network_layers_m = cp->net.numLayers;
177     SetupInitializeKernel(create_arg, param_heap_arg);
179     if (configuration_m.enableOutputTrace)
180         EnableOutputBufferTrace();
182     SetupProcessKernel();
185 // Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
186 // Both unique_ptr and shared_ptr can be instantiated with an incomplete type
187 // unique_ptr's destructor requires a complete type in order to invoke delete
188 ExecutionObject::~ExecutionObject() = default;
190 char* ExecutionObject::GetInputBufferPtr() const
192     return static_cast<char *>(pimpl_m->in_m.GetArg().ptr());
195 size_t ExecutionObject::GetInputBufferSizeInBytes() const
197     return pimpl_m->in_size_m;
200 char* ExecutionObject::GetOutputBufferPtr() const
202     return static_cast<char *>(pimpl_m->out_m.GetArg().ptr());
205 size_t ExecutionObject::GetOutputBufferSizeInBytes() const
207     return pimpl_m->out_size_m;
210 void  ExecutionObject::SetFrameIndex(int idx)
212     pimpl_m->current_frame_idx_m = idx;
215 int ExecutionObject::GetFrameIndex() const
217     return pimpl_m->current_frame_idx_m;
220 void ExecutionObject::SetInputOutputBuffer(const ArgInfo& in, const ArgInfo& out)
222     assert(in.ptr()  != nullptr && in.size()  >= pimpl_m->in_size_m);
223     assert(out.ptr() != nullptr && out.size() >= pimpl_m->out_size_m);
225     pimpl_m->in_m  = IODeviceArgInfo(in);
226     pimpl_m->out_m = IODeviceArgInfo(out);
229 void ExecutionObject::SetInputOutputBuffer(const IODeviceArgInfo* in,
230                                            const IODeviceArgInfo* out)
232     pimpl_m->in_m  = *in;
233     pimpl_m->out_m = *out;
236 bool ExecutionObject::ProcessFrameStartAsync()
238     TRACE::print("-> ExecutionObject::ProcessFrameStartAsync()\n");
239     assert(GetInputBufferPtr() != nullptr && GetOutputBufferPtr() != nullptr);
240     return pimpl_m->RunAsync(ExecutionObject::CallType::PROCESS);
243 bool ExecutionObject::ProcessFrameWait()
245     TRACE::print("-> ExecutionObject::ProcessFrameWait()\n");
246     return pimpl_m->Wait(ExecutionObject::CallType::PROCESS);
249 bool ExecutionObject::RunAsync (CallType ct)
251     return pimpl_m->RunAsync(ct);
254 bool ExecutionObject::Wait (CallType ct)
256     return pimpl_m->Wait(ct);
259 bool ExecutionObject::AddCallback(CallType ct, void *user_data)
261     return pimpl_m->AddCallback(ct, user_data);
264 float ExecutionObject::GetProcessTimeInMilliSeconds() const
266     float frequency = pimpl_m->device_m->GetFrequencyInMhz() * 1000000;
267     return ((float)pimpl_m->GetProcessCycles()) / frequency * 1000;
270 float ExecutionObject::GetHostProcessTimeInMilliSeconds() const
272     return pimpl_m->host_time_m;
275 void
276 ExecutionObject::WriteLayerOutputsToFile(const std::string& filename_prefix) const
278     pimpl_m->WriteLayerOutputsToFile(filename_prefix);
281 const LayerOutput* ExecutionObject::GetOutputFromLayer(
282                          uint32_t layer_index, uint32_t output_index) const
284     return pimpl_m->GetOutputFromLayer(layer_index, output_index);
287 const LayerOutputs* ExecutionObject::GetOutputsFromAllLayers() const
289     return pimpl_m->GetOutputsFromAllLayers();
292 int ExecutionObject::GetLayersGroupId() const
294     return pimpl_m->layers_group_id_m;
297 const std::string& ExecutionObject::GetDeviceName() const
299     return pimpl_m->device_name_m;
302 void ExecutionObject::AcquireLock()
304     pimpl_m->AcquireLock();
307 void ExecutionObject::ReleaseLock()
309     pimpl_m->ReleaseLock();
312 //
313 // Create a kernel to call the "initialize" function
314 //
315 void
316 ExecutionObject::Impl::SetupInitializeKernel(const DeviceArgInfo& create_arg,
317                                              const DeviceArgInfo& param_heap_arg)
319     // Allocate a heap for TI DL to use on the device
320     tidl_extmem_heap_m.reset(
321                          malloc_ddr<char>(configuration_m.NETWORK_HEAP_SIZE));
323     // Create a kernel for cleanup
324     KernelArgs cleanup_args;
325     k_cleanup_m.reset(new Kernel(device_m,
326                                  STRING(CLEANUP_KERNEL),
327                                  cleanup_args, device_index_m));
329     // Set up parameter struct for the initialize kernel
330     shared_initialize_params_m.reset(malloc_ddr<OCL_TIDL_InitializeParams>());
331     memset(shared_initialize_params_m.get(), 0,
332            sizeof(OCL_TIDL_InitializeParams));
334     shared_initialize_params_m->tidlHeapSize =configuration_m.NETWORK_HEAP_SIZE;
335     shared_initialize_params_m->l2HeapSize   = tidl::internal::DMEM1_SIZE;
336     shared_initialize_params_m->l1HeapSize   = tidl::internal::DMEM0_SIZE;
337     shared_initialize_params_m->enableInternalInput =
338                    configuration_m.enableInternalInput ? 1 : 0;
340     // Set up execution trace specified in the configuration
341     EnableExecutionTrace(configuration_m,
342                          &shared_initialize_params_m->enableTrace);
344     // Setup kernel arguments for initialize
345     KernelArgs args = { create_arg,
346                         param_heap_arg,
347                         DeviceArgInfo(tidl_extmem_heap_m.get(),
348                                       configuration_m.NETWORK_HEAP_SIZE,
349                                       DeviceArgInfo::Kind::BUFFER),
350                         DeviceArgInfo(shared_initialize_params_m.get(),
351                                       sizeof(OCL_TIDL_InitializeParams),
352                                       DeviceArgInfo::Kind::BUFFER),
353                         device_m->type() == CL_DEVICE_TYPE_ACCELERATOR ?
354                             DeviceArgInfo(nullptr, tidl::internal::DMEM1_SIZE,
355                                           DeviceArgInfo::Kind::LOCAL):
356                             DeviceArgInfo(nullptr, 4,
357                                           DeviceArgInfo::Kind::LOCAL) };
359     k_initialize_m.reset(new Kernel(device_m,
360                                     STRING(INIT_KERNEL), args,
361                                     device_index_m));
364 //
365 // Allocate an OpenCL buffer for TIDL layer output buffer metadata.
366 // The device will populate metadata for every buffer that is used as an
367 // output buffer by a layer.  This needs to be done before setting up
368 // process kernel.
369 //
370 void ExecutionObject::Impl::EnableOutputBufferTrace()
372     trace_buf_params_sz_m = (sizeof(OCL_TIDL_BufParams)*
373                              num_network_layers_m*
374                              TIDL_NUM_OUT_BUFS);
376     trace_buf_params_m.reset(malloc_ddr<OCL_TIDL_BufParams>
377                              (trace_buf_params_sz_m));
379     // Device will update bufferId if there is valid data for the entry
380     OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
381     for (uint32_t i = 0; i < num_network_layers_m; i++)
382         for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
383         {
384             OCL_TIDL_BufParams *bufP =
385                                 &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
386             bufP->bufferId = UINT_MAX;
387         }
390 //
391 // Create a kernel to call the "process" function
392 //
393 void
394 ExecutionObject::Impl::SetupProcessKernel()
396     shared_process_params_m.reset(malloc_ddr<OCL_TIDL_ProcessParams>());
397     shared_process_params_m->enableInternalInput =
398                                shared_initialize_params_m->enableInternalInput;
399     shared_process_params_m->cycles = 0;
401     // Set up execution trace specified in the configuration
402     EnableExecutionTrace(configuration_m,
403                          &shared_process_params_m->enableTrace);
405     KernelArgs args = { DeviceArgInfo(shared_process_params_m.get(),
406                                       sizeof(OCL_TIDL_ProcessParams),
407                                       DeviceArgInfo::Kind::BUFFER),
408                         DeviceArgInfo(tidl_extmem_heap_m.get(),
409                                       shared_initialize_params_m->tidlHeapSize,
410                                       DeviceArgInfo::Kind::BUFFER),
411                         DeviceArgInfo(trace_buf_params_m.get(),
412                                       trace_buf_params_sz_m,
413                                       DeviceArgInfo::Kind::BUFFER)
415                       };
417     k_process_m.reset(new Kernel(device_m,
418                                  STRING(PROCESS_KERNEL), args,
419                                  device_index_m));
/*!
 * Copy densely packed 8-bit planes into a strided destination.
 *
 * The source holds roi*n planes of height rows, each row width bytes, with
 * no gaps. In the destination, plane p starts at p*chOffset and row y of a
 * plane starts pitch*y bytes after the plane start.
 *
 * @param readPtr   Packed source; nothing is copied if null
 * @param ptr       Strided destination
 * @param roi       Number of ROIs
 * @param n         Number of channels per ROI
 * @param width     Bytes per row
 * @param height    Rows per plane
 * @param pitch     Destination distance between rows, in bytes
 * @param chOffset  Destination distance between planes, in bytes
 * @return number of bytes read from readPtr; 0 if readPtr is null or any
 *         of roi, n, width, height is not positive
 */
static size_t readDataS8(const char *readPtr, char *ptr, int roi, int n,
                         int width, int height, int pitch,
                         int chOffset)
{
    if (!readPtr)  return 0;

    // Nothing to copy; also keeps a negative value from reaching memcpy
    if (roi <= 0 || n <= 0 || width <= 0 || height <= 0)  return 0;

    // Sizes and offsets are computed in wide types so that large buffers
    // cannot overflow int arithmetic
    const size_t    row_bytes = static_cast<size_t>(width);
    const long long planes    = static_cast<long long>(roi) * n;

    const char* src = readPtr;
    for (long long p = 0; p < planes; p++)
        for (long long y = 0; y < height; y++)
        {
            memcpy(ptr + (p * chOffset + y * pitch), src, row_bytes);
            src += row_bytes;
        }

    return row_bytes * static_cast<size_t>(height) *
           static_cast<size_t>(planes);
}
/*!
 * Copy 8-bit planes from a strided source into a densely packed destination.
 *
 * In the source, channel c starts at c*chOffset and row y of a channel
 * starts pitch*y bytes after the channel start. The destination receives
 * the rows back to back, width bytes each.
 *
 * @param writePtr  Packed destination; nothing is copied if null
 * @param ptr       Strided source
 * @param n         Number of channels
 * @param width     Bytes per row
 * @param height    Rows per channel
 * @param pitch     Source distance between rows, in bytes
 * @param chOffset  Source distance between channels, in bytes
 * @return number of bytes written to writePtr; 0 if writePtr is null or
 *         any of n, width, height is not positive
 */
static size_t writeDataS8(char *writePtr, const char *ptr, int n, int width,
                          int height, int pitch, int chOffset)
{
    if (!writePtr)  return 0;

    // Nothing to copy; also keeps a negative value from reaching memcpy
    if (n <= 0 || width <= 0 || height <= 0)  return 0;

    // Sizes and offsets are computed in wide types so that large buffers
    // cannot overflow int arithmetic
    const size_t row_bytes = static_cast<size_t>(width);

    char* dst = writePtr;
    for (long long c = 0; c < n; c++)
        for (long long y = 0; y < height; y++)
        {
            memcpy(dst, ptr + (c * chOffset + y * pitch), row_bytes);
            dst += row_bytes;
        }

    return row_bytes * static_cast<size_t>(height) * static_cast<size_t>(n);
}
453 //
454 // Copy from host buffer to TIDL device buffer
455 //
456 void ExecutionObject::Impl::HostWriteNetInput()
458     const char*     readPtr  = (const char *) in_m.GetArg().ptr();
459     const PipeInfo& pipe     = in_m.GetPipe();
461     for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
462     {
463         OCL_TIDL_BufParams *inBuf = &shared_initialize_params_m->inBufs[i];
465         if (shared_process_params_m->enableInternalInput == 0)
466         {
467             readPtr += readDataS8(
468                 readPtr,
469                 (char *) tidl_extmem_heap_m.get() + inBuf->bufPlaneBufOffset
470                     + inBuf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
471                     + OCL_TIDL_MAX_PAD_SIZE,
472                 inBuf->numROIs,
473                 inBuf->numChannels,
474                 inBuf->ROIWidth,
475                 inBuf->ROIHeight,
476                 inBuf->bufPlaneWidth,
477                 ((inBuf->bufPlaneWidth * inBuf->bufPlaneHeight) /
478                  inBuf->numChannels));
479         }
480         else
481         {
482             shared_process_params_m->inBufAddr[i] = pipe.bufAddr_m[i];
483         }
485         shared_process_params_m->inDataQ[i]   = pipe.dataQ_m[i];
486     }
489 //
490 // Copy from TIDL device buffer into host buffer
491 //
492 void ExecutionObject::Impl::HostReadNetOutput()
494     char* writePtr = (char *) out_m.GetArg().ptr();
495     PipeInfo& pipe = out_m.GetPipe();
497     for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
498     {
499         OCL_TIDL_BufParams *outBuf = &shared_initialize_params_m->outBufs[i];
500         if (writePtr != nullptr)
501         {
502             writePtr += writeDataS8(
503                 writePtr,
504                 (char *) tidl_extmem_heap_m.get() + outBuf->bufPlaneBufOffset
505                     + outBuf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
506                     + OCL_TIDL_MAX_PAD_SIZE,
507                 outBuf->numChannels,
508                 outBuf->ROIWidth,
509                 outBuf->ROIHeight,
510                 outBuf->bufPlaneWidth,
511                 ((outBuf->bufPlaneWidth * outBuf->bufPlaneHeight)/
512                  outBuf->numChannels));
513         }
515         pipe.dataQ_m[i]   = shared_process_params_m->outDataQ[i];
516         pipe.bufAddr_m[i] = shared_initialize_params_m->bufAddrBase
517                            + outBuf->bufPlaneBufOffset;
518     }
519     shared_process_params_m->bytesWritten = writePtr -
520                                             (char *) out_m.GetArg().ptr();
523 void ExecutionObject::Impl::ComputeInputOutputSizes()
525     if (shared_initialize_params_m->errorCode != OCL_TIDL_SUCCESS)  return;
527     if (shared_initialize_params_m->numInBufs > OCL_TIDL_MAX_IN_BUFS ||
528         shared_initialize_params_m->numOutBufs > OCL_TIDL_MAX_OUT_BUFS)
529     {
530         std::cout << "Num input/output bufs ("
531                   << shared_initialize_params_m->numInBufs << ", "
532                   << shared_initialize_params_m->numOutBufs
533                   << ") exceeded limit!" << std::endl;
534         shared_initialize_params_m->errorCode = OCL_TIDL_INIT_FAIL;
535         return;
536     }
538     in_size_m  = 0;
539     out_size_m = 0;
540     for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
541     {
542         OCL_TIDL_BufParams *inBuf = &shared_initialize_params_m->inBufs[i];
543         in_size_m += inBuf->numROIs * inBuf->numChannels * inBuf->ROIWidth *
544                      inBuf->ROIHeight;
545     }
546     for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
547     {
548         OCL_TIDL_BufParams *outBuf = &shared_initialize_params_m->outBufs[i];
549         out_size_m += outBuf->numChannels * outBuf->ROIWidth *outBuf->ROIHeight;
550     }
554 bool ExecutionObject::Impl::RunAsync(CallType ct)
556     switch (ct)
557     {
558         case CallType::INIT:
559         {
560             k_initialize_m->RunAsync();
561             break;
562         }
563         case CallType::PROCESS:
564         {
565             std::chrono::time_point<std::chrono::steady_clock> t1, t2;
566             t1 = std::chrono::steady_clock::now();
568             shared_process_params_m->frameIdx = current_frame_idx_m;
569             shared_process_params_m->bytesWritten = 0;
570             HostWriteNetInput();
571             k_process_m->RunAsync();
573             t2 = std::chrono::steady_clock::now();
574             std::chrono::duration<float> elapsed = t2 - t1;
575             host_time_m = elapsed.count() * 1000;
576             break;
577         }
578         case CallType::CLEANUP:
579         {
580             k_cleanup_m->RunAsync();
581             break;
582         }
583         default:
584             return false;
585     }
587     return true;
590 bool ExecutionObject::Impl::Wait(CallType ct)
592     switch (ct)
593     {
594         case CallType::INIT:
595         {
596             bool has_work = k_initialize_m->Wait();
598             if (has_work)
599             {
600                 ComputeInputOutputSizes();
601                 if (shared_initialize_params_m->errorCode != OCL_TIDL_SUCCESS)
602                     throw Exception(shared_initialize_params_m->errorCode,
603                                     __FILE__, __FUNCTION__, __LINE__);
604             }
605             return has_work;
606         }
607         case CallType::PROCESS:
608         {
609             float host_elapsed_ms = 0.0f;
610             bool has_work = k_process_m->Wait(&host_elapsed_ms);
611             if (has_work)
612             {
613                 if (shared_process_params_m->errorCode != OCL_TIDL_SUCCESS)
614                     throw Exception(shared_process_params_m->errorCode,
615                                     __FILE__, __FUNCTION__, __LINE__);
617                 std::chrono::time_point<std::chrono::steady_clock> t1, t2;
618                 t1 = std::chrono::steady_clock::now();
619                 HostReadNetOutput();
620                 t2 = std::chrono::steady_clock::now();
621                 std::chrono::duration<float> elapsed = t2 - t1;
622                 host_time_m += elapsed.count() * 1000 + host_elapsed_ms;
623             }
625             return has_work;
626         }
627         case CallType::CLEANUP:
628         {
629             return k_cleanup_m->Wait();
630             break;
631         }
632         default:
633             return false;
634     }
636     return false;
639 bool ExecutionObject::Impl::AddCallback(CallType ct, void *user_data)
641     switch (ct)
642     {
643         case CallType::PROCESS:
644         {
645             return k_process_m->AddCallback(user_data);
646             break;
647         }
648         default:
649             return false;
650     }
652     return false;
655 uint64_t ExecutionObject::Impl::GetProcessCycles() const
657     uint8_t factor = 1;
659     // ARP32 running at half frequency of VCOP, multiply by 2 for VCOP cycles
660     if (device_m->type() == CL_DEVICE_TYPE_CUSTOM)
661         factor = 2;
663     return shared_process_params_m.get()->cycles * factor;
666 //
667 // Write the trace data to output files
668 //
669 void
670 ExecutionObject::Impl::WriteLayerOutputsToFile(const std::string& filename_prefix) const
672     if (trace_buf_params_sz_m == 0)
673         return;
675     OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
677     for (uint32_t i = 0; i < num_network_layers_m; i++)
678         for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
679         {
680             OCL_TIDL_BufParams* buf = &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
682             if (buf->bufferId == UINT_MAX)
683                 continue;
685             size_t buffer_size = buf->numChannels * buf->ROIHeight *
686                                  buf->ROIWidth;
688             char *tmp = new char[buffer_size];
690             if (tmp == nullptr)
691                 throw Exception("Out of memory, new failed",
692                         __FILE__, __FUNCTION__, __LINE__);
694             writeDataS8(
695                 tmp,
696                 (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
697                 + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
698                 + OCL_TIDL_MAX_PAD_SIZE,
699                 buf->numChannels,
700                 buf->ROIWidth,
701                 buf->ROIHeight,
702                 buf->bufPlaneWidth,
703                 ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
704                  buf->numChannels));
706             std::string filename(filename_prefix);
707             filename += std::to_string(buf->bufferId) + "_";
708             filename += std::to_string(buf->ROIWidth) + "x";
709             filename += std::to_string(buf->ROIHeight) + ".bin";
711             std::ofstream ofs;
712             ofs.open(filename, std::ofstream::out);
713             ofs.write(tmp, buffer_size);
714             ofs.close();
716             delete[] tmp;
717         }
721 const LayerOutput* ExecutionObject::Impl::GetOutputFromLayer(
722                             uint32_t layer_index, uint32_t output_index) const
724     if (trace_buf_params_sz_m == 0)
725         return nullptr;
727     if (layer_index > num_network_layers_m || output_index > TIDL_NUM_OUT_BUFS)
728         return nullptr;
730     OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
731     OCL_TIDL_BufParams* buf = &bufferParams[layer_index*TIDL_NUM_OUT_BUFS+
732                                             output_index];
734     if (buf->bufferId == UINT_MAX)
735         return nullptr;
737     size_t buffer_size = buf->numChannels * buf->ROIHeight *
738                          buf->ROIWidth;
740     char *data = new char[buffer_size];
742     if (data == nullptr)
743         throw Exception("Out of memory, new failed",
744                 __FILE__, __FUNCTION__, __LINE__);
746     writeDataS8(data,
747                 (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
748                 + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
749                 + OCL_TIDL_MAX_PAD_SIZE,
750                 buf->numChannels,
751                 buf->ROIWidth,
752                 buf->ROIHeight,
753                 buf->bufPlaneWidth,
754                 ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
755                  buf->numChannels));
757     return new LayerOutput(layer_index, output_index, buf->bufferId,
758                            buf->numROIs, buf->numChannels, buf->ROIHeight,
759                            buf->ROIWidth, data);
762 const LayerOutputs* ExecutionObject::Impl::GetOutputsFromAllLayers() const
764     LayerOutputs* result = new LayerOutputs;
766     for (uint32_t i=0; i < num_network_layers_m; i++)
767         for (int j=0; j < TIDL_NUM_OUT_BUFS; j++)
768         {
769             const LayerOutput* lo = GetOutputFromLayer(i, j);
770             if (lo)
771                 result->push_back(std::unique_ptr<const LayerOutput>{ lo });
772         }
774     return result;
777 LayerOutput::LayerOutput(int layer_index, int output_index, int buffer_id,
778                          int num_roi, int num_channels, size_t height,
779                          size_t width, const char* data):
780                         layer_index_m(layer_index), buffer_id_m(buffer_id),
781                         num_roi_m(num_roi), num_channels_m(num_channels),
782                         height_m(height), width_m(width), data_m(data)
783 { }
785 LayerOutput::~LayerOutput()
787     delete[] data_m;
790 void ExecutionObject::Impl::AcquireLock()
792     std::unique_lock<std::mutex> lock(mutex_access_m);
793     cv_access_m.wait(lock, [this]{ return this->is_idle_m; });
794     is_idle_m = false;
797 void ExecutionObject::Impl::ReleaseLock()
799     is_idle_m = true;
800     cv_access_m.notify_all();