]> Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - tidl/tidl-api.git/blob - tidl_api/src/execution_object.cpp
Fix g++ 8.3.0 compilation error
[tidl/tidl-api.git] / tidl_api / src / execution_object.cpp
1 /******************************************************************************
2  * Copyright (c) 2017-2018 Texas Instruments Incorporated - http://www.ti.com/
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *      * Redistributions of source code must retain the above copyright
8  *        notice, this list of conditions and the following disclaimer.
9  *      * Redistributions in binary form must reproduce the above copyright
10  *        notice, this list of conditions and the following disclaimer in the
11  *        documentation and/or other materials provided with the distribution.
12  *      * Neither the name of Texas Instruments Incorporated nor the
13  *        names of its contributors may be used to endorse or promote products
14  *        derived from this software without specific prior written permission.
15  *
16  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  *  THE POSSIBILITY OF SUCH DAMAGE.
27  *****************************************************************************/
29 /*! \file execution_object.cpp */
31 #include <string.h>
32 #include <fstream>
33 #include <climits>
34 #include <mutex>
35 #include <condition_variable>
36 #include <chrono>
37 #include "executor.h"
38 #include "execution_object.h"
39 #include "trace.h"
40 #include "ocl_device.h"
41 #include "parameters.h"
42 #include "common_defines.h"
43 #include "tidl_create_params.h"
44 #include "device_arginfo.h"
45 #include "util.h"
47 using namespace tidl;
49 class ExecutionObject::Impl
50 {
51     public:
52         Impl(Device* d, DeviceType t, uint8_t device_index,
53              const DeviceArgInfo& create_arg,
54              const DeviceArgInfo& param_heap_arg,
55              const Configuration& configuration,
56              int    layers_group_id);
57         ~Impl() {}
59         bool RunAsync(CallType ct, uint32_t context_idx);
60         bool Wait    (CallType ct, uint32_t context_idx);
61         bool AddCallback(CallType ct, void *user_data, uint32_t context_idx);
63         uint64_t GetProcessCycles(uint32_t context_idx) const;
64         int  GetLayersGroupId() const;
65         void AcquireContext(uint32_t& context_idx);
66         void ReleaseContext(uint32_t  context_idx);
68         // Trace related
69         void WriteLayerOutputsToFile (const std::string& filename_prefix) const;
70         const LayerOutput* GetOutputFromLayer (uint32_t layer_index,
71                                                uint32_t output_index) const;
72         const LayerOutputs* GetOutputsFromAllLayers() const;
75         Device*                         device_m;
76         DeviceType                      device_type_m;
78         // Index of the OpenCL device/queue used by this EO
79         uint8_t                         device_index_m;
80         std::string                     device_name_m;
82         up_malloc_ddr<char>                      tidl_extmem_heap_m;
83         up_malloc_ddr<OCL_TIDL_InitializeParams> shared_initialize_params_m;
84         up_malloc_ddr<OCL_TIDL_ProcessParams>    shared_process_params_m;
86         size_t                          in_size_m;
87         size_t                          out_size_m;
88         IODeviceArgInfo                 in_m[tidl::internal::NUM_CONTEXTS];
89         IODeviceArgInfo                 out_m[tidl::internal::NUM_CONTEXTS];
91         // Frame being processed by the EO
92         int current_frame_idx_m[tidl::internal::NUM_CONTEXTS];
94         // LayersGroupId being processed by the EO
95         int layers_group_id_m;
97         uint32_t                          num_network_layers_m;
98         up_malloc_ddr<OCL_TIDL_BufParams> trace_buf_params_m;
99         size_t                            trace_buf_params_sz_m;
101     private:
102         void SetupInitializeKernel(const DeviceArgInfo& create_arg,
103                                    const DeviceArgInfo& param_heap_arg);
104         void EnableOutputBufferTrace();
105         void SetupProcessKernel();
107         void HostWriteNetInput(uint32_t context_idx);
108         void HostReadNetOutput(uint32_t context_idx);
109         void ComputeInputOutputSizes();
111         std::unique_ptr<Kernel>         k_initialize_m;
112         std::unique_ptr<Kernel>         k_process_m;
113         std::unique_ptr<Kernel>         k_cleanup_m;
115         // Guarding sole access to input/output for one frame during execution
116         // Encoding: context at bit index, bit value: 0 for idle, 1 for busy
117         uint32_t                        idle_encoding_m;
118         std::mutex                      mutex_access_m;
119         std::condition_variable         cv_access_m;
121         const Configuration             configuration_m;
122 };
125 ExecutionObject::ExecutionObject(Device*    d,
126                                  DeviceType t,
127                                  uint8_t    device_index,
128                                  const      ArgInfo& create_arg,
129                                  const      ArgInfo& param_heap_arg,
130                                  const      Configuration& configuration,
131                                  int        layers_group_id)
133     TRACE::print("-> ExecutionObject::ExecutionObject()\n");
135     DeviceArgInfo create_arg_d(create_arg, DeviceArgInfo::Kind::BUFFER);
136     DeviceArgInfo param_heap_arg_d(param_heap_arg, DeviceArgInfo::Kind::BUFFER);
138     pimpl_m = std::unique_ptr<ExecutionObject::Impl>
139               { new ExecutionObject::Impl(d, t, device_index,
140                                           create_arg_d,
141                                           param_heap_arg_d,
142                                           configuration,
143                                           layers_group_id) };
144     TRACE::print("<- ExecutionObject::ExecutionObject()\n");
148 ExecutionObject::Impl::Impl(Device* d, DeviceType t, uint8_t device_index,
149                             const DeviceArgInfo& create_arg,
150                             const DeviceArgInfo& param_heap_arg,
151                             const Configuration& configuration,
152                             int    layers_group_id):
153     device_m(d),
154     device_type_m(t),
155     device_index_m(device_index),
156     tidl_extmem_heap_m (nullptr, &__free_ddr),
157     shared_initialize_params_m(nullptr, &__free_ddr),
158     shared_process_params_m(nullptr, &__free_ddr),
159     in_size_m(0),
160     out_size_m(0),
161     layers_group_id_m(layers_group_id),
162     num_network_layers_m(0),
163     trace_buf_params_m(nullptr, &__free_ddr),
164     trace_buf_params_sz_m(0),
165     k_initialize_m(nullptr),
166     k_process_m(nullptr),
167     k_cleanup_m(nullptr),
168     idle_encoding_m(0),  // all contexts are idle
169     configuration_m(configuration)
171     device_name_m = device_m->GetDeviceName() + std::to_string(device_index_m);
172     // Save number of layers in the network
173     const TIDL_CreateParams* cp =
174                 static_cast<const TIDL_CreateParams *>(create_arg.ptr());
175     num_network_layers_m = cp->net.numLayers;
177     SetupInitializeKernel(create_arg, param_heap_arg);
179     if (configuration_m.enableOutputTrace)
180         EnableOutputBufferTrace();
182     SetupProcessKernel();
184     for (int i = 0; i < tidl::internal::NUM_CONTEXTS; i++)
185         current_frame_idx_m[i] = 0;
188 // Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
189 // Both unique_ptr and shared_ptr can be instantiated with an incomplete type
190 // unique_ptr's destructor requires a complete type in order to invoke delete
191 ExecutionObject::~ExecutionObject() = default;
193 char* ExecutionObject::GetInputBufferPtr() const
195     return static_cast<char *>(pimpl_m->in_m[0].GetArg().ptr());
198 size_t ExecutionObject::GetInputBufferSizeInBytes() const
200     return pimpl_m->in_size_m;
203 char* ExecutionObject::GetOutputBufferPtr() const
205     return static_cast<char *>(pimpl_m->out_m[0].GetArg().ptr());
208 size_t ExecutionObject::GetOutputBufferSizeInBytes() const
210     return pimpl_m->out_size_m;
213 void  ExecutionObject::SetFrameIndex(int idx)
215     pimpl_m->current_frame_idx_m[0] = idx;
218 int ExecutionObject::GetFrameIndex() const
220     return pimpl_m->current_frame_idx_m[0];
223 void ExecutionObject::SetInputOutputBuffer(const ArgInfo& in,
224                                            const ArgInfo& out)
226     pimpl_m->in_m[0]  = IODeviceArgInfo(in);
227     pimpl_m->out_m[0] = IODeviceArgInfo(out);
230 bool ExecutionObject::ProcessFrameStartAsync()
232     return pimpl_m->RunAsync(ExecutionObject::CallType::PROCESS, 0);
235 bool ExecutionObject::ProcessFrameWait()
237     return pimpl_m->Wait(ExecutionObject::CallType::PROCESS, 0);
240 bool ExecutionObject::RunAsync (CallType ct)
242     return pimpl_m->RunAsync(ct, 0);
245 bool ExecutionObject::Wait (CallType ct)
247     return pimpl_m->Wait(ct, 0);
251 bool ExecutionObject::AcquireAndRunContext(uint32_t& context_idx,
252                                           int frame_idx,
253                                           const IODeviceArgInfo& in,
254                                           const IODeviceArgInfo& out)
256     pimpl_m->AcquireContext(context_idx);
258     pimpl_m->current_frame_idx_m[context_idx] = frame_idx;
259     pimpl_m->in_m[context_idx]  = in;
260     pimpl_m->out_m[context_idx] = out;
262     return pimpl_m->RunAsync(ExecutionObject::CallType::PROCESS,
263                                     context_idx);
266 bool ExecutionObject::WaitAndReleaseContext(uint32_t context_idx)
268     TRACE::print("-> ExecutionObject::WaitAndReleaseContext(%d)\n",
269                  context_idx);
271     bool status = pimpl_m->Wait(ExecutionObject::CallType::PROCESS,
272                                 context_idx);
273     pimpl_m->ReleaseContext(context_idx);
275     return status;
278 bool ExecutionObject::AddCallback(CallType ct, void *user_data,
279                                   uint32_t context_idx)
281     return pimpl_m->AddCallback(ct, user_data, context_idx);
284 float ExecutionObject::GetProcessTimeInMilliSeconds() const
286     float frequency = pimpl_m->device_m->GetFrequencyInMhz() * 1000000;
287     return ((float)pimpl_m->GetProcessCycles(0)) / frequency * 1000;
290 void
291 ExecutionObject::WriteLayerOutputsToFile(const std::string& filename_prefix) const
293     pimpl_m->WriteLayerOutputsToFile(filename_prefix);
296 const LayerOutput* ExecutionObject::GetOutputFromLayer(
297                          uint32_t layer_index, uint32_t output_index) const
299     return pimpl_m->GetOutputFromLayer(layer_index, output_index);
302 const LayerOutputs* ExecutionObject::GetOutputsFromAllLayers() const
304     return pimpl_m->GetOutputsFromAllLayers();
307 int ExecutionObject::GetLayersGroupId() const
309     return pimpl_m->layers_group_id_m;
312 const std::string& ExecutionObject::GetDeviceName() const
314     return pimpl_m->device_name_m;
318 //
319 // Create a kernel to call the "initialize" function
320 //
321 void
322 ExecutionObject::Impl::SetupInitializeKernel(const DeviceArgInfo& create_arg,
323                                              const DeviceArgInfo& param_heap_arg)
325     // Allocate a heap for TI DL to use on the device
326     tidl_extmem_heap_m.reset(
327                          malloc_ddr<char>(configuration_m.NETWORK_HEAP_SIZE));
329     // Create a kernel for cleanup
330     KernelArgs cleanup_args;
331     k_cleanup_m.reset(new Kernel(device_m,
332                                  STRING(CLEANUP_KERNEL),
333                                  cleanup_args, device_index_m));
335     // Set up parameter struct for the initialize kernel
336     shared_initialize_params_m.reset(malloc_ddr<OCL_TIDL_InitializeParams>());
337     memset(shared_initialize_params_m.get(), 0,
338            sizeof(OCL_TIDL_InitializeParams));
340     shared_initialize_params_m->tidlHeapSize =configuration_m.NETWORK_HEAP_SIZE;
341     shared_initialize_params_m->l2HeapSize   = tidl::internal::DMEM1_SIZE;
342     shared_initialize_params_m->l1HeapSize   = tidl::internal::DMEM0_SIZE;
343     shared_initialize_params_m->numContexts  = tidl::internal::NUM_CONTEXTS;
345     // Set up execution trace specified in the configuration
346     EnableExecutionTrace(configuration_m,
347                          &shared_initialize_params_m->enableTrace);
349     // Setup kernel arguments for initialize
350     KernelArgs args = { create_arg,
351                         param_heap_arg,
352                         DeviceArgInfo(tidl_extmem_heap_m.get(),
353                                       configuration_m.NETWORK_HEAP_SIZE,
354                                       DeviceArgInfo::Kind::BUFFER),
355                         DeviceArgInfo(shared_initialize_params_m.get(),
356                                       sizeof(OCL_TIDL_InitializeParams),
357                                       DeviceArgInfo::Kind::BUFFER),
358                         device_m->type() == CL_DEVICE_TYPE_ACCELERATOR ?
359                             DeviceArgInfo(nullptr, tidl::internal::DMEM1_SIZE,
360                                           DeviceArgInfo::Kind::LOCAL):
361                             DeviceArgInfo(nullptr, 4,
362                                           DeviceArgInfo::Kind::LOCAL) };
364     k_initialize_m.reset(new Kernel(device_m,
365                                     STRING(INIT_KERNEL), args,
366                                     device_index_m));
369 //
370 // Allocate an OpenCL buffer for TIDL layer output buffer metadata.
371 // The device will populate metadata for every buffer that is used as an
372 // output buffer by a layer.  This needs to be done before setting up
373 // process kernel.
374 //
375 void ExecutionObject::Impl::EnableOutputBufferTrace()
377     trace_buf_params_sz_m = (sizeof(OCL_TIDL_BufParams)*
378                              num_network_layers_m*
379                              TIDL_NUM_OUT_BUFS);
381     trace_buf_params_m.reset(malloc_ddr<OCL_TIDL_BufParams>
382                              (trace_buf_params_sz_m));
384     // Device will update bufferId if there is valid data for the entry
385     OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
386     for (uint32_t i = 0; i < num_network_layers_m; i++)
387         for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
388         {
389             OCL_TIDL_BufParams *bufP =
390                                 &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
391             bufP->bufferId = UINT_MAX;
392         }
395 //
396 // Create a kernel to call the "process" function
397 //
398 void
399 ExecutionObject::Impl::SetupProcessKernel()
401     shared_process_params_m.reset(malloc_ddr<OCL_TIDL_ProcessParams>(
402                tidl::internal::NUM_CONTEXTS * sizeof(OCL_TIDL_ProcessParams)));
404     // Set up execution trace specified in the configuration
405     for (int i = 0; i < tidl::internal::NUM_CONTEXTS; i++)
406     {
407         OCL_TIDL_ProcessParams *p_params = shared_process_params_m.get() + i;
408         EnableExecutionTrace(configuration_m, &p_params->enableTrace);
409     }
411     uint32_t context_idx = 0;
412     KernelArgs args = { DeviceArgInfo(shared_process_params_m.get(),
413                                       tidl::internal::NUM_CONTEXTS *
414                                       sizeof(OCL_TIDL_ProcessParams),
415                                       DeviceArgInfo::Kind::BUFFER),
416                         DeviceArgInfo(tidl_extmem_heap_m.get(),
417                                       shared_initialize_params_m->tidlHeapSize,
418                                       DeviceArgInfo::Kind::BUFFER),
419                         DeviceArgInfo(trace_buf_params_m.get(),
420                                       trace_buf_params_sz_m,
421                                       DeviceArgInfo::Kind::BUFFER),
422                         DeviceArgInfo(&context_idx,
423                                       sizeof(uint32_t),
424                                       DeviceArgInfo::Kind::SCALAR)
425                       };
427     k_process_m.reset(new Kernel(device_m,
428                                  STRING(PROCESS_KERNEL), args,
429                                  device_index_m));
//
// Scatter tightly packed 8-bit data into a padded, pitched layout.
//   readPtr  - packed source: roi * n planes of height rows of width bytes
//   ptr      - destination; plane (r, c) starts at (r*n + c) * chOffset and
//              consecutive rows are pitch bytes apart
// Returns the number of bytes consumed from readPtr (width*height*n*roi),
// or 0 without copying anything when readPtr is null.
//
static size_t readDataS8(const char *readPtr, char *ptr, int roi, int n,
                         int width, int height, int pitch,
                         int chOffset)
{
    if (readPtr == nullptr)
        return 0;

    // The source is read strictly front to back, one row at a time
    const char *src = readPtr;

    for (int r = 0; r < roi; r++)
    {
        for (int ch = 0; ch < n; ch++)
        {
            const int plane_start = r*n*chOffset + ch*chOffset;
            for (int row = 0; row < height; row++)
            {
                memcpy(&ptr[plane_start + row*pitch], src, width);
                src += width;
            }
        }
    }

    return width*height*n*roi;
}
//
// Gather 8-bit data from a padded, pitched layout into a packed buffer.
//   writePtr - packed destination: n planes of height rows of width bytes
//   ptr      - source; plane c starts at c * chOffset and consecutive rows
//              are pitch bytes apart
// Returns the number of bytes produced (width*height*n), or 0 without
// copying anything when writePtr is null.
//
static size_t writeDataS8(char *writePtr, const char *ptr, int n, int width,
                          int height, int pitch, int chOffset)
{
    if (writePtr == nullptr)
        return 0;

    // The destination is filled strictly front to back, one row at a time
    char *dst = writePtr;

    for (int ch = 0; ch < n; ch++)
    {
        for (int row = 0; row < height; row++)
        {
            memcpy(dst, &ptr[ch*chOffset + row*pitch], width);
            dst += width;
        }
    }

    return width*height*n;
}
463 //
464 // Copy from host buffer to TIDL device buffer
465 //
466 void ExecutionObject::Impl::HostWriteNetInput(uint32_t context_idx)
468     const char*     readPtr  = (const char *) in_m[context_idx].GetArg().ptr();
469     const PipeInfo& pipe     = in_m[context_idx].GetPipe();
470     OCL_TIDL_ProcessParams *p_params = shared_process_params_m.get()
471                                        + context_idx;
473     for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
474     {
475         OCL_TIDL_BufParams *inBuf = &shared_initialize_params_m->inBufs[i];
476         char *inBufAddr = tidl_extmem_heap_m.get() + inBuf->bufPlaneBufOffset
477                           + context_idx * inBuf->contextSize;
479             readPtr += readDataS8(
480                 readPtr,
481                 (char *) inBufAddr
482                     + inBuf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
483                     + OCL_TIDL_MAX_PAD_SIZE,
484                 inBuf->numROIs,
485                 inBuf->numChannels,
486                 inBuf->ROIWidth,
487                 inBuf->ROIHeight,
488                 inBuf->bufPlaneWidth,
489                 ((inBuf->bufPlaneWidth * inBuf->bufPlaneHeight) /
490                  inBuf->numChannels));
492         p_params->dataQ[i] = pipe.dataQ_m[i];
493     }
496 //
497 // Copy from TIDL device buffer into host buffer
498 //
499 void ExecutionObject::Impl::HostReadNetOutput(uint32_t context_idx)
501     char* writePtr = (char *) out_m[context_idx].GetArg().ptr();
502     PipeInfo& pipe = out_m[context_idx].GetPipe();
503     OCL_TIDL_ProcessParams *p_params = shared_process_params_m.get()
504                                        + context_idx;
506     for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
507     {
508         OCL_TIDL_BufParams *outBuf = &shared_initialize_params_m->outBufs[i];
509         char *outBufAddr = tidl_extmem_heap_m.get() + outBuf->bufPlaneBufOffset
510                            + context_idx * outBuf->contextSize;
511         if (writePtr != nullptr)
512         {
513             writePtr += writeDataS8(
514                 writePtr,
515                 (char *) outBufAddr
516                     + outBuf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
517                     + OCL_TIDL_MAX_PAD_SIZE,
518                 outBuf->numChannels,
519                 outBuf->ROIWidth,
520                 outBuf->ROIHeight,
521                 outBuf->bufPlaneWidth,
522                 ((outBuf->bufPlaneWidth * outBuf->bufPlaneHeight)/
523                  outBuf->numChannels));
524         }
526         pipe.dataQ_m[i]   = p_params->dataQ[i];
527     }
530 void ExecutionObject::Impl::ComputeInputOutputSizes()
532     if (shared_initialize_params_m->errorCode != OCL_TIDL_SUCCESS)  return;
534     if (shared_initialize_params_m->numInBufs > OCL_TIDL_MAX_IN_BUFS ||
535         shared_initialize_params_m->numOutBufs > OCL_TIDL_MAX_OUT_BUFS)
536     {
537         std::cout << "Num input/output bufs ("
538                   << shared_initialize_params_m->numInBufs << ", "
539                   << shared_initialize_params_m->numOutBufs
540                   << ") exceeded limit!" << std::endl;
541         shared_initialize_params_m->errorCode = OCL_TIDL_INIT_FAIL;
542         return;
543     }
545     in_size_m  = 0;
546     out_size_m = 0;
547     for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
548     {
549         OCL_TIDL_BufParams *inBuf = &shared_initialize_params_m->inBufs[i];
550         in_size_m += inBuf->numROIs * inBuf->numChannels * inBuf->ROIWidth *
551                      inBuf->ROIHeight;
552     }
553     for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
554     {
555         OCL_TIDL_BufParams *outBuf = &shared_initialize_params_m->outBufs[i];
556         out_size_m += outBuf->numChannels * outBuf->ROIWidth *outBuf->ROIHeight;
557     }
560 bool ExecutionObject::Impl::RunAsync(CallType ct, uint32_t context_idx)
562     switch (ct)
563     {
564         case CallType::INIT:
565         {
566             k_initialize_m->RunAsync();
567             break;
568         }
569         case CallType::PROCESS:
570         {
571             RecordEvent(current_frame_idx_m[context_idx],
572                         (layers_group_id_m == 1) ? TimeStamp::EO1_PFSA_START:
573                                                    TimeStamp::EO2_PFSA_START,
574                         static_cast<int>(device_type_m),
575                         device_index_m);
577             OCL_TIDL_ProcessParams *p_params = shared_process_params_m.get()
578                                                + context_idx;
579             p_params->frameIdx = current_frame_idx_m[context_idx];
580             HostWriteNetInput(context_idx);
581             {
582                 std::unique_lock<std::mutex> lock(mutex_access_m);
583                 k_process_m->UpdateScalarArg(3, sizeof(uint32_t), &context_idx);
584                 k_process_m->RunAsync(context_idx);
585             }
587             RecordEvent(current_frame_idx_m[context_idx],
588                         (layers_group_id_m == 1) ?  TimeStamp::EO1_PFSA_END:
589                                                     TimeStamp::EO2_PFSA_END);
590             break;
591         }
592         case CallType::CLEANUP:
593         {
594             k_cleanup_m->RunAsync();
595             break;
596         }
597         default:
598             return false;
599     }
601     return true;
604 bool ExecutionObject::Impl::Wait(CallType ct, uint32_t context_idx)
606     switch (ct)
607     {
608         case CallType::INIT:
609         {
610             bool has_work = k_initialize_m->Wait();
612             if (has_work)
613             {
614                 ComputeInputOutputSizes();
615                 if (shared_initialize_params_m->errorCode != OCL_TIDL_SUCCESS)
616                     throw Exception(shared_initialize_params_m->errorCode,
617                                     __FILE__, __FUNCTION__, __LINE__);
618             }
619             return has_work;
620         }
621         case CallType::PROCESS:
622         {
623             RecordEvent(current_frame_idx_m[context_idx],
624                         (layers_group_id_m == 1) ? TimeStamp::EO1_PFW_START:
625                                                    TimeStamp::EO2_PFW_START);
627             bool has_work = k_process_m->Wait(context_idx);
628             if (has_work)
629             {
630                 OCL_TIDL_ProcessParams *p_params = shared_process_params_m.get()
631                                                    + context_idx;
632                 if (p_params->errorCode != OCL_TIDL_SUCCESS)
633                     throw Exception(p_params->errorCode,
634                                     __FILE__, __FUNCTION__, __LINE__);
636                 HostReadNetOutput(context_idx);
638                 RecordEvent(current_frame_idx_m[context_idx],
639                             (layers_group_id_m == 1) ? TimeStamp::EO1_PFW_END:
640                                                        TimeStamp::EO2_PFW_END);
641             }
642             else
643             {
644                 // If there is no work, reset start event time
645                 ResetEvent(current_frame_idx_m[context_idx],
646                            (layers_group_id_m == 1) ? TimeStamp::EO1_PFW_START:
647                                                       TimeStamp::EO2_PFW_START);
648             }
650             return has_work;
651         }
652         case CallType::CLEANUP:
653         {
654             return k_cleanup_m->Wait();
655             break;
656         }
657         default:
658             return false;
659     }
661     return false;
664 bool ExecutionObject::Impl::AddCallback(CallType ct, void *user_data,
665                                         uint32_t context_idx)
667     switch (ct)
668     {
669         case CallType::PROCESS:
670         {
671             return k_process_m->AddCallback(user_data, context_idx);
672             break;
673         }
674         default:
675             return false;
676     }
678     return false;
681 uint64_t ExecutionObject::Impl::GetProcessCycles(uint32_t context_idx) const
683     uint8_t factor = 1;
685     // ARP32 running at half frequency of VCOP, multiply by 2 for VCOP cycles
686     if (device_type_m == DeviceType::EVE)
687         factor = 2;
689     OCL_TIDL_ProcessParams *p_params = shared_process_params_m.get() +
690                                        context_idx;
691     return p_params->cycles * factor;
694 //
695 // Write the trace data to output files
696 //
697 void
698 ExecutionObject::Impl::WriteLayerOutputsToFile(const std::string& filename_prefix) const
700     if (trace_buf_params_sz_m == 0)
701         return;
703     OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
705     for (uint32_t i = 0; i < num_network_layers_m; i++)
706         for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
707         {
708             OCL_TIDL_BufParams* buf = &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
710             if (buf->bufferId == UINT_MAX)
711                 continue;
713             size_t buffer_size = buf->numChannels * buf->ROIHeight *
714                                  buf->ROIWidth;
716             char *tmp = new char[buffer_size];
718             if (tmp == nullptr)
719                 throw Exception("Out of memory, new failed",
720                         __FILE__, __FUNCTION__, __LINE__);
722             writeDataS8(
723                 tmp,
724                 (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
725                 + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
726                 + OCL_TIDL_MAX_PAD_SIZE,
727                 buf->numChannels,
728                 buf->ROIWidth,
729                 buf->ROIHeight,
730                 buf->bufPlaneWidth,
731                 ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
732                  buf->numChannels));
734             std::string filename(filename_prefix);
735             filename += std::to_string(buf->bufferId) + "_";
736             filename += std::to_string(buf->ROIWidth) + "x";
737             filename += std::to_string(buf->ROIHeight) + ".bin";
739             std::ofstream ofs;
740             ofs.open(filename, std::ofstream::out);
741             ofs.write(tmp, buffer_size);
742             ofs.close();
744             delete[] tmp;
745         }
749 const LayerOutput* ExecutionObject::Impl::GetOutputFromLayer(
750                             uint32_t layer_index, uint32_t output_index) const
752     if (trace_buf_params_sz_m == 0)
753         return nullptr;
755     if (layer_index > num_network_layers_m || output_index > TIDL_NUM_OUT_BUFS)
756         return nullptr;
758     OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
759     OCL_TIDL_BufParams* buf = &bufferParams[layer_index*TIDL_NUM_OUT_BUFS+
760                                             output_index];
762     if (buf->bufferId == UINT_MAX)
763         return nullptr;
765     size_t buffer_size = buf->numChannels * buf->ROIHeight *
766                          buf->ROIWidth;
768     char *data = new char[buffer_size];
770     if (data == nullptr)
771         throw Exception("Out of memory, new failed",
772                 __FILE__, __FUNCTION__, __LINE__);
774     writeDataS8(data,
775                 (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
776                 + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
777                 + OCL_TIDL_MAX_PAD_SIZE,
778                 buf->numChannels,
779                 buf->ROIWidth,
780                 buf->ROIHeight,
781                 buf->bufPlaneWidth,
782                 ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
783                  buf->numChannels));
785     return new LayerOutput(layer_index, output_index, buf->bufferId,
786                            buf->numROIs, buf->numChannels, buf->ROIHeight,
787                            buf->ROIWidth, data);
790 const LayerOutputs* ExecutionObject::Impl::GetOutputsFromAllLayers() const
792     LayerOutputs* result = new LayerOutputs;
794     for (uint32_t i=0; i < num_network_layers_m; i++)
795         for (int j=0; j < TIDL_NUM_OUT_BUFS; j++)
796         {
797             const LayerOutput* lo = GetOutputFromLayer(i, j);
798             if (lo)
799                 result->push_back(std::unique_ptr<const LayerOutput>{ lo });
800         }
802     return result;
805 LayerOutput::LayerOutput(int layer_index, int output_index, int buffer_id,
806                          int num_roi, int num_channels, size_t height,
807                          size_t width, const char* data):
808                         layer_index_m(layer_index), buffer_id_m(buffer_id),
809                         num_roi_m(num_roi), num_channels_m(num_channels),
810                         height_m(height), width_m(width), data_m(data)
811 { }
813 LayerOutput::~LayerOutput()
815     delete[] data_m;
818 void ExecutionObject::Impl::AcquireContext(uint32_t& context_idx)
820     std::unique_lock<std::mutex> lock(mutex_access_m);
821     cv_access_m.wait(lock, [this]{ return this->idle_encoding_m <
822                                    (1 << tidl::internal::NUM_CONTEXTS) - 1; });
824     for (uint32_t i = 0; i < tidl::internal::NUM_CONTEXTS; i++)
825         if (((1 << i) & idle_encoding_m) == 0)
826         {
827             context_idx = i;
828             break;
829         }
830     idle_encoding_m |= (1 << context_idx);  // mark the bit as busy
833 void ExecutionObject::Impl::ReleaseContext(uint32_t context_idx)
835     {
836         std::unique_lock<std::mutex> lock(mutex_access_m);
837         idle_encoding_m &= (~(1 << context_idx));  // mark the bit as free
838     }
839     cv_access_m.notify_all();