]> Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - tidl/tidl-api.git/blob - tidl_api/src/execution_object_pipeline.cpp
Fix g++ 8.3.0 compilation error
[tidl/tidl-api.git] / tidl_api / src / execution_object_pipeline.cpp
1 /******************************************************************************
2  * Copyright (c) 2018 Texas Instruments Incorporated - http://www.ti.com/
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *      * Redistributions of source code must retain the above copyright
8  *        notice, this list of conditions and the following disclaimer.
9  *      * Redistributions in binary form must reproduce the above copyright
10  *        notice, this list of conditions and the following disclaimer in the
11  *        documentation and/or other materials provided with the distribution.
12  *      * Neither the name of Texas Instruments Incorporated nor the
13  *        names of its contributors may be used to endorse or promote products
14  *        derived from this software without specific prior written permission.
15  *
16  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  *  THE POSSIBILITY OF SUCH DAMAGE.
27  *****************************************************************************/
29 #include <assert.h>
30 #include <mutex>
31 #include <condition_variable>
32 #include "device_arginfo.h"
33 #include "execution_object_pipeline.h"
34 #include "parameters.h"
35 #include "util.h"
37 using namespace tidl;
39 class ExecutionObjectPipeline::Impl
40 {
41     public:
42         Impl(std::vector<ExecutionObject*> &eos);
43         ~Impl();
45         void SetInputOutputBuffer(const ArgInfo &in, const ArgInfo &out);
46         bool RunAsyncStart();
47         bool RunAsyncNext();
48         bool Wait();
50         // Trace related
51         void WriteLayerOutputsToFile(const std::string& filename_prefix) const;
52         const LayerOutput* GetOutputFromLayer(uint32_t layer_index,
53                                               uint32_t output_index) const;
54         const LayerOutputs* GetOutputsFromAllLayers() const;
56         //! for pipelined execution
57         std::vector<ExecutionObject*> eos_m;
58         std::vector<IODeviceArgInfo*> iobufs_m;
60         std::string device_name_m;
62         //! current frame index
63         int frame_idx_m;
65         //! current execution object index, and it context index
66         uint32_t curr_eo_idx_m;
67         uint32_t curr_eo_context_idx_m;
69     private:
70         //! @brief Initialize ExecutionObjectPipeline with given
71         //! ExecutionObjects: check consecutive layersGroup, allocate memory
72         void Initialize();
74         // flag, mutex and cond var for signaling completion and waiting
75         bool has_work_m, is_processed_m;
76         std::mutex mutex_m;
77         std::condition_variable cv_m;
78 };
80 ExecutionObjectPipeline::ExecutionObjectPipeline(
81     std::vector<ExecutionObject*> eos)
82 {
83     pimpl_m = std::unique_ptr<Impl> { new Impl(eos) };
84 }
86 ExecutionObjectPipeline::Impl::Impl(std::vector<ExecutionObject *> &eos) :
87     eos_m(eos), has_work_m(false), is_processed_m(false)
88 {
89     Initialize();
90 }
92 // Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
93 // Both unique_ptr and shared_ptr can be instantiated with an incomplete type
94 // unique_ptr's destructor requires a complete type in order to invoke delete
95 ExecutionObjectPipeline::~ExecutionObjectPipeline() = default;
97 char* ExecutionObjectPipeline::GetInputBufferPtr() const
98 {
99     return static_cast<char *>(pimpl_m->iobufs_m.front()->GetArg().ptr());
102 uint32_t ExecutionObjectPipeline::GetNumExecutionObjects() const
104     return pimpl_m->eos_m.size();
107 size_t ExecutionObjectPipeline::GetInputBufferSizeInBytes() const
109     return pimpl_m->eos_m.front()->GetInputBufferSizeInBytes();
112 char* ExecutionObjectPipeline::GetOutputBufferPtr() const
114     return static_cast<char *>(pimpl_m->iobufs_m.back()->GetArg().ptr());
117 size_t ExecutionObjectPipeline::GetOutputBufferSizeInBytes() const
119     return pimpl_m->eos_m.back()->GetOutputBufferSizeInBytes();
122 void ExecutionObjectPipeline::SetInputOutputBuffer(const ArgInfo& in,
123                                                    const ArgInfo& out)
125     assert(in.ptr() != nullptr  && in.size() >= GetInputBufferSizeInBytes());
126     assert(out.ptr() != nullptr && out.size() >= GetOutputBufferSizeInBytes());
127     pimpl_m->SetInputOutputBuffer(in, out);
130 void ExecutionObjectPipeline::SetFrameIndex(int idx)
132     pimpl_m->frame_idx_m = idx;
135 int ExecutionObjectPipeline::GetFrameIndex() const
137     return pimpl_m->frame_idx_m;
140 bool ExecutionObjectPipeline::ProcessFrameStartAsync()
142     RecordEvent(pimpl_m->frame_idx_m, TimeStamp::EOP_PFSA_START);
144     assert(GetInputBufferPtr() != nullptr && GetOutputBufferPtr() != nullptr);
145     bool st = pimpl_m->RunAsyncStart();
146     if (st)
147         st = pimpl_m->eos_m[0]->AddCallback(ExecutionObject::CallType::PROCESS,
148                                          this, pimpl_m->curr_eo_context_idx_m);
150     RecordEvent(pimpl_m->frame_idx_m, TimeStamp::EOP_PFSA_END);
151     return st;
154 bool ExecutionObjectPipeline::ProcessFrameWait()
156     return pimpl_m->Wait();
159 void CallbackWrapper(void *user_data)
161     int frame_index = ((ExecutionObjectPipeline *) user_data)->GetFrameIndex();
162     RecordEvent(frame_index, TimeStamp::EOP_RAN_START);
164     ((ExecutionObjectPipeline *) user_data)->RunAsyncNext();
166     RecordEvent(frame_index, TimeStamp::EOP_RAN_END);
169 void ExecutionObjectPipeline::RunAsyncNext()
171     bool has_next = pimpl_m->RunAsyncNext();
172     if (has_next)
173         pimpl_m->eos_m[pimpl_m->curr_eo_idx_m]->AddCallback(
174                                      ExecutionObject::CallType::PROCESS, this,
175                                      pimpl_m->curr_eo_context_idx_m);
178 const std::string& ExecutionObjectPipeline::GetDeviceName() const
180     return pimpl_m->device_name_m;
183 void
184 ExecutionObjectPipeline::WriteLayerOutputsToFile(
185     const std::string& filename_prefix) const
187     pimpl_m->WriteLayerOutputsToFile(filename_prefix);
190 const LayerOutput*
191 ExecutionObjectPipeline::GetOutputFromLayer(uint32_t layer_index,
192     uint32_t output_index) const
194     return pimpl_m->GetOutputFromLayer(layer_index, output_index);
197 const LayerOutputs*
198 ExecutionObjectPipeline::GetOutputsFromAllLayers() const
200     return pimpl_m->GetOutputsFromAllLayers();
204 /// Impl methods start here
207 static
208 void* AllocateMem(size_t size)
210     if (size == 0)  return nullptr;
211     void *ptr = malloc(size);
212     if (ptr == nullptr)
213         throw Exception("Out of memory, ExecutionObjectPipeline malloc failed",
214                         __FILE__, __FUNCTION__, __LINE__);
215     return ptr;
218 void ExecutionObjectPipeline::Impl::Initialize()
220     // Check consecutive layersGroups to form a pipeline
221     int prev_group = 0;
222     for (auto eo : eos_m)
223     {
224         int group = eo->GetLayersGroupId();
225         if (prev_group != 0 && group != prev_group + 1)
226             throw Exception(
227                 "Non-consecutive layersGroupIds in ExecutionObjectPipeline",
228                 __FILE__, __FUNCTION__, __LINE__);
229         prev_group = group;
230     }
232     for (auto eo : eos_m)
233         device_name_m += eo->GetDeviceName() + "+";
234     device_name_m.resize(device_name_m.size() - 1);
236     // Allocate input and output memory for EOs/layersGroups
237     // Note that i-th EO's output buffer is the same as (i+1)-th EO's input
238     // So, if n EOs, then (n+1) buffers: b EO b EO b EO b ... EO b
239     // User must set the first input buffer and the last output buffer
240     size_t size;
241     ArgInfo in(nullptr, 0);
242     iobufs_m.push_back(new IODeviceArgInfo(in));
243     for (auto eo : eos_m)
244     {
245         if (eo != eos_m.back())
246             size = eo->GetOutputBufferSizeInBytes();
247         else
248             size = 0;
250         void *ptr = AllocateMem(size);
251         ArgInfo out(ptr, size);
252         iobufs_m.push_back(new IODeviceArgInfo(out));
253     }
256 ExecutionObjectPipeline::Impl::~Impl()
258     int num_iobufs = iobufs_m.size();
259     for (int i = 0; i < num_iobufs; i++)
260     {
261         if (! (i == 0 || i == num_iobufs-1))
262             free(iobufs_m[i]->GetArg().ptr());
263         delete iobufs_m[i];
264     }
267 void ExecutionObjectPipeline::Impl::SetInputOutputBuffer(const ArgInfo &in,
268                                                          const ArgInfo &out)
270     delete iobufs_m.front();
271     delete iobufs_m.back();
272     iobufs_m.front() = new IODeviceArgInfo(in);
273     iobufs_m.back()  = new IODeviceArgInfo(out);
276 // Start execution on the first EO in the pipeline. Callbacks are used
277 // to trigger execution on subsequent EOs
278 bool ExecutionObjectPipeline::Impl::RunAsyncStart()
280     has_work_m = true;
281     is_processed_m = false;
282     curr_eo_idx_m = 0;
283     return eos_m[0]->AcquireAndRunContext(curr_eo_context_idx_m,
284                                           frame_idx_m,
285                                           *iobufs_m[0], *iobufs_m[1]);
288 // Invoked via the callback function, CallbackWrapper. Used to advance the
289 // pipeline.
290 // returns true if we have more EOs to execute
291 bool ExecutionObjectPipeline::Impl::RunAsyncNext()
293     eos_m[curr_eo_idx_m]->WaitAndReleaseContext(curr_eo_context_idx_m);
294     curr_eo_idx_m += 1;
295     if (curr_eo_idx_m < eos_m.size())
296     {
297         eos_m[curr_eo_idx_m]->AcquireAndRunContext(curr_eo_context_idx_m,
298                                                    frame_idx_m,
299                                                    *iobufs_m[curr_eo_idx_m],
300                                                    *iobufs_m[curr_eo_idx_m+1]);
301         return true;
302     }
303     else
304     {
305         {
306             std::lock_guard<std::mutex> lock(mutex_m);
307             is_processed_m = true;
308         }
309         cv_m.notify_all();
310         return false;
311     }
314 bool ExecutionObjectPipeline::Impl::Wait()
316     if (! has_work_m)  return false;
318     RecordEvent(frame_idx_m, TimeStamp::EOP_PFW_START);
320     std::unique_lock<std::mutex> lock(mutex_m);
321     cv_m.wait(lock, [this]{ return this->is_processed_m; });
322     has_work_m = false;
324     RecordEvent(frame_idx_m, TimeStamp::EOP_PFW_END);
326     return true;
329 void
330 ExecutionObjectPipeline::Impl::WriteLayerOutputsToFile(
331     const std::string& filename_prefix) const
333     for (auto eo : eos_m)
334         eo->WriteLayerOutputsToFile(filename_prefix);
337 const LayerOutput*
338 ExecutionObjectPipeline::Impl::GetOutputFromLayer(uint32_t layer_index,
339     uint32_t output_index) const
341     const LayerOutput* lo = nullptr;
342     for (auto eo : eos_m)
343     {
344         lo = eo->GetOutputFromLayer(layer_index, output_index);
345         if (lo != nullptr)  break;
346     }
347     return lo;
350 const LayerOutputs*
351 ExecutionObjectPipeline::Impl::GetOutputsFromAllLayers() const
353     LayerOutputs *all = new LayerOutputs;
354     for (auto eo : eos_m)
355     {
356         LayerOutputs *los = const_cast<LayerOutputs *>(
357                                                 eo->GetOutputsFromAllLayers());
358         for (auto& lo : *los)
359             all->push_back(std::unique_ptr<const LayerOutput>{ lo.release() });
360         delete los;
361     }
362     return all;