ExecutionObjectPipeline for executing layersGroups
[tidl/tidl-api.git] / tidl_api / src / execution_object_pipeline.cpp
1 /******************************************************************************
2  * Copyright (c) 2018 Texas Instruments Incorporated - http://www.ti.com/
3  *  All rights reserved.
4  *
5  *  Redistribution and use in source and binary forms, with or without
6  *  modification, are permitted provided that the following conditions are met:
7  *      * Redistributions of source code must retain the above copyright
8  *        notice, this list of conditions and the following disclaimer.
9  *      * Redistributions in binary form must reproduce the above copyright
10  *        notice, this list of conditions and the following disclaimer in the
11  *        documentation and/or other materials provided with the distribution.
12  *      * Neither the name of Texas Instruments Incorporated nor the
13  *        names of its contributors may be used to endorse or promote products
14  *        derived from this software without specific prior written permission.
15  *
16  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  *  THE POSSIBILITY OF SUCH DAMAGE.
27  *****************************************************************************/
#include <assert.h>
#include <chrono>
#include <condition_variable>
#include <memory>
#include <mutex>

#include "device_arginfo.h"
#include "execution_object_pipeline.h"

using namespace tidl;
38 class ExecutionObjectPipeline::Impl
39 {
40     public:
41         Impl(std::vector<ExecutionObject*> &eos);
42         ~Impl();
44         void SetInputOutputBuffer(const ArgInfo &in, const ArgInfo &out);
45         bool RunAsyncStart();
46         bool RunAsyncNext();
47         bool Wait();
49         // Trace related
50         void WriteLayerOutputsToFile(const std::string& filename_prefix) const;
51         const LayerOutput* GetOutputFromLayer(uint32_t layer_index,
52                                               uint32_t output_index) const;
53         const LayerOutputs* GetOutputsFromAllLayers() const;
55         //! for pipelined execution
56         std::vector<ExecutionObject*> eos_m;
57         std::vector<IODeviceArgInfo*> iobufs_m;
59         std::string device_name_m;
61         //! current frame index
62         int frame_idx_m;
64         //! current execution object index
65         uint32_t curr_eo_idx_m;
67         // host time tracking: pipeline start to finish
68         float host_time_m;
70     private:
71         //! @brief Initialize ExecutionObjectPipeline with given
72         //! ExecutionObjects: check consecutive layersGroup, allocate memory
73         void Initialize();
75         // flag, mutex and cond var for signaling completion and waiting
76         bool has_work_m, is_processed_m;
77         std::mutex mutex_m;
78         std::condition_variable cv_m;
80         // host time tracking: pipeline start to finish
81         std::chrono::time_point<std::chrono::steady_clock> start_m;
82 };
84 ExecutionObjectPipeline::ExecutionObjectPipeline(
85     std::vector<ExecutionObject*> eos)
86 {
87     pimpl_m = std::unique_ptr<Impl> { new Impl(eos) };
88 }
90 ExecutionObjectPipeline::Impl::Impl(std::vector<ExecutionObject *> &eos) :
91     eos_m(eos), has_work_m(false), is_processed_m(false)
92 {
93     Initialize();
94 }
96 // Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
97 // Both unique_ptr and shared_ptr can be instantiated with an incomplete type
98 // unique_ptr's destructor requires a complete type in order to invoke delete
99 ExecutionObjectPipeline::~ExecutionObjectPipeline() = default;
101 char* ExecutionObjectPipeline::GetInputBufferPtr() const
103     return static_cast<char *>(pimpl_m->iobufs_m.front()->GetArg().ptr());
106 size_t ExecutionObjectPipeline::GetInputBufferSizeInBytes() const
108     return pimpl_m->eos_m.front()->GetInputBufferSizeInBytes();
111 char* ExecutionObjectPipeline::GetOutputBufferPtr() const
113     return static_cast<char *>(pimpl_m->iobufs_m.back()->GetArg().ptr());
116 size_t ExecutionObjectPipeline::GetOutputBufferSizeInBytes() const
118     return pimpl_m->eos_m.back()->GetOutputBufferSizeInBytes();
121 void ExecutionObjectPipeline::SetInputOutputBuffer(const ArgInfo& in,
122                                                    const ArgInfo& out)
124     assert(in.ptr() != nullptr  && in.size() >= GetInputBufferSizeInBytes());
125     assert(out.ptr() != nullptr && out.size() >= GetOutputBufferSizeInBytes());
126     pimpl_m->SetInputOutputBuffer(in, out);
129 void ExecutionObjectPipeline::SetFrameIndex(int idx)
131     pimpl_m->frame_idx_m = idx;
134 int ExecutionObjectPipeline::GetFrameIndex() const
136     return pimpl_m->frame_idx_m;
139 bool ExecutionObjectPipeline::ProcessFrameStartAsync()
141     assert(GetInputBufferPtr() != nullptr && GetOutputBufferPtr() != nullptr);
142     bool st = pimpl_m->RunAsyncStart();
143     if (st)
144         st = pimpl_m->eos_m[0]->AddCallback(ExecutionObject::CallType::PROCESS,
145                                             this);
146     return st;
149 bool ExecutionObjectPipeline::ProcessFrameWait()
151     return pimpl_m->Wait();
154 void CallbackWrapper(void *user_data)
156     ((ExecutionObjectPipeline *) user_data)->RunAsyncNext();
159 void ExecutionObjectPipeline::RunAsyncNext()
161     bool has_next = pimpl_m->RunAsyncNext();
162     if (has_next)
163         pimpl_m->eos_m[pimpl_m->curr_eo_idx_m]->AddCallback(
164                                      ExecutionObject::CallType::PROCESS, this);
167 float ExecutionObjectPipeline::GetProcessTimeInMilliSeconds() const
169     float total = 0.0f;
170     for (auto eo : pimpl_m->eos_m)
171         total += eo->GetProcessTimeInMilliSeconds();
172     return total;
175 float ExecutionObjectPipeline::GetHostProcessTimeInMilliSeconds() const
177     return pimpl_m->host_time_m;
180 const std::string& ExecutionObjectPipeline::GetDeviceName() const
182     return pimpl_m->device_name_m;
185 void
186 ExecutionObjectPipeline::WriteLayerOutputsToFile(
187     const std::string& filename_prefix) const
189     pimpl_m->WriteLayerOutputsToFile(filename_prefix);
192 const LayerOutput*
193 ExecutionObjectPipeline::GetOutputFromLayer(uint32_t layer_index,
194     uint32_t output_index) const
196     return pimpl_m->GetOutputFromLayer(layer_index, output_index);
199 const LayerOutputs*
200 ExecutionObjectPipeline::GetOutputsFromAllLayers() const
202     return pimpl_m->GetOutputsFromAllLayers();
206 /// Impl methods start here
209 static
210 void* AllocateMem(size_t size)
212     if (size == 0)  return nullptr;
213     void *ptr = malloc(size);
214     if (ptr == nullptr)
215         throw Exception("Out of memory, ExecutionObjectPipeline malloc failed",
216                         __FILE__, __FUNCTION__, __LINE__);
217     return ptr;
220 void ExecutionObjectPipeline::Impl::Initialize()
222     // Check consecutive layersGroups to form a pipeline
223     int prev_group = 0;
224     for (auto eo : eos_m)
225     {
226         int group = eo->GetLayersGroupId();
227         if (prev_group != 0 && group != prev_group + 1)
228             throw Exception(
229                 "Non-consecutive layersGroupIds in ExecutionObjectPipeline",
230                 __FILE__, __FUNCTION__, __LINE__);
231         prev_group = group;
232     }
234     for (auto eo : eos_m)
235         device_name_m += eo->GetDeviceName() + "+";
236     device_name_m.resize(device_name_m.size() - 1);
238     // Allocate input and output memory for EOs/layersGroups
239     // Note that i-th EO's output buffer is the same as (i+1)-th EO's input
240     // So, if n EOs, then (n+1) buffers: b EO b EO b EO b ... EO b
241     // User must set the first input buffer and the last output buffer
242     size_t size;
243     ArgInfo in(nullptr, 0);
244     iobufs_m.push_back(new IODeviceArgInfo(in));
245     for (auto eo : eos_m)
246     {
247         if (eo != eos_m.back())
248             size = eo->GetOutputBufferSizeInBytes();
249         else
250             size = 0;
252         void *ptr = AllocateMem(size);
253         ArgInfo out(ptr, size);
254         iobufs_m.push_back(new IODeviceArgInfo(out));
255     }
258 ExecutionObjectPipeline::Impl::~Impl()
260     int num_iobufs = iobufs_m.size();
261     for (int i = 0; i < num_iobufs; i++)
262     {
263         if (! (i == 0 || i == num_iobufs-1))
264             free(iobufs_m[i]->GetArg().ptr());
265         delete iobufs_m[i];
266     }
269 void ExecutionObjectPipeline::Impl::SetInputOutputBuffer(const ArgInfo &in,
270                                                          const ArgInfo &out)
272     delete iobufs_m.front();
273     delete iobufs_m.back();
274     iobufs_m.front() = new IODeviceArgInfo(in);
275     iobufs_m.back()  = new IODeviceArgInfo(out);
278 bool ExecutionObjectPipeline::Impl::RunAsyncStart()
280     start_m = std::chrono::steady_clock::now();
281     has_work_m = true;
282     is_processed_m = false;
283     host_time_m = 0.0f;
284     curr_eo_idx_m = 0;
285     eos_m[0]->AcquireLock();
286     eos_m[0]->SetInputOutputBuffer(iobufs_m[0], iobufs_m[1]);
287     return eos_m[0]->ProcessFrameStartAsync();
290 // returns true if we have more EOs to execute
291 bool ExecutionObjectPipeline::Impl::RunAsyncNext()
293     eos_m[curr_eo_idx_m]->ProcessFrameWait();
294     eos_m[curr_eo_idx_m]->ReleaseLock();
295     curr_eo_idx_m += 1;
296     if (curr_eo_idx_m < eos_m.size())
297     {
298         eos_m[curr_eo_idx_m]->AcquireLock();
299         eos_m[curr_eo_idx_m]->SetInputOutputBuffer(iobufs_m[curr_eo_idx_m],
300                                                    iobufs_m[curr_eo_idx_m+1]);
301         eos_m[curr_eo_idx_m]->ProcessFrameStartAsync();
302         return true;
303     }
304     else
305     {
306         std::chrono::duration<float> elapsed = std::chrono::steady_clock::now()
307                                                - start_m;
308         host_time_m = elapsed.count() * 1000;  // seconds to milliseconds
309         is_processed_m = true;
310         cv_m.notify_all();
311         return false;
312     }
315 bool ExecutionObjectPipeline::Impl::Wait()
317     if (! has_work_m)  return false;
319     std::unique_lock<std::mutex> lock(mutex_m);
320     cv_m.wait(lock, [this]{ return this->is_processed_m; });
321     has_work_m = false;
322     return true;
325 void
326 ExecutionObjectPipeline::Impl::WriteLayerOutputsToFile(
327     const std::string& filename_prefix) const
329     for (auto eo : eos_m)
330         eo->WriteLayerOutputsToFile(filename_prefix);
333 const LayerOutput*
334 ExecutionObjectPipeline::Impl::GetOutputFromLayer(uint32_t layer_index,
335     uint32_t output_index) const
337     const LayerOutput* lo = nullptr;
338     for (auto eo : eos_m)
339     {
340         lo = eo->GetOutputFromLayer(layer_index, output_index);
341         if (lo != nullptr)  break;
342     }
343     return lo;
346 const LayerOutputs*
347 ExecutionObjectPipeline::Impl::GetOutputsFromAllLayers() const
349     LayerOutputs *all = new LayerOutputs;
350     for (auto eo : eos_m)
351     {
352         LayerOutputs *los = const_cast<LayerOutputs *>(
353                                                 eo->GetOutputsFromAllLayers());
354         for (auto& lo : *los)
355             all->push_back(std::unique_ptr<const LayerOutput>{ lo.release() });
356         delete los;
357     }
358     return all;