1 /******************************************************************************
2 * Copyright (c) 2018 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
29 #include <assert.h>
30 #include <mutex>
31 #include <condition_variable>
32 #include "device_arginfo.h"
33 #include "execution_object_pipeline.h"
34 #include "parameters.h"
35 #include "util.h"
37 using namespace tidl;
39 class ExecutionObjectPipeline::Impl
40 {
41 public:
42 Impl(std::vector<ExecutionObject*> &eos);
43 ~Impl();
45 void SetInputOutputBuffer(const ArgInfo &in, const ArgInfo &out);
46 bool RunAsyncStart();
47 bool RunAsyncNext();
48 bool Wait();
50 // Trace related
51 void WriteLayerOutputsToFile(const std::string& filename_prefix) const;
52 const LayerOutput* GetOutputFromLayer(uint32_t layer_index,
53 uint32_t output_index) const;
54 const LayerOutputs* GetOutputsFromAllLayers() const;
56 //! for pipelined execution
57 std::vector<ExecutionObject*> eos_m;
58 std::vector<IODeviceArgInfo*> iobufs_m;
60 std::string device_name_m;
62 //! current frame index
63 int frame_idx_m;
65 //! current execution object index, and it context index
66 uint32_t curr_eo_idx_m;
67 uint32_t curr_eo_context_idx_m;
69 private:
70 //! @brief Initialize ExecutionObjectPipeline with given
71 //! ExecutionObjects: check consecutive layersGroup, allocate memory
72 void Initialize();
74 // flag, mutex and cond var for signaling completion and waiting
75 bool has_work_m, is_processed_m;
76 std::mutex mutex_m;
77 std::condition_variable cv_m;
78 };
80 ExecutionObjectPipeline::ExecutionObjectPipeline(
81 std::vector<ExecutionObject*> eos)
82 {
83 pimpl_m = std::unique_ptr<Impl> { new Impl(eos) };
84 }
86 ExecutionObjectPipeline::Impl::Impl(std::vector<ExecutionObject *> &eos) :
87 eos_m(eos), has_work_m(false), is_processed_m(false)
88 {
89 Initialize();
90 }
92 // Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
93 // Both unique_ptr and shared_ptr can be instantiated with an incomplete type
94 // unique_ptr's destructor requires a complete type in order to invoke delete
95 ExecutionObjectPipeline::~ExecutionObjectPipeline() = default;
97 char* ExecutionObjectPipeline::GetInputBufferPtr() const
98 {
99 return static_cast<char *>(pimpl_m->iobufs_m.front()->GetArg().ptr());
100 }
102 uint32_t ExecutionObjectPipeline::GetNumExecutionObjects() const
103 {
104 return pimpl_m->eos_m.size();
105 }
107 size_t ExecutionObjectPipeline::GetInputBufferSizeInBytes() const
108 {
109 return pimpl_m->eos_m.front()->GetInputBufferSizeInBytes();
110 }
112 char* ExecutionObjectPipeline::GetOutputBufferPtr() const
113 {
114 return static_cast<char *>(pimpl_m->iobufs_m.back()->GetArg().ptr());
115 }
117 size_t ExecutionObjectPipeline::GetOutputBufferSizeInBytes() const
118 {
119 return pimpl_m->eos_m.back()->GetOutputBufferSizeInBytes();
120 }
122 void ExecutionObjectPipeline::SetInputOutputBuffer(const ArgInfo& in,
123 const ArgInfo& out)
124 {
125 assert(in.ptr() != nullptr && in.size() >= GetInputBufferSizeInBytes());
126 assert(out.ptr() != nullptr && out.size() >= GetOutputBufferSizeInBytes());
127 pimpl_m->SetInputOutputBuffer(in, out);
128 }
130 void ExecutionObjectPipeline::SetFrameIndex(int idx)
131 {
132 pimpl_m->frame_idx_m = idx;
133 }
135 int ExecutionObjectPipeline::GetFrameIndex() const
136 {
137 return pimpl_m->frame_idx_m;
138 }
140 bool ExecutionObjectPipeline::ProcessFrameStartAsync()
141 {
142 RecordEvent(pimpl_m->frame_idx_m, TimeStamp::EOP_PFSA_START);
144 assert(GetInputBufferPtr() != nullptr && GetOutputBufferPtr() != nullptr);
145 bool st = pimpl_m->RunAsyncStart();
146 if (st)
147 st = pimpl_m->eos_m[0]->AddCallback(ExecutionObject::CallType::PROCESS,
148 this, pimpl_m->curr_eo_context_idx_m);
150 RecordEvent(pimpl_m->frame_idx_m, TimeStamp::EOP_PFSA_END);
151 return st;
152 }
154 bool ExecutionObjectPipeline::ProcessFrameWait()
155 {
156 return pimpl_m->Wait();
157 }
159 void CallbackWrapper(void *user_data)
160 {
161 int frame_index = ((ExecutionObjectPipeline *) user_data)->GetFrameIndex();
162 RecordEvent(frame_index, TimeStamp::EOP_RAN_START);
164 ((ExecutionObjectPipeline *) user_data)->RunAsyncNext();
166 RecordEvent(frame_index, TimeStamp::EOP_RAN_END);
167 }
169 void ExecutionObjectPipeline::RunAsyncNext()
170 {
171 bool has_next = pimpl_m->RunAsyncNext();
172 if (has_next)
173 pimpl_m->eos_m[pimpl_m->curr_eo_idx_m]->AddCallback(
174 ExecutionObject::CallType::PROCESS, this,
175 pimpl_m->curr_eo_context_idx_m);
176 }
178 const std::string& ExecutionObjectPipeline::GetDeviceName() const
179 {
180 return pimpl_m->device_name_m;
181 }
183 void
184 ExecutionObjectPipeline::WriteLayerOutputsToFile(
185 const std::string& filename_prefix) const
186 {
187 pimpl_m->WriteLayerOutputsToFile(filename_prefix);
188 }
190 const LayerOutput*
191 ExecutionObjectPipeline::GetOutputFromLayer(uint32_t layer_index,
192 uint32_t output_index) const
193 {
194 return pimpl_m->GetOutputFromLayer(layer_index, output_index);
195 }
197 const LayerOutputs*
198 ExecutionObjectPipeline::GetOutputsFromAllLayers() const
199 {
200 return pimpl_m->GetOutputsFromAllLayers();
201 }
204 /// Impl methods start here
207 static
208 void* AllocateMem(size_t size)
209 {
210 if (size == 0) return nullptr;
211 void *ptr = malloc(size);
212 if (ptr == nullptr)
213 throw Exception("Out of memory, ExecutionObjectPipeline malloc failed",
214 __FILE__, __FUNCTION__, __LINE__);
215 return ptr;
216 }
218 void ExecutionObjectPipeline::Impl::Initialize()
219 {
220 // Check consecutive layersGroups to form a pipeline
221 int prev_group = 0;
222 for (auto eo : eos_m)
223 {
224 int group = eo->GetLayersGroupId();
225 if (prev_group != 0 && group != prev_group + 1)
226 throw Exception(
227 "Non-consecutive layersGroupIds in ExecutionObjectPipeline",
228 __FILE__, __FUNCTION__, __LINE__);
229 prev_group = group;
230 }
232 for (auto eo : eos_m)
233 device_name_m += eo->GetDeviceName() + "+";
234 device_name_m.resize(device_name_m.size() - 1);
236 // Allocate input and output memory for EOs/layersGroups
237 // Note that i-th EO's output buffer is the same as (i+1)-th EO's input
238 // So, if n EOs, then (n+1) buffers: b EO b EO b EO b ... EO b
239 // User must set the first input buffer and the last output buffer
240 size_t size;
241 ArgInfo in(nullptr, 0);
242 iobufs_m.push_back(new IODeviceArgInfo(in));
243 for (auto eo : eos_m)
244 {
245 if (eo != eos_m.back())
246 size = eo->GetOutputBufferSizeInBytes();
247 else
248 size = 0;
250 void *ptr = AllocateMem(size);
251 ArgInfo out(ptr, size);
252 iobufs_m.push_back(new IODeviceArgInfo(out));
253 }
254 }
256 ExecutionObjectPipeline::Impl::~Impl()
257 {
258 int num_iobufs = iobufs_m.size();
259 for (int i = 0; i < num_iobufs; i++)
260 {
261 if (! (i == 0 || i == num_iobufs-1))
262 free(iobufs_m[i]->GetArg().ptr());
263 delete iobufs_m[i];
264 }
265 }
267 void ExecutionObjectPipeline::Impl::SetInputOutputBuffer(const ArgInfo &in,
268 const ArgInfo &out)
269 {
270 delete iobufs_m.front();
271 delete iobufs_m.back();
272 iobufs_m.front() = new IODeviceArgInfo(in);
273 iobufs_m.back() = new IODeviceArgInfo(out);
274 }
276 // Start execution on the first EO in the pipeline. Callbacks are used
277 // to trigger execution on subsequent EOs
278 bool ExecutionObjectPipeline::Impl::RunAsyncStart()
279 {
280 has_work_m = true;
281 is_processed_m = false;
282 curr_eo_idx_m = 0;
283 return eos_m[0]->AcquireAndRunContext(curr_eo_context_idx_m,
284 frame_idx_m,
285 *iobufs_m[0], *iobufs_m[1]);
286 }
288 // Invoked via the callback function, CallbackWrapper. Used to advance the
289 // pipeline.
290 // returns true if we have more EOs to execute
291 bool ExecutionObjectPipeline::Impl::RunAsyncNext()
292 {
293 eos_m[curr_eo_idx_m]->WaitAndReleaseContext(curr_eo_context_idx_m);
294 curr_eo_idx_m += 1;
295 if (curr_eo_idx_m < eos_m.size())
296 {
297 eos_m[curr_eo_idx_m]->AcquireAndRunContext(curr_eo_context_idx_m,
298 frame_idx_m,
299 *iobufs_m[curr_eo_idx_m],
300 *iobufs_m[curr_eo_idx_m+1]);
301 return true;
302 }
303 else
304 {
305 {
306 std::lock_guard<std::mutex> lock(mutex_m);
307 is_processed_m = true;
308 }
309 cv_m.notify_all();
310 return false;
311 }
312 }
314 bool ExecutionObjectPipeline::Impl::Wait()
315 {
316 if (! has_work_m) return false;
318 RecordEvent(frame_idx_m, TimeStamp::EOP_PFW_START);
320 std::unique_lock<std::mutex> lock(mutex_m);
321 cv_m.wait(lock, [this]{ return this->is_processed_m; });
322 has_work_m = false;
324 RecordEvent(frame_idx_m, TimeStamp::EOP_PFW_END);
326 return true;
327 }
329 void
330 ExecutionObjectPipeline::Impl::WriteLayerOutputsToFile(
331 const std::string& filename_prefix) const
332 {
333 for (auto eo : eos_m)
334 eo->WriteLayerOutputsToFile(filename_prefix);
335 }
337 const LayerOutput*
338 ExecutionObjectPipeline::Impl::GetOutputFromLayer(uint32_t layer_index,
339 uint32_t output_index) const
340 {
341 const LayerOutput* lo = nullptr;
342 for (auto eo : eos_m)
343 {
344 lo = eo->GetOutputFromLayer(layer_index, output_index);
345 if (lo != nullptr) break;
346 }
347 return lo;
348 }
350 const LayerOutputs*
351 ExecutionObjectPipeline::Impl::GetOutputsFromAllLayers() const
352 {
353 LayerOutputs *all = new LayerOutputs;
354 for (auto eo : eos_m)
355 {
356 LayerOutputs *los = const_cast<LayerOutputs *>(
357 eo->GetOutputsFromAllLayers());
358 for (auto& lo : *los)
359 all->push_back(std::unique_ptr<const LayerOutput>{ lo.release() });
360 delete los;
361 }
362 return all;
363 }