1 /******************************************************************************
2 * Copyright (c) 2018 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
29 #include <assert.h>
30 #include <mutex>
31 #include <condition_variable>
32 #include <chrono>
33 #include "device_arginfo.h"
34 #include "execution_object_pipeline.h"
35 #include "parameters.h"
37 using namespace tidl;
39 class ExecutionObjectPipeline::Impl
40 {
41 public:
42 Impl(std::vector<ExecutionObject*> &eos);
43 ~Impl();
45 void SetInputOutputBuffer(const ArgInfo &in, const ArgInfo &out);
46 bool RunAsyncStart();
47 bool RunAsyncNext();
48 bool Wait();
50 // Trace related
51 void WriteLayerOutputsToFile(const std::string& filename_prefix) const;
52 const LayerOutput* GetOutputFromLayer(uint32_t layer_index,
53 uint32_t output_index) const;
54 const LayerOutputs* GetOutputsFromAllLayers() const;
56 //! for pipelined execution
57 std::vector<ExecutionObject*> eos_m;
58 std::vector<IODeviceArgInfo*> iobufs_m;
59 std::vector<float> eo_device_time_m;
60 std::vector<float> eo_host_time_m;
62 std::string device_name_m;
64 //! current frame index
65 int frame_idx_m;
67 //! current execution object index, and it context index
68 uint32_t curr_eo_idx_m;
69 uint32_t curr_eo_context_idx_m;
71 // device and host time tracking: pipeline start to finish
72 float device_time_m;
73 float host_time_m;
75 private:
76 //! @brief Initialize ExecutionObjectPipeline with given
77 //! ExecutionObjects: check consecutive layersGroup, allocate memory
78 void Initialize();
80 // flag, mutex and cond var for signaling completion and waiting
81 bool has_work_m, is_processed_m;
82 std::mutex mutex_m;
83 std::condition_variable cv_m;
85 // host time tracking: pipeline start to finish
86 std::chrono::time_point<std::chrono::steady_clock> start_m;
87 };
89 ExecutionObjectPipeline::ExecutionObjectPipeline(
90 std::vector<ExecutionObject*> eos)
91 {
92 pimpl_m = std::unique_ptr<Impl> { new Impl(eos) };
93 }
95 ExecutionObjectPipeline::Impl::Impl(std::vector<ExecutionObject *> &eos) :
96 eos_m(eos), has_work_m(false), is_processed_m(false)
97 {
98 Initialize();
99 }
101 // Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
102 // Both unique_ptr and shared_ptr can be instantiated with an incomplete type
103 // unique_ptr's destructor requires a complete type in order to invoke delete
104 ExecutionObjectPipeline::~ExecutionObjectPipeline() = default;
106 char* ExecutionObjectPipeline::GetInputBufferPtr() const
107 {
108 return static_cast<char *>(pimpl_m->iobufs_m.front()->GetArg().ptr());
109 }
111 uint32_t ExecutionObjectPipeline::GetNumExecutionObjects() const
112 {
113 return pimpl_m->eos_m.size();
114 }
116 size_t ExecutionObjectPipeline::GetInputBufferSizeInBytes() const
117 {
118 return pimpl_m->eos_m.front()->GetInputBufferSizeInBytes();
119 }
121 char* ExecutionObjectPipeline::GetOutputBufferPtr() const
122 {
123 return static_cast<char *>(pimpl_m->iobufs_m.back()->GetArg().ptr());
124 }
126 size_t ExecutionObjectPipeline::GetOutputBufferSizeInBytes() const
127 {
128 return pimpl_m->eos_m.back()->GetOutputBufferSizeInBytes();
129 }
131 void ExecutionObjectPipeline::SetInputOutputBuffer(const ArgInfo& in,
132 const ArgInfo& out)
133 {
134 assert(in.ptr() != nullptr && in.size() >= GetInputBufferSizeInBytes());
135 assert(out.ptr() != nullptr && out.size() >= GetOutputBufferSizeInBytes());
136 pimpl_m->SetInputOutputBuffer(in, out);
137 }
139 void ExecutionObjectPipeline::SetFrameIndex(int idx)
140 {
141 pimpl_m->frame_idx_m = idx;
142 }
144 int ExecutionObjectPipeline::GetFrameIndex() const
145 {
146 return pimpl_m->frame_idx_m;
147 }
149 bool ExecutionObjectPipeline::ProcessFrameStartAsync()
150 {
151 assert(GetInputBufferPtr() != nullptr && GetOutputBufferPtr() != nullptr);
152 bool st = pimpl_m->RunAsyncStart();
153 if (st)
154 st = pimpl_m->eos_m[0]->AddCallback(ExecutionObject::CallType::PROCESS,
155 this, pimpl_m->curr_eo_context_idx_m);
156 return st;
157 }
159 bool ExecutionObjectPipeline::ProcessFrameWait()
160 {
161 return pimpl_m->Wait();
162 }
164 void CallbackWrapper(void *user_data)
165 {
166 ((ExecutionObjectPipeline *) user_data)->RunAsyncNext();
167 }
169 void ExecutionObjectPipeline::RunAsyncNext()
170 {
171 bool has_next = pimpl_m->RunAsyncNext();
172 if (has_next)
173 pimpl_m->eos_m[pimpl_m->curr_eo_idx_m]->AddCallback(
174 ExecutionObject::CallType::PROCESS, this,
175 pimpl_m->curr_eo_context_idx_m);
176 }
178 float ExecutionObjectPipeline::GetProcessTimeInMilliSeconds() const
179 {
180 return pimpl_m->device_time_m;
181 }
183 float ExecutionObjectPipeline::GetHostProcessTimeInMilliSeconds() const
184 {
185 return pimpl_m->host_time_m;
186 }
188 float ExecutionObjectPipeline::GetProcessTimeInMilliSeconds(
189 uint32_t eo_index) const
190 {
191 assert(eo_index < pimpl_m->eos_m.size());
192 return pimpl_m->eo_device_time_m[eo_index];
193 }
195 float ExecutionObjectPipeline::GetHostProcessTimeInMilliSeconds(
196 uint32_t eo_index) const
197 {
198 assert(eo_index < pimpl_m->eos_m.size());
199 return pimpl_m->eo_host_time_m[eo_index];
200 }
202 const std::string& ExecutionObjectPipeline::GetDeviceName() const
203 {
204 return pimpl_m->device_name_m;
205 }
207 void
208 ExecutionObjectPipeline::WriteLayerOutputsToFile(
209 const std::string& filename_prefix) const
210 {
211 pimpl_m->WriteLayerOutputsToFile(filename_prefix);
212 }
214 const LayerOutput*
215 ExecutionObjectPipeline::GetOutputFromLayer(uint32_t layer_index,
216 uint32_t output_index) const
217 {
218 return pimpl_m->GetOutputFromLayer(layer_index, output_index);
219 }
221 const LayerOutputs*
222 ExecutionObjectPipeline::GetOutputsFromAllLayers() const
223 {
224 return pimpl_m->GetOutputsFromAllLayers();
225 }
228 /// Impl methods start here
231 static
232 void* AllocateMem(size_t size)
233 {
234 if (size == 0) return nullptr;
235 void *ptr = malloc(size);
236 if (ptr == nullptr)
237 throw Exception("Out of memory, ExecutionObjectPipeline malloc failed",
238 __FILE__, __FUNCTION__, __LINE__);
239 return ptr;
240 }
242 void ExecutionObjectPipeline::Impl::Initialize()
243 {
244 // Check consecutive layersGroups to form a pipeline
245 int prev_group = 0;
246 for (auto eo : eos_m)
247 {
248 int group = eo->GetLayersGroupId();
249 if (prev_group != 0 && group != prev_group + 1)
250 throw Exception(
251 "Non-consecutive layersGroupIds in ExecutionObjectPipeline",
252 __FILE__, __FUNCTION__, __LINE__);
253 prev_group = group;
254 }
256 for (auto eo : eos_m)
257 device_name_m += eo->GetDeviceName() + "+";
258 device_name_m.resize(device_name_m.size() - 1);
260 // Allocate input and output memory for EOs/layersGroups
261 // Note that i-th EO's output buffer is the same as (i+1)-th EO's input
262 // So, if n EOs, then (n+1) buffers: b EO b EO b EO b ... EO b
263 // User must set the first input buffer and the last output buffer
264 size_t size;
265 ArgInfo in(nullptr, 0);
266 iobufs_m.push_back(new IODeviceArgInfo(in));
267 for (auto eo : eos_m)
268 {
269 if (eo != eos_m.back())
270 size = eo->GetOutputBufferSizeInBytes();
271 else
272 size = 0;
274 void *ptr = AllocateMem(size);
275 ArgInfo out(ptr, size);
276 iobufs_m.push_back(new IODeviceArgInfo(out));
277 }
279 // Record keeping for each EO's device time and host time
280 // because EO could be shared by another EOP
281 eo_device_time_m.resize(eos_m.size());
282 eo_host_time_m.resize(eos_m.size());
283 }
285 ExecutionObjectPipeline::Impl::~Impl()
286 {
287 int num_iobufs = iobufs_m.size();
288 for (int i = 0; i < num_iobufs; i++)
289 {
290 if (! (i == 0 || i == num_iobufs-1))
291 free(iobufs_m[i]->GetArg().ptr());
292 delete iobufs_m[i];
293 }
294 }
296 void ExecutionObjectPipeline::Impl::SetInputOutputBuffer(const ArgInfo &in,
297 const ArgInfo &out)
298 {
299 delete iobufs_m.front();
300 delete iobufs_m.back();
301 iobufs_m.front() = new IODeviceArgInfo(in);
302 iobufs_m.back() = new IODeviceArgInfo(out);
303 }
305 bool ExecutionObjectPipeline::Impl::RunAsyncStart()
306 {
307 has_work_m = true;
308 is_processed_m = false;
309 device_time_m = 0.0f;
310 host_time_m = 0.0f;
311 curr_eo_idx_m = 0;
312 eos_m[0]->AcquireContext(curr_eo_context_idx_m);
313 if (tidl::internal::NUM_CONTEXTS == 1)
314 start_m = std::chrono::steady_clock::now();
315 eos_m[0]->SetFrameIndex(frame_idx_m);
316 eos_m[0]->SetInputOutputBuffer(iobufs_m[0], iobufs_m[1],
317 curr_eo_context_idx_m);
318 return eos_m[0]->ProcessFrameStartAsync(curr_eo_context_idx_m);
319 }
321 // returns true if we have more EOs to execute
322 bool ExecutionObjectPipeline::Impl::RunAsyncNext()
323 {
324 eos_m[curr_eo_idx_m]->ProcessFrameWait(curr_eo_context_idx_m);
325 // need to capture EO's device/host time before we release its lock
326 eo_device_time_m[curr_eo_idx_m] = eos_m[curr_eo_idx_m]->
327 GetProcessTimeInMilliSeconds(curr_eo_context_idx_m);
328 eo_host_time_m[curr_eo_idx_m] = eos_m[curr_eo_idx_m]->
329 GetHostProcessTimeInMilliSeconds(curr_eo_context_idx_m);
330 device_time_m += eo_device_time_m[curr_eo_idx_m];
331 if (tidl::internal::NUM_CONTEXTS > 1)
332 host_time_m += eo_host_time_m[curr_eo_idx_m];
333 eos_m[curr_eo_idx_m]->ReleaseContext(curr_eo_context_idx_m);
334 curr_eo_idx_m += 1;
335 if (curr_eo_idx_m < eos_m.size())
336 {
337 eos_m[curr_eo_idx_m]->AcquireContext(curr_eo_context_idx_m);
338 eos_m[curr_eo_idx_m]->SetFrameIndex(frame_idx_m);
339 eos_m[curr_eo_idx_m]->SetInputOutputBuffer(iobufs_m[curr_eo_idx_m],
340 iobufs_m[curr_eo_idx_m+1], curr_eo_context_idx_m);
341 eos_m[curr_eo_idx_m]->ProcessFrameStartAsync(curr_eo_context_idx_m);
342 return true;
343 }
344 else
345 {
346 if (tidl::internal::NUM_CONTEXTS == 1)
347 {
348 std::chrono::duration<float> elapsed =
349 std::chrono::steady_clock::now() - start_m;
350 host_time_m = elapsed.count() * 1000; // seconds to milliseconds
351 }
352 is_processed_m = true;
353 cv_m.notify_all();
354 return false;
355 }
356 }
358 bool ExecutionObjectPipeline::Impl::Wait()
359 {
360 if (! has_work_m) return false;
362 std::unique_lock<std::mutex> lock(mutex_m);
363 cv_m.wait(lock, [this]{ return this->is_processed_m; });
364 has_work_m = false;
365 return true;
366 }
368 void
369 ExecutionObjectPipeline::Impl::WriteLayerOutputsToFile(
370 const std::string& filename_prefix) const
371 {
372 for (auto eo : eos_m)
373 eo->WriteLayerOutputsToFile(filename_prefix);
374 }
376 const LayerOutput*
377 ExecutionObjectPipeline::Impl::GetOutputFromLayer(uint32_t layer_index,
378 uint32_t output_index) const
379 {
380 const LayerOutput* lo = nullptr;
381 for (auto eo : eos_m)
382 {
383 lo = eo->GetOutputFromLayer(layer_index, output_index);
384 if (lo != nullptr) break;
385 }
386 return lo;
387 }
389 const LayerOutputs*
390 ExecutionObjectPipeline::Impl::GetOutputsFromAllLayers() const
391 {
392 LayerOutputs *all = new LayerOutputs;
393 for (auto eo : eos_m)
394 {
395 LayerOutputs *los = const_cast<LayerOutputs *>(
396 eo->GetOutputsFromAllLayers());
397 for (auto& lo : *los)
398 all->push_back(std::unique_ptr<const LayerOutput>{ lo.release() });
399 delete los;
400 }
401 return all;
402 }