tidl_api/src/execution_object_pipeline.cpp

   1 /******************************************************************************
   2  * Copyright (c) 2018 Texas Instruments Incorporated - http://www.ti.com/
   3  *  All rights reserved.
   4  *
   5  *  Redistribution and use in source and binary forms, with or without
   6  *  modification, are permitted provided that the following conditions are met:
   7  *      * Redistributions of source code must retain the above copyright
   8  *        notice, this list of conditions and the following disclaimer.
   9  *      * Redistributions in binary form must reproduce the above copyright
  10  *        notice, this list of conditions and the following disclaimer in the
  11  *        documentation and/or other materials provided with the distribution.
  12  *      * Neither the name of Texas Instruments Incorporated nor the
  13  *        names of its contributors may be used to endorse or promote products
  14  *        derived from this software without specific prior written permission.
  15  *
  16  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  21  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  22  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  23  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  24  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  25  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  26  *  THE POSSIBILITY OF SUCH DAMAGE.
  27  *****************************************************************************/
  28
  29 #include <assert.h>
  30 #include <mutex>
  31 #include <condition_variable>
  32 #include "device_arginfo.h"
  33 #include "execution_object_pipeline.h"
  34 #include "parameters.h"
  35 #include "util.h"
  36
  37 using namespace tidl;
  38
  39 class ExecutionObjectPipeline::Impl
  40 {
  41     public:
  42         Impl(std::vector<ExecutionObject*> &eos);
  43         ~Impl();
  44
  45         void SetInputOutputBuffer(const ArgInfo &in, const ArgInfo &out);
  46         bool RunAsyncStart();
  47         bool RunAsyncNext();
  48         bool Wait();
  49
  50         // Trace related
  51         void WriteLayerOutputsToFile(const std::string& filename_prefix) const;
  52         const LayerOutput* GetOutputFromLayer(uint32_t layer_index,
  53                                               uint32_t output_index) const;
  54         const LayerOutputs* GetOutputsFromAllLayers() const;
  55
  56         //! for pipelined execution
  57         std::vector<ExecutionObject*> eos_m;
  58         std::vector<IODeviceArgInfo*> iobufs_m;
  59
  60         std::string device_name_m;
  61
  62         //! current frame index
  63         int frame_idx_m;
  64
  65         //! current execution object index, and it context index
  66         uint32_t curr_eo_idx_m;
  67         uint32_t curr_eo_context_idx_m;
  68
  69     private:
  70         //! @brief Initialize ExecutionObjectPipeline with given
  71         //! ExecutionObjects: check consecutive layersGroup, allocate memory
  72         void Initialize();
  73
  74         // flag, mutex and cond var for signaling completion and waiting
  75         bool has_work_m, is_processed_m;
  76         std::mutex mutex_m;
  77         std::condition_variable cv_m;
  78 };
  79
  80 ExecutionObjectPipeline::ExecutionObjectPipeline(
  81     std::vector<ExecutionObject*> eos)
  82 {
  83     pimpl_m = std::unique_ptr<Impl> { new Impl(eos) };
  84 }
  85
  86 ExecutionObjectPipeline::Impl::Impl(std::vector<ExecutionObject *> &eos) :
  87     eos_m(eos), has_work_m(false), is_processed_m(false)
  88 {
  89     Initialize();
  90 }
  91
  92 // Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
  93 // Both unique_ptr and shared_ptr can be instantiated with an incomplete type
  94 // unique_ptr's destructor requires a complete type in order to invoke delete
  95 ExecutionObjectPipeline::~ExecutionObjectPipeline() = default;
  96
  97 char* ExecutionObjectPipeline::GetInputBufferPtr() const
  98 {
  99     return static_cast<char *>(pimpl_m->iobufs_m.front()->GetArg().ptr());
 100 }
 101
 102 uint32_t ExecutionObjectPipeline::GetNumExecutionObjects() const
 103 {
 104     return pimpl_m->eos_m.size();
 105 }
 106
 107 size_t ExecutionObjectPipeline::GetInputBufferSizeInBytes() const
 108 {
 109     return pimpl_m->eos_m.front()->GetInputBufferSizeInBytes();
 110 }
 111
 112 char* ExecutionObjectPipeline::GetOutputBufferPtr() const
 113 {
 114     return static_cast<char *>(pimpl_m->iobufs_m.back()->GetArg().ptr());
 115 }
 116
 117 size_t ExecutionObjectPipeline::GetOutputBufferSizeInBytes() const
 118 {
 119     return pimpl_m->eos_m.back()->GetOutputBufferSizeInBytes();
 120 }
 121
 122 void ExecutionObjectPipeline::SetInputOutputBuffer(const ArgInfo& in,
 123                                                    const ArgInfo& out)
 124 {
 125     assert(in.ptr() != nullptr  && in.size() >= GetInputBufferSizeInBytes());
 126     assert(out.ptr() != nullptr && out.size() >= GetOutputBufferSizeInBytes());
 127     pimpl_m->SetInputOutputBuffer(in, out);
 128 }
 129
 130 void ExecutionObjectPipeline::SetFrameIndex(int idx)
 131 {
 132     pimpl_m->frame_idx_m = idx;
 133 }
 134
 135 int ExecutionObjectPipeline::GetFrameIndex() const
 136 {
 137     return pimpl_m->frame_idx_m;
 138 }
 139
 140 bool ExecutionObjectPipeline::ProcessFrameStartAsync()
 141 {
 142     RecordEvent(pimpl_m->frame_idx_m, TimeStamp::EOP_PFSA_START);
 143
 144     assert(GetInputBufferPtr() != nullptr && GetOutputBufferPtr() != nullptr);
 145     bool st = pimpl_m->RunAsyncStart();
 146     if (st)
 147         st = pimpl_m->eos_m[0]->AddCallback(ExecutionObject::CallType::PROCESS,
 148                                          this, pimpl_m->curr_eo_context_idx_m);
 149
 150     RecordEvent(pimpl_m->frame_idx_m, TimeStamp::EOP_PFSA_END);
 151     return st;
 152 }
 153
 154 bool ExecutionObjectPipeline::ProcessFrameWait()
 155 {
 156     return pimpl_m->Wait();
 157 }
 158
 159 void CallbackWrapper(void *user_data)
 160 {
 161     int frame_index = ((ExecutionObjectPipeline *) user_data)->GetFrameIndex();
 162     RecordEvent(frame_index, TimeStamp::EOP_RAN_START);
 163
 164     ((ExecutionObjectPipeline *) user_data)->RunAsyncNext();
 165
 166     RecordEvent(frame_index, TimeStamp::EOP_RAN_END);
 167 }
 168
 169 void ExecutionObjectPipeline::RunAsyncNext()
 170 {
 171     bool has_next = pimpl_m->RunAsyncNext();
 172     if (has_next)
 173         pimpl_m->eos_m[pimpl_m->curr_eo_idx_m]->AddCallback(
 174                                      ExecutionObject::CallType::PROCESS, this,
 175                                      pimpl_m->curr_eo_context_idx_m);
 176 }
 177
 178 const std::string& ExecutionObjectPipeline::GetDeviceName() const
 179 {
 180     return pimpl_m->device_name_m;
 181 }
 182
 183 void
 184 ExecutionObjectPipeline::WriteLayerOutputsToFile(
 185     const std::string& filename_prefix) const
 186 {
 187     pimpl_m->WriteLayerOutputsToFile(filename_prefix);
 188 }
 189
 190 const LayerOutput*
 191 ExecutionObjectPipeline::GetOutputFromLayer(uint32_t layer_index,
 192     uint32_t output_index) const
 193 {
 194     return pimpl_m->GetOutputFromLayer(layer_index, output_index);
 195 }
 196
 197 const LayerOutputs*
 198 ExecutionObjectPipeline::GetOutputsFromAllLayers() const
 199 {
 200     return pimpl_m->GetOutputsFromAllLayers();
 201 }
 202
 203
 204 /// Impl methods start here
 205
 206
 207 static
 208 void* AllocateMem(size_t size)
 209 {
 210     if (size == 0)  return nullptr;
 211     void *ptr = malloc(size);
 212     if (ptr == nullptr)
 213         throw Exception("Out of memory, ExecutionObjectPipeline malloc failed",
 214                         __FILE__, __FUNCTION__, __LINE__);
 215     return ptr;
 216 }
 217
 218 void ExecutionObjectPipeline::Impl::Initialize()
 219 {
 220     // Check consecutive layersGroups to form a pipeline
 221     int prev_group = 0;
 222     for (auto eo : eos_m)
 223     {
 224         int group = eo->GetLayersGroupId();
 225         if (prev_group != 0 && group != prev_group + 1)
 226             throw Exception(
 227                 "Non-consecutive layersGroupIds in ExecutionObjectPipeline",
 228                 __FILE__, __FUNCTION__, __LINE__);
 229         prev_group = group;
 230     }
 231
 232     for (auto eo : eos_m)
 233         device_name_m += eo->GetDeviceName() + "+";
 234     device_name_m.resize(device_name_m.size() - 1);
 235
 236     // Allocate input and output memory for EOs/layersGroups
 237     // Note that i-th EO's output buffer is the same as (i+1)-th EO's input
 238     // So, if n EOs, then (n+1) buffers: b EO b EO b EO b ... EO b
 239     // User must set the first input buffer and the last output buffer
 240     size_t size;
 241     ArgInfo in(nullptr, 0);
 242     iobufs_m.push_back(new IODeviceArgInfo(in));
 243     for (auto eo : eos_m)
 244     {
 245         if (eo != eos_m.back())
 246             size = eo->GetOutputBufferSizeInBytes();
 247         else
 248             size = 0;
 249
 250         void *ptr = AllocateMem(size);
 251         ArgInfo out(ptr, size);
 252         iobufs_m.push_back(new IODeviceArgInfo(out));
 253     }
 254 }
 255
 256 ExecutionObjectPipeline::Impl::~Impl()
 257 {
 258     int num_iobufs = iobufs_m.size();
 259     for (int i = 0; i < num_iobufs; i++)
 260     {
 261         if (! (i == 0 || i == num_iobufs-1))
 262             free(iobufs_m[i]->GetArg().ptr());
 263         delete iobufs_m[i];
 264     }
 265 }
 266
 267 void ExecutionObjectPipeline::Impl::SetInputOutputBuffer(const ArgInfo &in,
 268                                                          const ArgInfo &out)
 269 {
 270     delete iobufs_m.front();
 271     delete iobufs_m.back();
 272     iobufs_m.front() = new IODeviceArgInfo(in);
 273     iobufs_m.back()  = new IODeviceArgInfo(out);
 274 }
 275
 276 // Start execution on the first EO in the pipeline. Callbacks are used
 277 // to trigger execution on subsequent EOs
 278 bool ExecutionObjectPipeline::Impl::RunAsyncStart()
 279 {
 280     has_work_m = true;
 281     is_processed_m = false;
 282     curr_eo_idx_m = 0;
 283     return eos_m[0]->AcquireAndRunContext(curr_eo_context_idx_m,
 284                                           frame_idx_m,
 285                                           *iobufs_m[0], *iobufs_m[1]);
 286 }
 287
 288 // Invoked via the callback function, CallbackWrapper. Used to advance the
 289 // pipeline.
 290 // returns true if we have more EOs to execute
 291 bool ExecutionObjectPipeline::Impl::RunAsyncNext()
 292 {
 293     eos_m[curr_eo_idx_m]->WaitAndReleaseContext(curr_eo_context_idx_m);
 294     curr_eo_idx_m += 1;
 295     if (curr_eo_idx_m < eos_m.size())
 296     {
 297         eos_m[curr_eo_idx_m]->AcquireAndRunContext(curr_eo_context_idx_m,
 298                                                    frame_idx_m,
 299                                                    *iobufs_m[curr_eo_idx_m],
 300                                                    *iobufs_m[curr_eo_idx_m+1]);
 301         return true;
 302     }
 303     else
 304     {
 305         {
 306             std::lock_guard<std::mutex> lock(mutex_m);
 307             is_processed_m = true;
 308         }
 309         cv_m.notify_all();
 310         return false;
 311     }
 312 }
 313
 314 bool ExecutionObjectPipeline::Impl::Wait()
 315 {
 316     if (! has_work_m)  return false;
 317
 318     RecordEvent(frame_idx_m, TimeStamp::EOP_PFW_START);
 319
 320     std::unique_lock<std::mutex> lock(mutex_m);
 321     cv_m.wait(lock, [this]{ return this->is_processed_m; });
 322     has_work_m = false;
 323
 324     RecordEvent(frame_idx_m, TimeStamp::EOP_PFW_END);
 325
 326     return true;
 327 }
 328
 329 void
 330 ExecutionObjectPipeline::Impl::WriteLayerOutputsToFile(
 331     const std::string& filename_prefix) const
 332 {
 333     for (auto eo : eos_m)
 334         eo->WriteLayerOutputsToFile(filename_prefix);
 335 }
 336
 337 const LayerOutput*
 338 ExecutionObjectPipeline::Impl::GetOutputFromLayer(uint32_t layer_index,
 339     uint32_t output_index) const
 340 {
 341     const LayerOutput* lo = nullptr;
 342     for (auto eo : eos_m)
 343     {
 344         lo = eo->GetOutputFromLayer(layer_index, output_index);
 345         if (lo != nullptr)  break;
 346     }
 347     return lo;
 348 }
 349
 350 const LayerOutputs*
 351 ExecutionObjectPipeline::Impl::GetOutputsFromAllLayers() const
 352 {
 353     LayerOutputs *all = new LayerOutputs;
 354     for (auto eo : eos_m)
 355     {
 356         LayerOutputs *los = const_cast<LayerOutputs *>(
 357                                                 eo->GetOutputsFromAllLayers());
 358         for (auto& lo : *los)
 359             all->push_back(std::unique_ptr<const LayerOutput>{ lo.release() });
 360         delete los;
 361     }
 362     return all;
 363 }
 364