tidl_api/src/execution_object_pipeline.cpp

   1 /******************************************************************************
   2  * Copyright (c) 2018 Texas Instruments Incorporated - http://www.ti.com/
   3  *  All rights reserved.
   4  *
   5  *  Redistribution and use in source and binary forms, with or without
   6  *  modification, are permitted provided that the following conditions are met:
   7  *      * Redistributions of source code must retain the above copyright
   8  *        notice, this list of conditions and the following disclaimer.
   9  *      * Redistributions in binary form must reproduce the above copyright
  10  *        notice, this list of conditions and the following disclaimer in the
  11  *        documentation and/or other materials provided with the distribution.
  12  *      * Neither the name of Texas Instruments Incorporated nor the
  13  *        names of its contributors may be used to endorse or promote products
  14  *        derived from this software without specific prior written permission.
  15  *
  16  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17  *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  *  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20  *  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  21  *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  22  *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  23  *  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  24  *  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  25  *  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  26  *  THE POSSIBILITY OF SUCH DAMAGE.
  27  *****************************************************************************/
  28
  29 #include <assert.h>
  30 #include <mutex>
  31 #include <condition_variable>
  32 #include <chrono>
  33 #include "device_arginfo.h"
  34 #include "execution_object_pipeline.h"
  35
  36 using namespace tidl;
  37
  38 class ExecutionObjectPipeline::Impl
  39 {
  40     public:
  41         Impl(std::vector<ExecutionObject*> &eos);
  42         ~Impl();
  43
  44         void SetInputOutputBuffer(const ArgInfo &in, const ArgInfo &out);
  45         bool RunAsyncStart();
  46         bool RunAsyncNext();
  47         bool Wait();
  48
  49         // Trace related
  50         void WriteLayerOutputsToFile(const std::string& filename_prefix) const;
  51         const LayerOutput* GetOutputFromLayer(uint32_t layer_index,
  52                                               uint32_t output_index) const;
  53         const LayerOutputs* GetOutputsFromAllLayers() const;
  54
  55         //! for pipelined execution
  56         std::vector<ExecutionObject*> eos_m;
  57         std::vector<IODeviceArgInfo*> iobufs_m;
  58
  59         std::string device_name_m;
  60
  61         //! current frame index
  62         int frame_idx_m;
  63
  64         //! current execution object index
  65         uint32_t curr_eo_idx_m;
  66
  67         // host time tracking: pipeline start to finish
  68         float host_time_m;
  69
  70     private:
  71         //! @brief Initialize ExecutionObjectPipeline with given
  72         //! ExecutionObjects: check consecutive layersGroup, allocate memory
  73         void Initialize();
  74
  75         // flag, mutex and cond var for signaling completion and waiting
  76         bool has_work_m, is_processed_m;
  77         std::mutex mutex_m;
  78         std::condition_variable cv_m;
  79
  80         // host time tracking: pipeline start to finish
  81         std::chrono::time_point<std::chrono::steady_clock> start_m;
  82 };
  83
  84 ExecutionObjectPipeline::ExecutionObjectPipeline(
  85     std::vector<ExecutionObject*> eos)
  86 {
  87     pimpl_m = std::unique_ptr<Impl> { new Impl(eos) };
  88 }
  89
  90 ExecutionObjectPipeline::Impl::Impl(std::vector<ExecutionObject *> &eos) :
  91     eos_m(eos), has_work_m(false), is_processed_m(false)
  92 {
  93     Initialize();
  94 }
  95
  96 // Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
  97 // Both unique_ptr and shared_ptr can be instantiated with an incomplete type
  98 // unique_ptr's destructor requires a complete type in order to invoke delete
  99 ExecutionObjectPipeline::~ExecutionObjectPipeline() = default;
 100
 101 char* ExecutionObjectPipeline::GetInputBufferPtr() const
 102 {
 103     return static_cast<char *>(pimpl_m->iobufs_m.front()->GetArg().ptr());
 104 }
 105
 106 size_t ExecutionObjectPipeline::GetInputBufferSizeInBytes() const
 107 {
 108     return pimpl_m->eos_m.front()->GetInputBufferSizeInBytes();
 109 }
 110
 111 char* ExecutionObjectPipeline::GetOutputBufferPtr() const
 112 {
 113     return static_cast<char *>(pimpl_m->iobufs_m.back()->GetArg().ptr());
 114 }
 115
 116 size_t ExecutionObjectPipeline::GetOutputBufferSizeInBytes() const
 117 {
 118     return pimpl_m->eos_m.back()->GetOutputBufferSizeInBytes();
 119 }
 120
 121 void ExecutionObjectPipeline::SetInputOutputBuffer(const ArgInfo& in,
 122                                                    const ArgInfo& out)
 123 {
 124     assert(in.ptr() != nullptr  && in.size() >= GetInputBufferSizeInBytes());
 125     assert(out.ptr() != nullptr && out.size() >= GetOutputBufferSizeInBytes());
 126     pimpl_m->SetInputOutputBuffer(in, out);
 127 }
 128
 129 void ExecutionObjectPipeline::SetFrameIndex(int idx)
 130 {
 131     pimpl_m->frame_idx_m = idx;
 132 }
 133
 134 int ExecutionObjectPipeline::GetFrameIndex() const
 135 {
 136     return pimpl_m->frame_idx_m;
 137 }
 138
 139 bool ExecutionObjectPipeline::ProcessFrameStartAsync()
 140 {
 141     assert(GetInputBufferPtr() != nullptr && GetOutputBufferPtr() != nullptr);
 142     bool st = pimpl_m->RunAsyncStart();
 143     if (st)
 144         st = pimpl_m->eos_m[0]->AddCallback(ExecutionObject::CallType::PROCESS,
 145                                             this);
 146     return st;
 147 }
 148
 149 bool ExecutionObjectPipeline::ProcessFrameWait()
 150 {
 151     return pimpl_m->Wait();
 152 }
 153
 154 void CallbackWrapper(void *user_data)
 155 {
 156     ((ExecutionObjectPipeline *) user_data)->RunAsyncNext();
 157 }
 158
 159 void ExecutionObjectPipeline::RunAsyncNext()
 160 {
 161     bool has_next = pimpl_m->RunAsyncNext();
 162     if (has_next)
 163         pimpl_m->eos_m[pimpl_m->curr_eo_idx_m]->AddCallback(
 164                                      ExecutionObject::CallType::PROCESS, this);
 165 }
 166
 167 float ExecutionObjectPipeline::GetProcessTimeInMilliSeconds() const
 168 {
 169     float total = 0.0f;
 170     for (auto eo : pimpl_m->eos_m)
 171         total += eo->GetProcessTimeInMilliSeconds();
 172     return total;
 173 }
 174
 175 float ExecutionObjectPipeline::GetHostProcessTimeInMilliSeconds() const
 176 {
 177     return pimpl_m->host_time_m;
 178 }
 179
 180 const std::string& ExecutionObjectPipeline::GetDeviceName() const
 181 {
 182     return pimpl_m->device_name_m;
 183 }
 184
 185 void
 186 ExecutionObjectPipeline::WriteLayerOutputsToFile(
 187     const std::string& filename_prefix) const
 188 {
 189     pimpl_m->WriteLayerOutputsToFile(filename_prefix);
 190 }
 191
 192 const LayerOutput*
 193 ExecutionObjectPipeline::GetOutputFromLayer(uint32_t layer_index,
 194     uint32_t output_index) const
 195 {
 196     return pimpl_m->GetOutputFromLayer(layer_index, output_index);
 197 }
 198
 199 const LayerOutputs*
 200 ExecutionObjectPipeline::GetOutputsFromAllLayers() const
 201 {
 202     return pimpl_m->GetOutputsFromAllLayers();
 203 }
 204
 205
 206 /// Impl methods start here
 207
 208
 209 static
 210 void* AllocateMem(size_t size)
 211 {
 212     if (size == 0)  return nullptr;
 213     void *ptr = malloc(size);
 214     if (ptr == nullptr)
 215         throw Exception("Out of memory, ExecutionObjectPipeline malloc failed",
 216                         __FILE__, __FUNCTION__, __LINE__);
 217     return ptr;
 218 }
 219
 220 void ExecutionObjectPipeline::Impl::Initialize()
 221 {
 222     // Check consecutive layersGroups to form a pipeline
 223     int prev_group = 0;
 224     for (auto eo : eos_m)
 225     {
 226         int group = eo->GetLayersGroupId();
 227         if (prev_group != 0 && group != prev_group + 1)
 228             throw Exception(
 229                 "Non-consecutive layersGroupIds in ExecutionObjectPipeline",
 230                 __FILE__, __FUNCTION__, __LINE__);
 231         prev_group = group;
 232     }
 233
 234     for (auto eo : eos_m)
 235         device_name_m += eo->GetDeviceName() + "+";
 236     device_name_m.resize(device_name_m.size() - 1);
 237
 238     // Allocate input and output memory for EOs/layersGroups
 239     // Note that i-th EO's output buffer is the same as (i+1)-th EO's input
 240     // So, if n EOs, then (n+1) buffers: b EO b EO b EO b ... EO b
 241     // User must set the first input buffer and the last output buffer
 242     size_t size;
 243     ArgInfo in(nullptr, 0);
 244     iobufs_m.push_back(new IODeviceArgInfo(in));
 245     for (auto eo : eos_m)
 246     {
 247         if (eo != eos_m.back())
 248             size = eo->GetOutputBufferSizeInBytes();
 249         else
 250             size = 0;
 251
 252         void *ptr = AllocateMem(size);
 253         ArgInfo out(ptr, size);
 254         iobufs_m.push_back(new IODeviceArgInfo(out));
 255     }
 256 }
 257
 258 ExecutionObjectPipeline::Impl::~Impl()
 259 {
 260     int num_iobufs = iobufs_m.size();
 261     for (int i = 0; i < num_iobufs; i++)
 262     {
 263         if (! (i == 0 || i == num_iobufs-1))
 264             free(iobufs_m[i]->GetArg().ptr());
 265         delete iobufs_m[i];
 266     }
 267 }
 268
 269 void ExecutionObjectPipeline::Impl::SetInputOutputBuffer(const ArgInfo &in,
 270                                                          const ArgInfo &out)
 271 {
 272     delete iobufs_m.front();
 273     delete iobufs_m.back();
 274     iobufs_m.front() = new IODeviceArgInfo(in);
 275     iobufs_m.back()  = new IODeviceArgInfo(out);
 276 }
 277
 278 bool ExecutionObjectPipeline::Impl::RunAsyncStart()
 279 {
 280     start_m = std::chrono::steady_clock::now();
 281     has_work_m = true;
 282     is_processed_m = false;
 283     host_time_m = 0.0f;
 284     curr_eo_idx_m = 0;
 285     eos_m[0]->AcquireLock();
 286     eos_m[0]->SetInputOutputBuffer(iobufs_m[0], iobufs_m[1]);
 287     return eos_m[0]->ProcessFrameStartAsync();
 288 }
 289
 290 // returns true if we have more EOs to execute
 291 bool ExecutionObjectPipeline::Impl::RunAsyncNext()
 292 {
 293     eos_m[curr_eo_idx_m]->ProcessFrameWait();
 294     eos_m[curr_eo_idx_m]->ReleaseLock();
 295     curr_eo_idx_m += 1;
 296     if (curr_eo_idx_m < eos_m.size())
 297     {
 298         eos_m[curr_eo_idx_m]->AcquireLock();
 299         eos_m[curr_eo_idx_m]->SetInputOutputBuffer(iobufs_m[curr_eo_idx_m],
 300                                                    iobufs_m[curr_eo_idx_m+1]);
 301         eos_m[curr_eo_idx_m]->ProcessFrameStartAsync();
 302         return true;
 303     }
 304     else
 305     {
 306         std::chrono::duration<float> elapsed = std::chrono::steady_clock::now()
 307                                                - start_m;
 308         host_time_m = elapsed.count() * 1000;  // seconds to milliseconds
 309         is_processed_m = true;
 310         cv_m.notify_all();
 311         return false;
 312     }
 313 }
 314
 315 bool ExecutionObjectPipeline::Impl::Wait()
 316 {
 317     if (! has_work_m)  return false;
 318
 319     std::unique_lock<std::mutex> lock(mutex_m);
 320     cv_m.wait(lock, [this]{ return this->is_processed_m; });
 321     has_work_m = false;
 322     return true;
 323 }
 324
 325 void
 326 ExecutionObjectPipeline::Impl::WriteLayerOutputsToFile(
 327     const std::string& filename_prefix) const
 328 {
 329     for (auto eo : eos_m)
 330         eo->WriteLayerOutputsToFile(filename_prefix);
 331 }
 332
 333 const LayerOutput*
 334 ExecutionObjectPipeline::Impl::GetOutputFromLayer(uint32_t layer_index,
 335     uint32_t output_index) const
 336 {
 337     const LayerOutput* lo = nullptr;
 338     for (auto eo : eos_m)
 339     {
 340         lo = eo->GetOutputFromLayer(layer_index, output_index);
 341         if (lo != nullptr)  break;
 342     }
 343     return lo;
 344 }
 345
 346 const LayerOutputs*
 347 ExecutionObjectPipeline::Impl::GetOutputsFromAllLayers() const
 348 {
 349     LayerOutputs *all = new LayerOutputs;
 350     for (auto eo : eos_m)
 351     {
 352         LayerOutputs *los = const_cast<LayerOutputs *>(
 353                                                 eo->GetOutputsFromAllLayers());
 354         for (auto& lo : *los)
 355             all->push_back(std::unique_ptr<const LayerOutput>{ lo.release() });
 356         delete los;
 357     }
 358     return all;
 359 }
 360