author    Yuan Zhao <yuanzhao@ti.com>    Wed, 23 Oct 2019 19:21:09 +0000 (14:21 -0500)
committer Yuan Zhao <yuanzhao@ti.com>    Mon, 28 Oct 2019 20:10:07 +0000 (15:10 -0500)
- ResM class provides top-level encapsulation (see the usage sketch below)
- All allocation of core resources and buffers, and all creation of
  Executor, ExecutionObject and ExecutionObjectPipeline objects, are encapsulated.
- Auto-partitioning of the last few layers to DSP, when profitable, is also encapsulated.
- MCT-1223, MCT-1224
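
A minimal sketch of the intended per-inference flow from application code, adapted from the commented-out RunSubgraphImpl example added in subgraph_runtime.cpp below. The RunSubgraph wrapper name is hypothetical, and the data-conversion calls are left as comments because ResM does not yet expose GetInConv()/GetOutConv():

    #include "subgraph_runtime.h"

    // Hypothetical wrapper; mirrors the #if 0 sketch in subgraph_runtime.cpp.
    void RunSubgraph(uint32_t subgraph_id, uint32_t total_num_subgraphs,
                     const std::vector<float*>& ext_in_data,
                     std::vector<float*>& ext_out_data)
    {
        tidl::ResM& res = tidl::ResM::Instance(total_num_subgraphs);
        tidl::ExecutionObjectPipeline* eop = res.GetEOP(subgraph_id);

        (void)ext_in_data; (void)ext_out_data;  // used once data conversion is wired up
        // in_conv.ScaleQuant(ext_in_data, (uint8_t*) eop->GetInputBufferPtr());
        eop->ProcessFrameStartAsync();
        eop->ProcessFrameWait();
        // out_conv.ScaleDequant((uint8_t*) eop->GetOutputBufferPtr(), ext_out_data);

        res.FreeEOP(subgraph_id, eop);  // return the EOP for reuse
    }
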
tidl_api/Makefile
tidl_api/inc/subgraph_runtime.h    [new file with mode: 0644]
tidl_api/src/subgraph_data_conv.h  [new file with mode: 0644]
tidl_api/src/subgraph_runtime.cpp  [new file with mode: 0644]
diff --git a/tidl_api/Makefile b/tidl_api/Makefile
index 8da13e482cc408f2f852506a2c98bd8c5131cd27..988cdc94f7c6706a1057612a3c1e5edb20c61f2b 100644 (file)
--- a/tidl_api/Makefile
+++ b/tidl_api/Makefile
SRCS = ocl_device.cpp configuration_parser.cpp configuration.cpp\
executor.cpp execution_object.cpp trace.cpp util.cpp \
- execution_object_pipeline.cpp
+ execution_object_pipeline.cpp \
+ subgraph_runtime.cpp
SRCS_IMGUTIL = imgutil.cpp
SRCS_PYBIND = pybind_eo.cpp pybind_eop.cpp pybind_executor.cpp \
pybind_configuration.cpp pybind_helpers.cpp
diff --git a/tidl_api/inc/subgraph_runtime.h b/tidl_api/inc/subgraph_runtime.h
--- /dev/null
@@ -0,0 +1,140 @@
+/******************************************************************************
+ * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+//! @file subgraph_runtime.h
+
+#pragma once
+#include <cstdint>
+#include <vector>
+#include <mutex>
+#include <condition_variable>
+#include "execution_object_pipeline.h"
+
+
+namespace tidl {
+
+#if 0
+// Auto-generated code from Relay/TVM compilation step after
+// partitioning and lowering to backend implementation
+
+// TODO: need to figure out exact arguments and format
+extern void tidl::RunSubgraphImpl(int subgraph_id,
+ const std::vector<float*>&,
+ const std::vector<float*>&);
+
+void tidlRunSubgraph(int subgraph_id,
+ int num_input_tensors, int num_output_tensors,
+ PackedArgs args)
+{
+ std::vector<float *> in_data, out_data;
+
+ for (int i = 0; i < num_input_tensors + num_output_tensors; i++)
+ if (i < num_input_tensors)
+ in_data.push_back(args.data[i]);
+ else
+ out_data.push_back(args.data[i]);
+
+ tidl::RunSubgraphImpl(subgraph_id, in_data, out_data);
+}
+#endif
+
+
+#if 0
+// user application code
+// subgraph_id will be used to find TIDL config file
+// e.g. subgraph_1.cfg, subgraph_2.cfg, etc
+void RunSubgraphImpl(int subgraph_id,
+ int total_num_subgraphs,
+ const std::vector<float*>& ext_in_data,
+ const std::vector<float*>& ext_out_data)
+{
+ ResM& res = ResM::Instance(total_num_subgraphs);
+ const ExecutionObjectPipeline& eop = res.GetEOP(subgraph_id);
+ const SubgraphDataConv& in_conv = res.GetInConv(subgraph_id);
+ const SubgraphDataConv& out_conv = res.GetOutConv(subgraph_id);
+
+ in_data = eop.GetInputBufferPtr();
+ in_conv.ScaleQuant(ext_in_data, in_data);
+ eop.ProcessFrameStartAsync();
+ eop.ProcessFrameWait();
+ out_data = eop.GetOutputBufferPtr();
+ out_conv.ScaleDeQuant(out_data, ext_out_data);
+ res.FreeEOP(subgraph_id, eop);
+}
+#endif
+
+
+// Singleton ResM .h file
+// Resource manager for available EVE and DSP devices,
+// - Allocates EVEs and DSPs
+// - Constructs Executors (tidl_setup) and ExecutionObjects (tid_init)
+// - Creates set of ExecutionPipelines (with or without DSP)
+// - Allocating EOP on demand (acquire and free semantics)
+// - Allocates input/output buffers
+class ResM {
+ public:
+ ResM();
+ ~ResM();
+ static ResM& Instance(uint32_t total_num_subgraphs = 1);
+
+ // Acquire and free EOPs for running a subgraph (acquire-and-free semantics)
+ ExecutionObjectPipeline* GetEOP(uint32_t subgraph_id);
+ void FreeEOP(uint32_t subgraph_id,
+ ExecutionObjectPipeline* eop);
+ Configuration& GetConfiguration(uint32_t subgraph_id);
+ //const SubgraphDataConv& GetInConv(uint32_t subgraph_id);
+ //const SubgraphDataConv& GetOutConv(uint32_t subgraph_id);
+
+
+ private:
+ void Init(uint32_t num_subgraphs);
+
+ bool enable_trace_m;
+ uint32_t num_subgraphs_m;
+ uint32_t num_es_per_subgraph_m;
+ uint32_t num_eves_m;
+ uint32_t num_dsps_m;
+ uint32_t num_lg2_dsps_used_m; // in partitioned execution case
+ std::mutex mutex_init_m;
+
+ // indexed by subgraph_id for resources
+ struct ResEOP {
+ ResEOP() : free_eop_index(0), is_used(), eops(nullptr) {}
+
+ uint32_t free_eop_index;
+ std::mutex mutex_eops;
+ std::condition_variable cv_eops;
+ std::vector<bool> is_used;
+ std::vector<ExecutionObjectPipeline*>* eops;
+ };
+ std::vector<Configuration> cs_m;
+ std::vector<Executor*> es_m;
+ std::vector<Executor*> e2s_m;
+ std::vector<ResEOP> *eops_m;
+};
+
+} // namespace tidl
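
One usage note on the ResM interface above: GetConfiguration() asserts that the subgraph's EOPs already exist (see GetEOP() in subgraph_runtime.cpp below), so it is only meaningful after the first GetEOP() call for that subgraph. A short hedged sketch of the expected call order:

    #include "subgraph_runtime.h"

    void Example()
    {
        tidl::ResM& res = tidl::ResM::Instance(2);           // e.g. 2 TIDL subgraphs
        tidl::ExecutionObjectPipeline* eop = res.GetEOP(0);  // lazily builds Executors/EOPs
        tidl::Configuration& cfg = res.GetConfiguration(0);  // valid only after GetEOP(0)
        (void)cfg;                                           // e.g. inspect cfg.netBinFile
        // ... run frames on eop ...
        res.FreeEOP(0, eop);                                 // wakes any waiter blocked in GetEOP(0)
    }
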
diff --git a/tidl_api/src/subgraph_data_conv.h b/tidl_api/src/subgraph_data_conv.h
--- /dev/null
@@ -0,0 +1,128 @@
+/******************************************************************************
+ * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+namespace tidl {
+
+/* @class SubgraphDataConv
+ @brief Handles data conversion at subgraph boundaries
+ At calibration time, consume either external input or external
+ output tensors, determine sign and scaling factor.
+ At inference time, use sign and scaling factor to perform data
+ conversion between TIDL tensors and external tensors
+
+ Example use for EstScaleQuant:
+ SubgraphDataConv conv({}, {}, {}, {1,3,64,64,1,3,28,28});
+ conv.EstScaleQuant(in);
+ WriteQuantizationParams(conv.GetIsSigned(), conv.GetScaleQ());
+ conv.ScaleQuant(in, out);
+
+ Example use for EstScaleDequant:
+ SubgraphDataConv conv({}, {}, {}, {1,3,64,64,1,3,28,28});
+ conv.EstScaleDequant(out);
+ WriteDeQuantizationParams(conv.GetIsSigned(), conv.GetScaleQ());
+
+ Example use for ScaleQuant:
+ // one time setup
+ ... Parse json file for is_signed, scaleQ, is_NCHW, dims ...
+ SubgraphDataConv conv(is_signed, scaleQ, is_NCHW, dims);
+
+ // per inference
+ out = eop.GetInputBufferPtr();
+ conv.ScaleQuant(in, out);
+ eop.ProcessFrameStartAsync();
+
+ Example use for ScaleDeQuant:
+ // one time setup
+ ... Parse json file for is_signed, scaleQ, is_NCHW, dims ...
+ SubgraphDataConv conv(is_signed, scaleQ, is_NCHW, dims);
+
+ // per inference
+ eop.ProcessFrameWait();
+ in = eop.GetOutputBufferPtr();
+ conv.ScaleDequant(in, out);
+*/
+class SubgraphDataConv
+{
+ public:
+ //! @brief Creates a SubgraphDataConv.
+ //! @param None
+ SubgraphDataConv() {}
+
+ SubgraphDataConv(const std::vector<bool>& is_signed,
+ const std::vector<float>& scaleQ,
+ const std::vector<bool>& is_NCHW,
+ const std::vector<int>& dims
+ ) : is_signed_m(is_signed), scaleQ_m(scaleQ),
+ is_NCHW_m(is_NCHW), dims_m(dims)
+ {}
+
+ const std::vector<bool>& GetIsSigned() { return is_signed_m; }
+ const std::vector<float>& GetScaleQ() { return scaleQ_m; }
+ const std::vector<bool>& GetIsNCHW() { return is_NCHW_m; }
+
+ //! @brief Estimate parameters for Quantization
+ //! @param in vector of floating point external tensor data at input
+ void EstScaleQuant(const std::vector<float*>& in);
+
+ //! @brief Estimate parameters for DeQuantization
+ //! @param out vector of floating point external tensor data at output
+ void EstScaleDequant(const std::vector<float*>& out);
+
+ //! @brief Quantizes floating point {in} to 8-bit Quantized {out}
+ //! and transposes buffer from NHWC to NCHW format (if needed),
+ //! results are put into out pointer consecutively, as expected
+ //! by TIDL
+ //! @param in floating point vector input to quantize
+ //! @param out 8-bit Quantized output (quantized from in)
+ void ScaleQuant(const std::vector<float*>& in, uint8_t* out);
+
+ //! @brief De-Quantizes 8-bit Quantized {in} to floating point {out}
+ //! and transposes buffer from NCHW to NHWC format (if needed),
+ //! the results are put into out vector, one vector per
+ //! tensor, as expected by external tensors
+ //! @param in 8-bit Quantized input to De-Quantize
+ //! @param out floating point output (De-Quantized from in)
+ void ScaleDequant(const uint8_t *in, std::vector<float*>& out);
+
+ private:
+ //! if tensor needs to be evaluated as signed char
+ std::vector<bool> is_signed_m;
+
+ //! Q value for Quantization and Dequantization
+ std::vector<float> scaleQ_m;
+
+ //! the format of external tensors, NCHW or NHWC
+ //! if data needs to be transposed between TIDL NCHW tensors and
+ //! external tensors
+ std::vector<bool> is_NCHW_m;
+
+ //! flattened 4d dims of external tensors
+ std::vector<int> dims_m;
+};
+
+} // namespace tidl
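
The matching subgraph_data_conv.cpp is not part of this commit, so the conversion above is only declared here. As a rough sketch of the per-element behaviour the interface implies — assuming quantization is q = saturate(round(x * scaleQ)) with the range picked by is_signed, and dequantization divides back by scaleQ; the NCHW/NHWC transpose driven by is_NCHW_m and dims_m is omitted — it might look like:

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Sketch only, under the assumptions stated above; not TIDL's actual implementation.
    static uint8_t QuantizeElem(float x, float scaleQ, bool is_signed)
    {
        float q = std::round(x * scaleQ);
        if (is_signed)   // stored in the same byte buffer, read back as signed char
            return static_cast<uint8_t>(static_cast<int8_t>(
                       std::max(-128.0f, std::min(127.0f, q))));
        return static_cast<uint8_t>(std::max(0.0f, std::min(255.0f, q)));
    }

    static float DequantizeElem(uint8_t q, float scaleQ, bool is_signed)
    {
        float v = is_signed ? static_cast<float>(static_cast<int8_t>(q))
                            : static_cast<float>(q);
        return v / scaleQ;
    }
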
diff --git a/tidl_api/src/subgraph_runtime.cpp b/tidl_api/src/subgraph_runtime.cpp
--- /dev/null
@@ -0,0 +1,321 @@
+/******************************************************************************
+ * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#include <pthread.h>
+#define LOKI_PTHREAD_H
+#include <loki/Singleton.h>
+
+#include "util.h"
+#include "subgraph_runtime.h"
+
+
+#if 0
+// Auto-generated code from Relay/TVM compilation step after
+// partitioning and lowering to backend implementation
+
+// TODO: need to figure out exact arguments and format
+extern void tidl::RunSubgraphImpl(int subgraph_id,
+ const std::vector<float*>&,
+ const std::vector<float*>&);
+
+void tidlRunSubgraph(int subgraph_id,
+ int num_input_tensors, int num_output_tensors,
+ PackedArgs args)
+{
+ std::vector<float *> in_data, out_data;
+
+ for (int i = 0; i < num_input_tensors + num_output_tensors; i++)
+ if (i < num_input_tensors)
+ in_data.push_back(args.data[i]);
+ else
+ out_data.push_back(args.data[i]);
+
+ tidl::RunSubgraphImpl(subgraph_id, in_data, out_data);
+}
+#endif
+
+
+#if 0
+// user application code
+// subgraph_id will be used to find TIDL config file
+// e.g. subgraph_1.cfg, subgraph_2.cfg, etc
+void RunSubgraphImpl(int subgraph_id,
+ int total_num_subgraphs,
+ const std::vector<float*>& ext_in_data,
+ const std::vector<float*>& ext_out_data)
+{
+ ResM& res = ResM::Instance(total_num_subgraphs);
+ const ExecutionObjectPipeline& eop = res.GetEOP(subgraph_id);
+ const SubgraphDataConv& in_conv = res.GetInConv(subgraph_id);
+ const SubgraphDataConv& out_conv = res.GetOutConv(subgraph_id);
+
+ in_data = eop.GetInputBufferPtr();
+ in_conv.ScaleQuant(ext_in_data, in_data);
+ eop.ProcessFrameStartAsync();
+ eop.ProcessFrameWait();
+ out_data = eop.GetOutputBufferPtr();
+ out_conv.ScaleDeQuant(out_data, ext_out_data);
+ res.FreeEOP(subgraph_id, eop);
+}
+#endif
+
+
+
+// Singleton ResM .cpp
+using namespace tidl;
+
+typedef Loki::SingletonHolder <tidl::ResM, Loki::CreateUsingNew,
+Loki::DefaultLifetime, Loki::ClassLevelLockable> tidlSingleResM;
+
+ResM::ResM() : enable_trace_m(false), num_subgraphs_m(0),
+ num_lg2_dsps_used_m(0), eops_m(nullptr)
+{
+}
+
+ResM::~ResM()
+{
+ if (eops_m != nullptr)
+ {
+ for (const ResEOP& res_eop : *eops_m)
+ {
+ if (res_eop.eops != nullptr)
+ {
+ for (const ExecutionObjectPipeline* eop : *(res_eop.eops))
+ {
+ free(eop->GetInputBufferPtr());
+ free(eop->GetOutputBufferPtr());
+ delete eop;
+ }
+ }
+ }
+ delete eops_m;
+ eops_m = nullptr;
+ }
+
+ for (const Executor* e : es_m)
+ if (e != nullptr) delete e;
+ for (const Executor* e : e2s_m)
+ if (e != nullptr) delete e;
+}
+
+ResM& ResM::Instance(uint32_t total_num_subgraphs)
+{
+ ResM& res = tidlSingleResM::Instance();
+ res.Init(total_num_subgraphs);
+ return res;
+}
+
+void ResM::Init(uint32_t num_subgraphs)
+{
+ std::lock_guard<std::mutex> lock(mutex_init_m);
+
+ if (num_subgraphs_m == 0)
+ {
+ num_subgraphs_m = num_subgraphs;
+
+ if (getenv("TIDL_SUBGRAPH_TRACE") != nullptr) enable_trace_m = true;
+
+ // Allocating resources
+ num_eves_m = Executor::GetNumDevices(DeviceType::EVE);
+ num_eves_m = 1; // TODO: to remove after debugging
+ num_dsps_m = Executor::GetNumDevices(DeviceType::DSP);
+
+ assert(num_eves_m > 0 || num_dsps_m > 0);
+ assert(num_subgraphs_m <= num_eves_m || num_subgraphs_m <= num_dsps_m);
+ num_es_per_subgraph_m = num_eves_m / num_subgraphs_m;
+ if (num_eves_m == 0)
+ num_es_per_subgraph_m = num_dsps_m / num_subgraphs_m;
+
+ cs_m.resize(num_subgraphs_m);
+ es_m.resize(num_subgraphs_m, nullptr);
+ e2s_m.resize(num_subgraphs_m, nullptr);
+ eops_m = new std::vector<ResEOP>(num_subgraphs_m);
+ }
+}
+
+ExecutionObjectPipeline* ResM::GetEOP(uint32_t subgraph_id)
+{
+ assert(subgraph_id < num_subgraphs_m);
+ ResEOP& res_eop = (*eops_m)[subgraph_id];
+
+ std::unique_lock<std::mutex> lock(res_eop.mutex_eops);
+
+ if (res_eop.eops == nullptr)
+ {
+ if (enable_trace_m)
+ printf("Subgraph %d: initialing E/EOPs with %d cores\n",
+ subgraph_id, num_es_per_subgraph_m);
+
+ // Constructing EOPs if not already constructed
+ // Each subgraph -> num_eves_per_subgraph_m EOPs
+ // Each EOP -> use_count
+ std::string cfg_file = "subgraph" + std::to_string(subgraph_id) + ".cfg";
+ bool status = cs_m[subgraph_id].ReadFromFile(cfg_file);
+ assert(status);
+
+ // Check if last few layers can be offloaded to DSPs
+ // and DSPs are available
+ DeviceIds e_ids, e2_ids;
+ for (uint32_t i = 0; i < num_es_per_subgraph_m; i++)
+ e_ids.insert(static_cast<DeviceId>(
+ subgraph_id * num_es_per_subgraph_m + i));
+ // uint32_t num_dsps_used = 0;
+ if (num_eves_m > 0 && num_dsps_m > 0 && ! cs_m[subgraph_id].runFullNet)
+ {
+ sTIDL_Network_t *net = new sTIDL_Network_t;
+ bool status = ReadNetworkBinary(cs_m[subgraph_id].netBinFile,
+ reinterpret_cast<char *>(net));
+ assert(status);
+ int32_t start_layer = net->numLayers -1;
+ int32_t end_layer = 0;
+ if (net->TIDLLayers[start_layer].layerType == (int32_t) TIDL_DataLayer)
+ start_layer -= 1;
+ if (net->TIDLLayers[end_layer].layerType == (int32_t) TIDL_DataLayer)
+ end_layer += 1;
+ int32_t i = start_layer;
+ for ( ; i > end_layer; i--)
+ {
+ int32_t layer_type = net->TIDLLayers[i].layerType;
+ if (layer_type != (int32_t) TIDL_SoftMaxLayer &&
+ layer_type != (int32_t) TIDL_InnerProductLayer &&
+ layer_type != (int32_t) TIDL_PoolingLayer)
+ break;
+ }
+ i += 1;
+ if (i <= start_layer)
+ {
+ if (num_lg2_dsps_used_m < num_dsps_m)
+ {
+ if (enable_trace_m)
+ printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n",
+ subgraph_id, i, start_layer);
+ while (i <= start_layer)
+ cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2;
+ e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m));
+ num_lg2_dsps_used_m += 1;
+ }
+ }
+ delete net;
+ }
+
+ if (e2_ids.empty())
+ cs_m[subgraph_id].runFullNet = true;
+ cs_m[subgraph_id].enableApiTrace = enable_trace_m;
+
+ // Constructing Es and EOPs
+ res_eop.eops = new std::vector<ExecutionObjectPipeline*>;
+ uint32_t buffer_factor = 2; // double buffering factor
+ if (num_eves_m > 0)
+ {
+ es_m[subgraph_id] = new Executor(DeviceType::EVE, e_ids,
+ cs_m[subgraph_id], 1);
+ if (! e2_ids.empty())
+ {
+ e2s_m[subgraph_id] = new Executor(DeviceType::DSP, e2_ids,
+ cs_m[subgraph_id], 2);
+ for (uint32_t j = 0; j < buffer_factor; j++)
+ for (uint32_t i = 0; i < num_es_per_subgraph_m; i++)
+ res_eop.eops->emplace_back(new ExecutionObjectPipeline(
+ {(*es_m[subgraph_id])[i],
+ (*e2s_m[subgraph_id])[i % e2_ids.size()]}));
+ }
+ else
+ {
+ for (uint32_t j = 0; j < buffer_factor; j++)
+ for (uint32_t i = 0; i < num_es_per_subgraph_m; i++)
+ res_eop.eops->emplace_back(new ExecutionObjectPipeline(
+ {(*es_m[subgraph_id])[i]}));
+ }
+ }
+ else
+ {
+ es_m[subgraph_id] = new Executor(DeviceType::DSP, e_ids,
+ cs_m[subgraph_id], 1);
+ for (uint32_t j = 0; j < buffer_factor; j++)
+ for (uint32_t i = 0; i < num_es_per_subgraph_m; i++)
+ res_eop.eops->emplace_back(new ExecutionObjectPipeline(
+ {(*es_m[subgraph_id])[i]}));
+ }
+
+ if (enable_trace_m)
+ printf("Subgraph %d: Allocating input/output buffers for %d EOPs\n",
+ subgraph_id, res_eop.eops->size());
+ // Allocate input/output buffers
+ for (auto eop : *(res_eop.eops))
+ {
+ size_t in_size = eop->GetInputBufferSizeInBytes();
+ size_t out_size = eop->GetOutputBufferSizeInBytes();
+ void* in_ptr = malloc(in_size);
+ void* out_ptr = malloc(out_size);
+ assert(in_ptr != nullptr && out_ptr != nullptr);
+
+ ArgInfo in(in_ptr, in_size);
+ ArgInfo out(out_ptr, out_size);
+ eop->SetInputOutputBuffer(in, out);
+ }
+
+ res_eop.free_eop_index = 0;
+ res_eop.is_used.resize(res_eop.eops->size(), false);
+ }
+
+ // Return an available EOP (round robin allocation)
+ uint32_t curr_eop = res_eop.free_eop_index;
+ res_eop.cv_eops.wait(lock, [this, subgraph_id, curr_eop]{
+ return this->eops_m->at(subgraph_id).is_used[curr_eop] == false; });
+ res_eop.is_used[curr_eop] = true;
+ res_eop.free_eop_index = (curr_eop + 1) % res_eop.eops->size();
+ if (enable_trace_m)
+ printf("Subgraph %d: return EOP %d for GetEOP()\n", subgraph_id, curr_eop);
+ return res_eop.eops->at(curr_eop);
+}
+
+void ResM::FreeEOP(uint32_t subgraph_id, ExecutionObjectPipeline* eop)
+{
+ ResEOP& res_eop = (*eops_m)[subgraph_id];
+ {
+ std::unique_lock<std::mutex> lock(res_eop.mutex_eops);
+ for (uint32_t i = 0; i < res_eop.is_used.size(); i++)
+ if (res_eop.eops->at(i) == eop)
+ {
+ res_eop.is_used[i] = false;
+ if (enable_trace_m)
+ printf("Subgraph %d: FreeEOP %d\n", subgraph_id, i);
+ break;
+ }
+ }
+ res_eop.cv_eops.notify_all();
+}
+
+Configuration& ResM::GetConfiguration(uint32_t subgraph_id)
+{
+ assert(subgraph_id < num_subgraphs_m);
+ assert((*eops_m)[subgraph_id].eops != nullptr);
+ return cs_m[subgraph_id];
+}
+
+
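
Since GetEOP() builds buffer_factor (2) EOPs per core and blocks on cv_eops when all of them are in use, a caller can keep two frames in flight per subgraph. A hedged sketch of that double-buffered usage (the worker wrapper is hypothetical):

    #include <thread>
    #include "subgraph_runtime.h"

    void PipelinedExample(uint32_t subgraph_id, uint32_t total_num_subgraphs)
    {
        tidl::ResM& res = tidl::ResM::Instance(total_num_subgraphs);

        auto worker = [&res, subgraph_id](int frame) {
            tidl::ExecutionObjectPipeline* eop = res.GetEOP(subgraph_id);
            (void)frame;  // frame index would select the input data (omitted here)
            // ... fill eop->GetInputBufferPtr() ...
            eop->ProcessFrameStartAsync();
            eop->ProcessFrameWait();
            // ... read eop->GetOutputBufferPtr() ...
            res.FreeEOP(subgraph_id, eop);
        };

        // With one core per subgraph there are 2 EOPs; a third thread would
        // block inside GetEOP() until one of these calls FreeEOP().
        std::thread t0(worker, 0), t1(worker, 1);
        t0.join();
        t1.join();
    }
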