aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYuan Zhao2019-10-23 14:21:09 -0500
committerYuan Zhao2019-10-28 15:10:07 -0500
commit34d236d1b2870e973ab045063af8980d779ec49a (patch)
tree8af39b29a49908393e2886ec082471a71e3e2217
parent06d3f5efb33ad9a06ff57a1cbd2bd943a0f4cfc2 (diff)
downloadtidl-api-34d236d1b2870e973ab045063af8980d779ec49a.tar.gz
tidl-api-34d236d1b2870e973ab045063af8980d779ec49a.tar.xz
tidl-api-34d236d1b2870e973ab045063af8980d779ec49a.zip
Subgraph offloading to TIDL: first commit
- ResM class provides top level encapsulation - All allocation of core resources and buffers, and all creation of Executor, ExecutionObject, ExecutionObjectPipeline are encapsulated. - Auto-partition last few layers to DSP if profitable, also encapsulated. - MCT-1223, MCT-1224
-rw-r--r--tidl_api/Makefile3
-rw-r--r--tidl_api/inc/subgraph_runtime.h140
-rw-r--r--tidl_api/src/subgraph_data_conv.h128
-rw-r--r--tidl_api/src/subgraph_runtime.cpp321
4 files changed, 591 insertions, 1 deletions
diff --git a/tidl_api/Makefile b/tidl_api/Makefile
index 8da13e4..988cdc9 100644
--- a/tidl_api/Makefile
+++ b/tidl_api/Makefile
@@ -40,7 +40,8 @@ AR = ar
40 40
41SRCS = ocl_device.cpp configuration_parser.cpp configuration.cpp\ 41SRCS = ocl_device.cpp configuration_parser.cpp configuration.cpp\
42 executor.cpp execution_object.cpp trace.cpp util.cpp \ 42 executor.cpp execution_object.cpp trace.cpp util.cpp \
43 execution_object_pipeline.cpp 43 execution_object_pipeline.cpp \
44 subgraph_runtime.cpp
44SRCS_IMGUTIL = imgutil.cpp 45SRCS_IMGUTIL = imgutil.cpp
45SRCS_PYBIND = pybind_eo.cpp pybind_eop.cpp pybind_executor.cpp \ 46SRCS_PYBIND = pybind_eo.cpp pybind_eop.cpp pybind_executor.cpp \
46 pybind_configuration.cpp pybind_helpers.cpp 47 pybind_configuration.cpp pybind_helpers.cpp
diff --git a/tidl_api/inc/subgraph_runtime.h b/tidl_api/inc/subgraph_runtime.h
new file mode 100644
index 0000000..09cf970
--- /dev/null
+++ b/tidl_api/inc/subgraph_runtime.h
@@ -0,0 +1,140 @@
1/******************************************************************************
2 * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29//! @file subgraph_runtime.h
30
31#pragma once
32#include <vector>
33#include <mutex>
34#include <condition_variable>
35#include "execution_object_pipeline.h"
36
37
38namespace tidl {
39
#if 0
// Auto-generated code from Relay/TVM compilation step after
// partitioning and lowering to backend implementation

// TODO: need to figure out exact arguments and format
extern void tidl::RunSubgraphImpl(int subgraph_id,
                                  const std::vector<float*>&,
                                  const std::vector<float*>&);

// Disabled sketch of the generated entry point: the first num_input_tensors
// entries of args.data are treated as inputs, the remaining ones as outputs,
// and both lists are forwarded to RunSubgraphImpl().
void tidlRunSubgraph(int subgraph_id,
                     int num_input_tensors, int num_output_tensors,
                     PackedArgs args)
{
  std::vector<float *> in_data, out_data;
  const int num_tensors = num_input_tensors + num_output_tensors;

  int idx = 0;
  // leading entries: input tensors
  for ( ; idx < num_tensors && idx < num_input_tensors; idx++)
    in_data.push_back(args.data[idx]);
  // everything after the inputs: output tensors
  for ( ; idx < num_tensors; idx++)
    out_data.push_back(args.data[idx]);

  tidl::RunSubgraphImpl(subgraph_id, in_data, out_data);
}
#endif
64
65
#if 0
// user application code (disabled sketch)
// subgraph_id will be used to find TIDL config file,
// ResM::GetEOP() reads "subgraph<id>.cfg", e.g. subgraph0.cfg, subgraph1.cfg
// NOTE(review): ResM::GetInConv()/GetOutConv() are still commented out in
// class ResM, so this sketch cannot be enabled as-is.
void RunSubgraphImpl(int subgraph_id,
                     int total_num_subgraphs,
                     const std::vector<float*>& ext_in_data,
                     const std::vector<float*>& ext_out_data)
{
  ResM& res = ResM::Instance(total_num_subgraphs);
  // GetEOP() hands out a pointer that must be given back with FreeEOP()
  ExecutionObjectPipeline* eop = res.GetEOP(subgraph_id);
  const SubgraphDataConv& in_conv  = res.GetInConv(subgraph_id);
  const SubgraphDataConv& out_conv = res.GetOutConv(subgraph_id);

  auto in_data = eop->GetInputBufferPtr();
  in_conv.ScaleQuant(ext_in_data, in_data);
  eop->ProcessFrameStartAsync();
  eop->ProcessFrameWait();
  auto out_data = eop->GetOutputBufferPtr();
  out_conv.ScaleDequant(out_data, ext_out_data);
  res.FreeEOP(subgraph_id, eop);
}
#endif
89
90
91// Singleton ResM .h file
92// Resource manager for available EVE and DSP devices,
93// - Allocates EVEs and DSPs
94// - Constructs Executors (tidl_setup) and ExecutionObjects (tid_init)
95// - Creates set of ExecutionPipelines (with or without DSP)
96// - Allocating EOP on demand (acquire and free semantics)
97// - Allocates input/output buffers
98class ResM {
99 public:
100 ResM();
101 ~ResM();
102 static ResM& Instance(uint32_t total_num_subgraphs = 1);
103
104 // how to ge
105 ExecutionObjectPipeline* GetEOP(uint32_t subgraph_id);
106 void FreeEOP(uint32_t subgraph_id,
107 ExecutionObjectPipeline* eop);
108 Configuration& GetConfiguration(uint32_t subgraph_id);
109 //const SubgraphDataConv& GetInConv(uint32_t subgraph_id);
110 //const SubgraphDataConv& GetOutConv(uint32_t subgraph_id);
111
112
113 private:
114 void Init(uint32_t num_subgraphs);
115
116 bool enable_trace_m;
117 uint32_t num_subgraphs_m;
118 uint32_t num_es_per_subgraph_m;
119 uint32_t num_eves_m;
120 uint32_t num_dsps_m;
121 uint32_t num_lg2_dsps_used_m; // in partitioned execution case
122 std::mutex mutex_init_m;
123
124 // indexed by subgraph_id for resources
125 struct ResEOP {
126 ResEOP() : free_eop_index(0), is_used(), eops(nullptr) {}
127
128 uint32_t free_eop_index;
129 std::mutex mutex_eops;
130 std::condition_variable cv_eops;
131 std::vector<bool> is_used;
132 std::vector<ExecutionObjectPipeline*>* eops;
133 };
134 std::vector<Configuration> cs_m;
135 std::vector<Executor*> es_m;
136 std::vector<Executor*> e2s_m;
137 std::vector<ResEOP> *eops_m;
138};
139
140} // namespace tidl
diff --git a/tidl_api/src/subgraph_data_conv.h b/tidl_api/src/subgraph_data_conv.h
new file mode 100644
index 0000000..24920fc
--- /dev/null
+++ b/tidl_api/src/subgraph_data_conv.h
@@ -0,0 +1,128 @@
1/******************************************************************************
2 * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29namespace tidl {
30
/*! @class SubgraphDataConv
    @brief Handles data conversion at subgraph boundaries
           At calibration time, consume either external input or external
           output tensors, determine sign and scaling factor.
           At inference time, use sign and scaling factor to perform data
           conversion between TIDL tensors and external tensors

    NOTE(review): this header includes nothing itself; it relies on <vector>
    and <cstdint> having been included before it.

    Example use for EstScaleQuant:
      SubgraphDataConv conv({}, {}, {}, {1,3,64,64,1,3,28,28});
      conv.EstScaleQuant(in);
      WriteQuantizationParams(conv.GetIsSigned(), conv.GetScaleQ());
      conv.ScaleQuant(in, out);

    Example use for EstScaleDequant:
      SubgraphDataConv conv({}, {}, {}, {1,3,64,64,1,3,28,28});
      conv.EstScaleDequant(out);
      WriteDeQuantizationParams(conv.GetIsSigned(), conv.GetScaleQ());

    Example use for ScaleQuant:
      // one time setup
      ... Parse json file for is_signed, scaleQ, is_NCHW, dims ...
      SubgraphDataConv conv(is_signed, scaleQ, is_NCHW, dims);

      // per inference
      out = eop.GetInputBufferPtr();
      conv.ScaleQuant(in, out);
      eop.ProcessFrameStartAsync();

    Example use for ScaleDequant:
      // one time setup
      ... Parse json file for is_signed, scaleQ, is_NCHW, dims ...
      SubgraphDataConv conv(is_signed, scaleQ, is_NCHW, dims);

      // per inference
      eop.ProcessFrameWait();
      in = eop.GetOutputBufferPtr();
      conv.ScaleDequant(in, out);
*/
class SubgraphDataConv
{
    public:
        //! @brief Creates a SubgraphDataConv with empty parameters.
        SubgraphDataConv() {}

        //! @brief Creates a SubgraphDataConv from known parameters.
        //! @param is_signed whether each tensor is evaluated as signed char
        //! @param scaleQ    Q value of each tensor
        //! @param is_NCHW   whether each external tensor is in NCHW format
        //! @param dims      flattened 4d dims of the external tensors
        SubgraphDataConv(const std::vector<bool>& is_signed,
                         const std::vector<float>& scaleQ,
                         const std::vector<bool>& is_NCHW,
                         const std::vector<int>& dims
                        ) : is_signed_m(is_signed), scaleQ_m(scaleQ),
                            is_NCHW_m(is_NCHW), dims_m(dims)
        {}

        const std::vector<bool>&  GetIsSigned() const { return is_signed_m; }
        const std::vector<float>& GetScaleQ()   const { return scaleQ_m; }
        const std::vector<bool>&  GetIsNCHW()   const { return is_NCHW_m; }

        //! @brief Estimate parameters for Quantization
        //! @param in vector of floating point external tensor data at input
        void EstScaleQuant(const std::vector<float*>& in);

        //! @brief Estimate parameters for DeQuantization
        //! @param out vector of floating point external tensor data at output
        void EstScaleDequant(const std::vector<float*>& out);

        //! @brief Quantizes floating point {in} to 8-bit Quantized {out}
        //!        and transposes buffer from NHWC to NCHW format (if needed),
        //!        results are put into out pointer consecutively, as expected
        //!        by TIDL
        //! @param in floating point vector input to quantize
        //! @param out 8-bit Quantized output (quantized from in)
        void ScaleQuant(const std::vector<float*>& in, uint8_t* out);

        //! @brief De-Quantizes 8-bit Quantized {in} to floating point {out}
        //!        and transposes buffer from NCHW to NHWC format (if needed),
        //!        the results are put into out vector, one vector per
        //!        tensor, as expected by external tensors
        //! @param in 8-bit Quantized input to De-Quantize
        //! @param out floating point output (De-Quantized from in)
        void ScaleDequant(const uint8_t *in, std::vector<float*>& out);

    private:
        //! if tensor needs to be evaluated as signed char
        std::vector<bool> is_signed_m;

        //! Q value for Quantization and Dequantization
        std::vector<float> scaleQ_m;

        //! the format of external tensors, NCHW or NHWC
        //! if data needs to be transposed between TIDL NCHW tensors and
        //! external tensors
        std::vector<bool> is_NCHW_m;

        //! flattened 4d dims of external tensors
        std::vector<int> dims_m;
};
127
128} // namespace tidl
diff --git a/tidl_api/src/subgraph_runtime.cpp b/tidl_api/src/subgraph_runtime.cpp
new file mode 100644
index 0000000..5445b9b
--- /dev/null
+++ b/tidl_api/src/subgraph_runtime.cpp
@@ -0,0 +1,321 @@
1/******************************************************************************
2 * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29#include <pthread.h>
30#define LOKI_PTHREAD_H
31#include <loki/Singleton.h>
32
33#include "util.h"
34#include "subgraph_runtime.h"
35
36
#if 0
// Auto-generated code from Relay/TVM compilation step after
// partitioning and lowering to backend implementation

// TODO: need to figure out exact arguments and format
extern void tidl::RunSubgraphImpl(int subgraph_id,
                                  const std::vector<float*>&,
                                  const std::vector<float*>&);

// Disabled sketch of the generated entry point: the first num_input_tensors
// entries of args.data are treated as inputs, the remaining ones as outputs,
// and both lists are forwarded to RunSubgraphImpl().
void tidlRunSubgraph(int subgraph_id,
                     int num_input_tensors, int num_output_tensors,
                     PackedArgs args)
{
  std::vector<float *> in_data, out_data;
  const int num_tensors = num_input_tensors + num_output_tensors;

  int idx = 0;
  // leading entries: input tensors
  for ( ; idx < num_tensors && idx < num_input_tensors; idx++)
    in_data.push_back(args.data[idx]);
  // everything after the inputs: output tensors
  for ( ; idx < num_tensors; idx++)
    out_data.push_back(args.data[idx]);

  tidl::RunSubgraphImpl(subgraph_id, in_data, out_data);
}
#endif
61
62
#if 0
// user application code (disabled sketch)
// subgraph_id will be used to find TIDL config file,
// ResM::GetEOP() reads "subgraph<id>.cfg", e.g. subgraph0.cfg, subgraph1.cfg
// NOTE(review): ResM::GetInConv()/GetOutConv() are still commented out in
// class ResM, so this sketch cannot be enabled as-is.
void RunSubgraphImpl(int subgraph_id,
                     int total_num_subgraphs,
                     const std::vector<float*>& ext_in_data,
                     const std::vector<float*>& ext_out_data)
{
  ResM& res = ResM::Instance(total_num_subgraphs);
  // GetEOP() hands out a pointer that must be given back with FreeEOP()
  ExecutionObjectPipeline* eop = res.GetEOP(subgraph_id);
  const SubgraphDataConv& in_conv  = res.GetInConv(subgraph_id);
  const SubgraphDataConv& out_conv = res.GetOutConv(subgraph_id);

  auto in_data = eop->GetInputBufferPtr();
  in_conv.ScaleQuant(ext_in_data, in_data);
  eop->ProcessFrameStartAsync();
  eop->ProcessFrameWait();
  auto out_data = eop->GetOutputBufferPtr();
  out_conv.ScaleDequant(out_data, ext_out_data);
  res.FreeEOP(subgraph_id, eop);
}
#endif
86
87
88
89// Singleton ResM .cpp
90using namespace tidl;
91
92typedef Loki::SingletonHolder <tidl::ResM, Loki::CreateUsingNew,
93Loki::DefaultLifetime, Loki::ClassLevelLockable> tidlSingleResM;
94
95ResM::ResM() : enable_trace_m(false), num_subgraphs_m(0),
96 num_lg2_dsps_used_m(0), eops_m(nullptr)
97{
98}
99
100ResM::~ResM()
101{
102 if (eops_m != nullptr)
103 {
104 for (const ResEOP& res_eop : *eops_m)
105 {
106 if (res_eop.eops != nullptr)
107 {
108 for (const ExecutionObjectPipeline* eop : *(res_eop.eops))
109 {
110 free(eop->GetInputBufferPtr());
111 free(eop->GetOutputBufferPtr());
112 delete eop;
113 }
114 }
115 }
116 delete eops_m;
117 eops_m = nullptr;
118 }
119
120 for (const Executor* e : es_m)
121 if (e != nullptr) delete e;
122 for (const Executor* e : e2s_m)
123 if (e != nullptr) delete e;
124}
125
126ResM& ResM::Instance(uint32_t total_num_subgraphs)
127{
128 ResM& res = tidlSingleResM::Instance();
129 res.Init(total_num_subgraphs);
130 return res;
131}
132
133void ResM::Init(uint32_t num_subgraphs)
134{
135 std::lock_guard<std::mutex> lock(mutex_init_m);
136
137 if (num_subgraphs_m == 0)
138 {
139 num_subgraphs_m = num_subgraphs;
140
141 if (getenv("TIDL_SUBGRAPH_TRACE") != nullptr) enable_trace_m = true;
142
143 // Allocating resources
144 num_eves_m = Executor::GetNumDevices(DeviceType::EVE);
145 num_eves_m = 1; // TODO: to remove after debugging
146 num_dsps_m = Executor::GetNumDevices(DeviceType::DSP);
147
148 assert(num_eves_m > 0 || num_dsps_m > 0);
149 assert(num_subgraphs_m <= num_eves_m || num_subgraphs_m <= num_dsps_m);
150 num_es_per_subgraph_m = num_eves_m / num_subgraphs_m;
151 if (num_eves_m == 0)
152 num_es_per_subgraph_m = num_dsps_m / num_subgraphs_m;
153
154 cs_m.resize(num_subgraphs_m);
155 es_m.resize(num_subgraphs_m, nullptr);
156 e2s_m.resize(num_subgraphs_m, nullptr);
157 eops_m = new std::vector<ResEOP>(num_subgraphs_m);
158 }
159}
160
161ExecutionObjectPipeline* ResM::GetEOP(uint32_t subgraph_id)
162{
163 assert(subgraph_id < num_subgraphs_m);
164 ResEOP& res_eop = (*eops_m)[subgraph_id];
165
166 std::unique_lock<std::mutex> lock(res_eop.mutex_eops);
167
168 if (res_eop.eops == nullptr)
169 {
170 if (enable_trace_m)
171 printf("Subgraph %d: initialing E/EOPs with %d cores\n",
172 subgraph_id, num_es_per_subgraph_m);
173
174 // Constructing EOPs if not already constructed
175 // Each subgraph -> num_eves_per_subgraph_m EOPs
176 // Each EOP -> use_count
177 std::string cfg_file = "subgraph" + std::to_string(subgraph_id) + ".cfg";
178 bool status = cs_m[subgraph_id].ReadFromFile(cfg_file);
179 assert(status);
180
181 // Check if last few layers can be offloaded to DSPs
182 // and DSPs are available
183 DeviceIds e_ids, e2_ids;
184 for (uint32_t i = 0; i < num_es_per_subgraph_m; i++)
185 e_ids.insert(static_cast<DeviceId>(
186 subgraph_id * num_es_per_subgraph_m + i));
187 // uint32_t num_dsps_used = 0;
188 if (num_eves_m > 0 && num_dsps_m > 0 && ! cs_m[subgraph_id].runFullNet)
189 {
190 sTIDL_Network_t *net = new sTIDL_Network_t;
191 bool status = ReadNetworkBinary(cs_m[subgraph_id].netBinFile,
192 reinterpret_cast<char *>(net));
193 assert(status);
194 int32_t start_layer = net->numLayers -1;
195 int32_t end_layer = 0;
196 if (net->TIDLLayers[start_layer].layerType == (int32_t) TIDL_DataLayer)
197 start_layer -= 1;
198 if (net->TIDLLayers[end_layer].layerType == (int32_t) TIDL_DataLayer)
199 end_layer += 1;
200 int32_t i = start_layer;
201 for ( ; i > end_layer; i--)
202 {
203 int32_t layer_type = net->TIDLLayers[i].layerType;
204 if (layer_type != (int32_t) TIDL_SoftMaxLayer &&
205 layer_type != (int32_t) TIDL_InnerProductLayer &&
206 layer_type != (int32_t) TIDL_PoolingLayer)
207 break;
208 }
209 i += 1;
210 if (i <= start_layer)
211 {
212 if (num_lg2_dsps_used_m < num_dsps_m)
213 {
214 if (enable_trace_m)
215 printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n",
216 subgraph_id, i, start_layer);
217 while (i <= start_layer)
218 cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2;
219 e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m));
220 num_lg2_dsps_used_m += 1;
221 }
222 }
223 delete net;
224 }
225
226 if (e2_ids.empty())
227 cs_m[subgraph_id].runFullNet = true;
228 cs_m[subgraph_id].enableApiTrace = enable_trace_m;
229
230 // Constructing Es and EOPs
231 res_eop.eops = new std::vector<ExecutionObjectPipeline*>;
232 uint32_t buffer_factor = 2; // double buffering factor
233 if (num_eves_m > 0)
234 {
235 es_m[subgraph_id] = new Executor(DeviceType::EVE, e_ids,
236 cs_m[subgraph_id], 1);
237 if (! e2_ids.empty())
238 {
239 e2s_m[subgraph_id] = new Executor(DeviceType::DSP, e2_ids,
240 cs_m[subgraph_id], 2);
241 for (uint32_t j = 0; j < buffer_factor; j++)
242 for (uint32_t i = 0; i < num_es_per_subgraph_m; i++)
243 res_eop.eops->emplace_back(new ExecutionObjectPipeline(
244 {(*es_m[subgraph_id])[i],
245 (*e2s_m[subgraph_id])[i % e2_ids.size()]}));
246 }
247 else
248 {
249 for (uint32_t j = 0; j < buffer_factor; j++)
250 for (uint32_t i = 0; i < num_es_per_subgraph_m; i++)
251 res_eop.eops->emplace_back(new ExecutionObjectPipeline(
252 {(*es_m[subgraph_id])[i]}));
253 }
254 }
255 else
256 {
257 es_m[subgraph_id] = new Executor(DeviceType::DSP, e_ids,
258 cs_m[subgraph_id], 1);
259 for (uint32_t j = 0; j < buffer_factor; j++)
260 for (uint32_t i = 0; i < num_es_per_subgraph_m; i++)
261 res_eop.eops->emplace_back(new ExecutionObjectPipeline(
262 {(*es_m[subgraph_id])[i]}));
263 }
264
265 if (enable_trace_m)
266 printf("Subgraph %d: Allocating input/output buffers for %d EOPs\n",
267 subgraph_id, res_eop.eops->size());
268 // Allocate input/output buffers
269 for (auto eop : *(res_eop.eops))
270 {
271 size_t in_size = eop->GetInputBufferSizeInBytes();
272 size_t out_size = eop->GetOutputBufferSizeInBytes();
273 void* in_ptr = malloc(in_size);
274 void* out_ptr = malloc(out_size);
275 assert(in_ptr != nullptr && out_ptr != nullptr);
276
277 ArgInfo in(in_ptr, in_size);
278 ArgInfo out(out_ptr, out_size);
279 eop->SetInputOutputBuffer(in, out);
280 }
281
282 res_eop.free_eop_index = 0;
283 res_eop.is_used.resize(res_eop.eops->size(), false);
284 }
285
286 // Return an available EOP (round robin allocation)
287 uint32_t curr_eop = res_eop.free_eop_index;
288 res_eop.cv_eops.wait(lock, [this, subgraph_id, curr_eop]{
289 return this->eops_m->at(subgraph_id).is_used[curr_eop] == false; });
290 res_eop.is_used[curr_eop] = true;
291 res_eop.free_eop_index = (curr_eop + 1) % res_eop.eops->size();
292 if (enable_trace_m)
293 printf("Subgraph %d: return EOP %d for GetEOP()\n", subgraph_id, curr_eop);
294 return res_eop.eops->at(curr_eop);
295}
296
297void ResM::FreeEOP(uint32_t subgraph_id, ExecutionObjectPipeline* eop)
298{
299 ResEOP& res_eop = (*eops_m)[subgraph_id];
300 {
301 std::unique_lock<std::mutex> lock(res_eop.mutex_eops);
302 for (uint32_t i = 0; i < res_eop.is_used.size(); i++)
303 if (res_eop.eops->at(i) == eop)
304 {
305 res_eop.is_used[i] = false;
306 if (enable_trace_m)
307 printf("Subgraph %d: FreeEOP %d\n", subgraph_id, i);
308 break;
309 }
310 }
311 res_eop.cv_eops.notify_all();
312}
313
314Configuration& ResM::GetConfiguration(uint32_t subgraph_id)
315{
316 assert(subgraph_id < num_subgraphs_m);
317 assert((*eops_m)[subgraph_id].eops != nullptr);
318 return cs_m[subgraph_id];
319}
320
321