author     Yuan Zhao  2019-10-31 15:17:38 -0500
committer  Yuan Zhao  2019-11-05 12:10:52 -0600
commit     d8571df2dae7a18f400e8b678005a301bb2c75ad (patch)
tree       5cfbc38fc332f79e7fbb3b319a4821f34ca005bd
parent     cc826f3237dd2764226c2240f03dc5bfb90586aa (diff)
Subgraph: support batch processing
- MCT-1223
-rw-r--r--  examples/mobilenet_subgraph/Makefile  |   1
-rw-r--r--  examples/mobilenet_subgraph/main.cpp  |  64
-rw-r--r--  tidl_api/Makefile                     |   4
-rw-r--r--  tidl_api/inc/subgraph_data_conv.h     |   2
-rw-r--r--  tidl_api/inc/subgraph_runtime.h       | 117
-rw-r--r--  tidl_api/src/subgraph_runtime.cpp     | 118
-rw-r--r--  tidl_api/src/subgraph_runtime_impl.h  |  94
7 files changed, 286 insertions(+), 114 deletions(-)
diff --git a/examples/mobilenet_subgraph/Makefile b/examples/mobilenet_subgraph/Makefile
index aed396c..ffeb69d 100644
--- a/examples/mobilenet_subgraph/Makefile
+++ b/examples/mobilenet_subgraph/Makefile
@@ -28,6 +28,7 @@ EXE = imagenet
 
 include ../make.common
 
+# overwrite LIBS, -ltidl_api should be able to pull -lOpenCL
 LIBS += -lopencv_highgui -lopencv_imgcodecs -lopencv_videoio\
         -lopencv_imgproc -lopencv_core
 LIBS += -ljson-c
diff --git a/examples/mobilenet_subgraph/main.cpp b/examples/mobilenet_subgraph/main.cpp
index c5da647..5534df3 100644
--- a/examples/mobilenet_subgraph/main.cpp
+++ b/examples/mobilenet_subgraph/main.cpp
@@ -44,6 +44,7 @@
 #include "execution_object.h"
 #include "execution_object_pipeline.h"
 #include "subgraph_runtime.h"
+#include "subgraph_data_conv.h"
 #include "configuration.h"
 #include "../common/object_classes.h"
 #include "imgutil.h"
@@ -70,7 +71,8 @@ const char *default_inputs[NUM_DEFAULT_INPUTS] =
 std::unique_ptr<ObjectClasses> object_classes;
 
 bool RunConfiguration(cmdline_opts_t& opts);
-bool ReadFrame(const cmdline_opts_t& opts, VideoCapture &cap, float** inputs);
+bool ReadFrame(const cmdline_opts_t& opts, VideoCapture &cap, float** inputs,
+               int batch_size);
 bool WriteFrameOutput(float *out, const cmdline_opts_t& opts);
 void DisplayHelp();
 
@@ -140,6 +142,7 @@ bool RunConfiguration(cmdline_opts_t& opts)
     VideoCapture cap;
     if (! SetVideoInputOutput(cap, opts, "ImageNet")) return false;
 
+    cout << "\n##### Batch size 1 testing ######\n" << endl;
     try
     {
         float **inputs  = new float *[1];
@@ -152,8 +155,8 @@ bool RunConfiguration(cmdline_opts_t& opts)
         chrono::time_point<chrono::steady_clock> tloop0, tloop1;
         tloop0 = chrono::steady_clock::now();
 
-        ReadFrame(opts, cap, inputs);
-        TidlRunSubgraph(1, 0, 1, 1, inputs, outputs);
+        ReadFrame(opts, cap, inputs, 1);
+        TidlRunSubgraph(1, 0, 1, 1, 1, inputs, outputs);
         WriteFrameOutput(outputs[0], opts);
 
         tloop1 = chrono::steady_clock::now();
@@ -175,11 +178,56 @@ bool RunConfiguration(cmdline_opts_t& opts)
         status = false;
     }
 
+    int batch_size = 8;
+    cout << "\n##### Batch size " << batch_size << " testing ######\n" << endl;
+    try
+    {
+        float **inputs  = new float *[batch_size];
+        float **outputs = new float *[batch_size];
+        for (int i = 0; i < batch_size; i++)
+        {
+            inputs[i]  = new float[1*3*224*224];
+            outputs[i] = new float[1001];
+        }
+
+        chrono::time_point<chrono::steady_clock> tloop0, tloop1;
+        tloop0 = chrono::steady_clock::now();
+
+        ReadFrame(opts, cap, inputs, batch_size);
+        TidlRunSubgraph(1, 0, batch_size, 1, 1, inputs, outputs);
+        for (int i = 0; i < batch_size; i++)
+        {
+            cout << "Frame " << i << " of " << batch_size << " output:" << endl;
+            WriteFrameOutput(outputs[i], opts);
+        }
+
+        tloop1 = chrono::steady_clock::now();
+        chrono::duration<float> elapsed = tloop1 - tloop0;
+        cout << "Batch size " << batch_size
+             << " time (including read/write/opencv/print/etc): "
+             << setw(6) << setprecision(4)
+             << (elapsed.count() * 1000) << "ms" << endl;
+
+        for (int i = 0; i < batch_size; i++)
+        {
+            delete [] inputs[i];
+            delete [] outputs[i];
+        }
+        delete [] inputs;
+        delete [] outputs;
+    }
+    catch (tidl::Exception &e)
+    {
+        cerr << e.what() << endl;
+        status = false;
+    }
+
     return status;
 }
 
 
-bool ReadFrame(const cmdline_opts_t& opts, VideoCapture &cap, float** inputs)
+bool ReadFrame(const cmdline_opts_t& opts, VideoCapture &cap, float** inputs,
+               int batch_size)
 {
     Configuration c;
     c.inNumChannels = 3;
@@ -226,8 +274,11 @@ bool ReadFrame(const cmdline_opts_t& opts, VideoCapture &cap, float** inputs)
 
     // TI DL image preprocessing, into frame_buffer
     bool status = imgutil::PreprocessImage(image, frame_buffer, c);
-    std::vector<float *> in_data_v{inputs[0]};
-    in_conv.ScaleDequant((const uint8_t *)frame_buffer, in_data_v);
+    for (int i = 0; i < batch_size; i++)
+    {
+        std::vector<float *> in_data_v{inputs[i]};
+        in_conv.ScaleDequant((const uint8_t *)frame_buffer, in_data_v);
+    }
     delete [] frame_buffer;
     return status;
 }
@@ -247,6 +298,7 @@ bool WriteFrameOutput(float *out, const cmdline_opts_t& opts)
     auto cmp = [](val_index &left, val_index &right)
                    { return left.first > right.first; };
     priority_queue<val_index, vector<val_index>, decltype(cmp)> queue(cmp);
+
     // initialize priority queue with smallest value on top
     for (int i = 0; i < k; i++)
         queue.push(val_index(out[i], i));
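
For reference, this is a minimal sketch of a standalone caller of the batched entry point exercised by the example above. The buffer sizes (3x224x224 input, 1001-class output) and the single-subgraph, one-input/one-output arguments mirror the MobileNet example; filling the input buffers with real preprocessed data is left out.

// Hypothetical standalone caller of the batched runtime API used above.
#include <vector>

extern "C" void TidlRunSubgraph(int total_subgraphs, int subgraph_id,
                                int batch_size,
                                int num_inputs_per_inference,
                                int num_outputs_per_inference,
                                float **input_tensors,
                                float **output_tensors);

int main()
{
    const int batch_size = 8;
    std::vector<std::vector<float>> in(batch_size,
                                       std::vector<float>(3 * 224 * 224));
    std::vector<std::vector<float>> out(batch_size,
                                        std::vector<float>(1001));

    // Batch-major pointer arrays: entry i holds the tensors of inference i
    std::vector<float*> inputs(batch_size), outputs(batch_size);
    for (int i = 0; i < batch_size; i++)
    {
        inputs[i]  = in[i].data();    // fill with preprocessed image data
        outputs[i] = out[i].data();
    }

    // total_subgraphs=1, subgraph_id=0, 1 input and 1 output per inference
    TidlRunSubgraph(1, 0, batch_size, 1, 1, inputs.data(), outputs.data());
    return 0;
}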
diff --git a/tidl_api/Makefile b/tidl_api/Makefile
index ca91878..a04e604 100644
--- a/tidl_api/Makefile
+++ b/tidl_api/Makefile
@@ -95,7 +95,7 @@ $(LIB_NAME): $(HOST_OBJ_FILES)
 	$(AR) cr $@ $(HOST_OBJ_FILES)
 
 $(SHARED_LIB_NAME): $(HOST_OBJ_FILES)
-	$(CXX) -shared $(HOST_OBJ_FILES) -o $@
+	$(CXX) $(CXXFLAGS) -Wl,-Bsymbolic -shared -lOpenCL $(HOST_OBJ_FILES) -o $@
 
 $(PY_LIB_NAME): $(HOST_OBJ_PYBIND_FILES) $(LIB_NAME)
 	$(CXX) $(CXXFLAGS) -Wl,-Bsymbolic -shared -lOpenCL -locl_util $^ -o $@
@@ -104,7 +104,7 @@ $(LIB_IMGUTIL_NAME): $(HOST_OBJ_IMGUTIL_FILES)
 	$(AR) cr $@ $(HOST_OBJ_IMGUTIL_FILES)
 
 $(SHARED_LIB_IMGUTIL_NAME): $(HOST_OBJ_IMGUTIL_FILES)
-	$(CXX) -shared $(HOST_OBJ_IMGUTIL_FILES) -o $@
+	$(CXX) $(CXXFLAGS) -Wl,-Bsymbolic -shared $(HOST_OBJ_IMGUTIL_FILES) -o $@
 
 clean::
 	$(RM) -f $(LIB_NAME) $(PY_LIB_NAME)
diff --git a/tidl_api/inc/subgraph_data_conv.h b/tidl_api/inc/subgraph_data_conv.h
index 49b4315..6b7c4b1 100644
--- a/tidl_api/inc/subgraph_data_conv.h
+++ b/tidl_api/inc/subgraph_data_conv.h
@@ -26,6 +26,8 @@
  * THE POSSIBILITY OF SUCH DAMAGE.
  *****************************************************************************/
 
+#pragma once
+
 #include <stdint.h>
 #include <vector>
 
diff --git a/tidl_api/inc/subgraph_runtime.h b/tidl_api/inc/subgraph_runtime.h
index c153485..37e771d 100644
--- a/tidl_api/inc/subgraph_runtime.h
+++ b/tidl_api/inc/subgraph_runtime.h
@@ -29,25 +29,36 @@
 //! @file subgraph_runtime.h
 
 #pragma once
-#include <vector>
-#include <mutex>
-#include <condition_variable>
-#include "execution_object_pipeline.h"
-#include "subgraph_data_conv.h"
 
 extern "C" {
 
-void TidlRunSubgraph(int total_subgraphs,
-                     int subgraph_id,
-                     int num_inputs,
-                     int num_outputs,
-                     float **inputTensors,
-                     float **outputTensors
-                    );
+//! @brief Top level inference to run a TIDL subgraph
+//! @param total_subgraphs  total number of TIDL subgraphs in whole inference
+//! @param subgraph_id      index of current TIDL subgraph
+//! @param batch_size       number of samples/inferences in this batch
+//! @param num_inputs_per_inference   number of inputs to TIDL subgraph
+//!                                   for every sample/inference
+//! @param num_outputs_per_inference  number of outputs from TIDL subgraph
+//!                                   for every sample/inference
+//! @param input_tensors   input data to TIDL subgraph, layout as
+//!                        batch1_input1, batch1_input2, ..., batch1_inputM,
+//!                        ...            ...                 ...
+//!                        batchN_input1, batchN_input2, ..., batchN_inputM
+//! @param output_tensors  output data from TIDL subgraph, layout as
+//!                        batch1_output1, batch1_output2, ..., batch1_outputK,
+//!                        ...             ...                  ...
+//!                        batchN_output1, batchN_output2, ..., batchN_outputK
+extern void TidlRunSubgraph(int total_subgraphs,
+                            int subgraph_id,
+                            int batch_size,
+                            int num_inputs_per_inference,
+                            int num_outputs_per_inference,
+                            float **input_tensors,
+                            float **output_tensors
+                           );
 
 } // extern "C"
 
-namespace tidl {
 
 #if 0
 // Auto-generated code from Relay/TVM compilation step after
@@ -57,20 +68,25 @@ void TVM_TidlFunction(int total_subgraphs, int subgraph_id,
                       int num_input_tensors, int num_output_tensors,
                       PackedArgs args)
 {
-  float** in_data  = new float*[num_input_tensors];
-  float** out_data = new float*[num_output_tensors];
+  float** in_data  = new float*[num_inputs_per_inference * batch_size];
+  float** out_data = new float*[num_outputs_per_inference * batch_size];
 
-  for (int i = 0; i < num_input_tensors + num_output_tensors; i++)
-    if (i < num_input_tensors)
-      in_data[i] = args.data[i];
-    else
-      out_data[i - num_input_tensors] = args.data[i];
+  for (int j = 0; j < batch_size; j++)
+  {
+    for (int i = 0; i < num_inputs_per_inference + num_outputs_per_inference;
+         i++)
+      if (i < num_inputs_per_inference)
+        in_data[j * num_inputs_per_inference + i] = args.data[i][j];
+      else
+        out_data[j * num_outputs_per_inference + i - num_inputs_per_inference]
+            = args.data[i][j];
+  }
 
   // call into this function in libtidl.so
-  // dlopen("libtidl.so")
+  // dlopen("libtidl_api.so")
   // TidlFunc = dlsym("TidlRunSubgraph");
-  (*TidlFunc)(total_subgraphs, subgraph_id,
-              num_input_tensors, num_output_tensors,
+  (*TidlFunc)(total_subgraphs, subgraph_id, batch_size,
+              num_inputs_per_inference, num_outputs_per_inference,
               in_data, out_data);
 
   delete [] in_data;
@@ -78,56 +94,3 @@ void TVM_TidlFunction(int total_subgraphs, int subgraph_id,
 }
 #endif
 
-
-// Singleton ResM .h file
-// Resource manager for available EVE and DSP devices,
-//   - Allocates EVEs and DSPs
-//   - Constructs Executors (tidl_setup) and ExecutionObjects (tid_init)
-//   - Creates set of ExecutionPipelines (with or without DSP)
-//   - Allocating EOP on demand (acquire and free semantics)
-//   - Allocates input/output buffers
-class ResM {
-  public:
-    ResM();
-    ~ResM();
-    static ResM& Instance(uint32_t total_num_subgraphs = 1);
-
-    // how to ge
-    ExecutionObjectPipeline* GetEOP(uint32_t subgraph_id);
-    void FreeEOP(uint32_t subgraph_id,
-                 ExecutionObjectPipeline* eop);
-    Configuration& GetConfiguration(uint32_t subgraph_id);
-    const SubgraphDataConv& GetInConv(uint32_t subgraph_id);
-    const SubgraphDataConv& GetOutConv(uint32_t subgraph_id);
-
-
-  private:
-    void Init(uint32_t num_subgraphs);
-
-    bool enable_trace_m;
-    uint32_t num_subgraphs_m;
-    uint32_t num_es_per_subgraph_m;
-    uint32_t num_eves_m;
-    uint32_t num_dsps_m;
-    uint32_t num_lg2_dsps_used_m;  // in partitioned execution case
-    std::mutex mutex_init_m;
-
-    // indexed by subgraph_id for resources
-    struct ResEOP {
-      ResEOP() : free_eop_index(0), is_used(), eops(nullptr) {}
-
-      uint32_t free_eop_index;
-      std::mutex mutex_eops;
-      std::condition_variable cv_eops;
-      std::vector<bool> is_used;
-      std::vector<ExecutionObjectPipeline*>* eops;
-    };
-    std::vector<Configuration> cs_m;
-    std::vector<Executor*> es_m;
-    std::vector<Executor*> e2s_m;
-    std::vector<ResEOP> *eops_m;
-    std::vector<SubgraphDataConv*> in_conv_m;
-    std::vector<SubgraphDataConv*> out_conv_m;
-};
-
-} // namespace tidl
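
The batch-major layout described in the new doc comments implies a simple flat indexing of the pointer arrays. The helpers below are hypothetical, not part of the API; they only illustrate where tensor j of inference b lives.

// Hypothetical indexing helpers for the batch-major tensor layout above
inline float* input_tensor(float** input_tensors,
                           int b, int num_inputs_per_inference, int j)
{
    return input_tensors[b * num_inputs_per_inference + j];
}

inline float* output_tensor(float** output_tensors,
                            int b, int num_outputs_per_inference, int j)
{
    return output_tensors[b * num_outputs_per_inference + j];
}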
diff --git a/tidl_api/src/subgraph_runtime.cpp b/tidl_api/src/subgraph_runtime.cpp
index ad5a11a..09905fc 100644
--- a/tidl_api/src/subgraph_runtime.cpp
+++ b/tidl_api/src/subgraph_runtime.cpp
@@ -32,6 +32,7 @@
 
 #include "util.h"
 #include "subgraph_runtime.h"
+#include "subgraph_runtime_impl.h"
 
 
 #if 0
@@ -42,20 +43,25 @@ void TVM_TidlFunction(int total_subgraphs, int subgraph_id,
                       int num_input_tensors, int num_output_tensors,
                       PackedArgs args)
 {
-  float** in_data  = new float*[num_input_tensors];
-  float** out_data = new float*[num_output_tensors];
+  float** in_data  = new float*[num_inputs_per_inference * batch_size];
+  float** out_data = new float*[num_outputs_per_inference * batch_size];
 
-  for (int i = 0; i < num_input_tensors + num_output_tensors; i++)
-    if (i < num_input_tensors)
-      in_data[i] = args.data[i];
-    else
-      out_data[i - num_input_tensors] = args.data[i];
+  for (int j = 0; j < batch_size; j++)
+  {
+    for (int i = 0; i < num_inputs_per_inference + num_outputs_per_inference;
+         i++)
+      if (i < num_inputs_per_inference)
+        in_data[j * num_inputs_per_inference + i] = args.data[i][j];
+      else
+        out_data[j * num_outputs_per_inference + i - num_inputs_per_inference]
+            = args.data[i][j];
+  }
 
   // call into this function in libtidl.so
-  // dlopen("libtidl.so")
+  // dlopen("libtidl_api.so")
   // TidlFunc = dlsym("TidlRunSubgraph");
-  (*TidlFunc)(total_subgraphs, subgraph_id,
-              num_input_tensors, num_output_tensors,
+  (*TidlFunc)(total_subgraphs, subgraph_id, batch_size,
+              num_inputs_per_inference, num_outputs_per_inference,
               in_data, out_data);
 
   delete [] in_data;
@@ -70,31 +76,57 @@ using namespace tidl;
 
 void TidlRunSubgraph(int total_subgraphs,
                      int subgraph_id,
-                     int num_inputs,
-                     int num_outputs,
-                     float **inputTensors,
-                     float **outputTensors
+                     int batch_size,
+                     int num_inputs_per_inference,
+                     int num_outputs_per_inference,
+                     float **input_tensors,
+                     float **output_tensors
                     )
 {
     ResM& res = ResM::Instance(total_subgraphs);
-    ExecutionObjectPipeline* eop = res.GetEOP(subgraph_id);
+    res.InitSubgraph(subgraph_id);
+    int num_eops = res.GetNumEOPs(subgraph_id);
+    if (num_eops > batch_size)  num_eops = batch_size;
+    std::vector<ExecutionObjectPipeline*> eops(num_eops);
+    for (int i = 0; i < num_eops; i++)
+        eops[i] = res.GetEOP(subgraph_id);
     const SubgraphDataConv& in_conv  = res.GetInConv(subgraph_id);
     const SubgraphDataConv& out_conv = res.GetOutConv(subgraph_id);
 
-    std::vector<float *> in_data_v, out_data_v;
-    for (int i = 0; i < num_inputs; i++)
-        in_data_v.emplace_back(inputTensors[i]);
-    for (int i = 0; i < num_outputs; i++)
-        out_data_v.emplace_back(outputTensors[i]);
-    char* in_data = eop->GetInputBufferPtr();
-    in_conv.ScaleQuant(in_data_v, (uint8_t *) in_data);
+    std::vector<std::vector<float *>> in_data_v(batch_size),
+                                      out_data_v(batch_size);
+    for (int frame_idx = 0; frame_idx < batch_size; frame_idx++)
+    {
+        for (int i = 0; i < num_inputs_per_inference; i++)
+            in_data_v[frame_idx].emplace_back(input_tensors[
+                                   frame_idx * num_inputs_per_inference + i]);
+        for (int i = 0; i < num_outputs_per_inference; i++)
+            out_data_v[frame_idx].emplace_back(output_tensors[
+                                  frame_idx * num_outputs_per_inference + i]);
+    }
 
-    eop->ProcessFrameStartAsync();
-    eop->ProcessFrameWait();
+    // Process batch_size frames with available eops in pipelined manner,
+    // additional num_eops iterations to flush the pipeline (epilogue)
+    for (int frame_idx = 0; frame_idx < batch_size + num_eops; frame_idx++)
+    {
+        ExecutionObjectPipeline *eop = eops[frame_idx % num_eops];
 
-    char* out_data = eop->GetOutputBufferPtr();
-    out_conv.ScaleDequant((const uint8_t *) out_data, out_data_v);
-    res.FreeEOP(subgraph_id, eop);
+        if (eop->ProcessFrameWait())
+        {
+            const uint8_t *out_data = (const uint8_t*) eop->GetOutputBufferPtr();
+            out_conv.ScaleDequant(out_data, out_data_v[frame_idx - num_eops]);
+        }
+
+        if (frame_idx < batch_size)
+        {
+            uint8_t *in_data = (uint8_t *) eop->GetInputBufferPtr();
+            in_conv.ScaleQuant(in_data_v[frame_idx], in_data);
+            eop->ProcessFrameStartAsync();
+        }
+    }
+
+    for (int i = 0; i < num_eops; i++)
+        res.FreeEOP(subgraph_id, eops[i]);
 }
@@ -155,7 +187,6 @@ void ResM::Init(uint32_t num_subgraphs)
 
     // Allocating resources
     num_eves_m = Executor::GetNumDevices(DeviceType::EVE);
-    num_eves_m = 1;  // TODO: to remove after debugging
     num_dsps_m = Executor::GetNumDevices(DeviceType::DSP);
 
     assert(num_eves_m > 0 || num_dsps_m > 0);
@@ -180,7 +211,8 @@ void ResM::Init(uint32_t num_subgraphs)
     }
 }
 
-ExecutionObjectPipeline* ResM::GetEOP(uint32_t subgraph_id)
+
+void ResM::InitSubgraph(uint32_t subgraph_id)
 {
     assert(subgraph_id < num_subgraphs_m);
     ResEOP& res_eop = (*eops_m)[subgraph_id];
@@ -240,6 +272,11 @@ ExecutionObjectPipeline* ResM::GetEOP(uint32_t subgraph_id)
                 cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2;
             e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m));
             num_lg2_dsps_used_m += 1;
+            if (num_subgraphs_m == 1)  // Allocate all dsps if only one subgraph
+            {
+                while (num_lg2_dsps_used_m < num_dsps_m)
+                    e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m++));
+            }
         }
     }
     delete net;
@@ -304,6 +341,24 @@ ExecutionObjectPipeline* ResM::GetEOP(uint32_t subgraph_id)
         res_eop.free_eop_index = 0;
         res_eop.is_used.resize(res_eop.eops->size(), false);
     }
+}
+
+uint32_t ResM::GetNumEOPs(uint32_t subgraph_id)
+{
+    assert(subgraph_id < num_subgraphs_m);
+    ResEOP& res_eop = (*eops_m)[subgraph_id];
+    assert(res_eop.eops != nullptr);
+
+    return res_eop.eops->size();
+}
+
+ExecutionObjectPipeline* ResM::GetEOP(uint32_t subgraph_id)
+{
+    assert(subgraph_id < num_subgraphs_m);
+    ResEOP& res_eop = (*eops_m)[subgraph_id];
+    assert(res_eop.eops != nullptr);
+
+    std::unique_lock<std::mutex> lock(res_eop.mutex_eops);
 
     // Return an available EOP (round robin allocation)
     uint32_t curr_eop = res_eop.free_eop_index;
@@ -318,7 +373,10 @@ ExecutionObjectPipeline* ResM::GetEOP(uint32_t subgraph_id)
 
 void ResM::FreeEOP(uint32_t subgraph_id, ExecutionObjectPipeline* eop)
 {
+    assert(subgraph_id < num_subgraphs_m);
     ResEOP& res_eop = (*eops_m)[subgraph_id];
+    assert(res_eop.eops != nullptr);
+
     {
         std::unique_lock<std::mutex> lock(res_eop.mutex_eops);
         for (uint32_t i = 0; i < res_eop.is_used.size(); i++)
@@ -342,12 +400,14 @@ Configuration& ResM::GetConfiguration(uint32_t subgraph_id)
 
 const SubgraphDataConv& ResM::GetInConv(uint32_t subgraph_id)
 {
+    assert(subgraph_id < num_subgraphs_m);
     assert(in_conv_m[subgraph_id] != nullptr);
     return *in_conv_m[subgraph_id];
 }
 
 const SubgraphDataConv& ResM::GetOutConv(uint32_t subgraph_id)
 {
+    assert(subgraph_id < num_subgraphs_m);
     assert(out_conv_m[subgraph_id] != nullptr);
     return *out_conv_m[subgraph_id];
 }
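
The rewritten TidlRunSubgraph above overlaps input quantization, device execution, and output dequantization by rotating over num_eops pipelines. Below is a self-contained model of that round-robin pattern; Stage is a stand-in for ExecutionObjectPipeline, while the modulo indexing and the extra num_eops drain iterations match the loop in the diff.

// Sketch of the round-robin software pipeline used by TidlRunSubgraph
#include <vector>

struct Stage                         // stand-in for ExecutionObjectPipeline
{
    bool in_flight = false;
    bool ProcessFrameWait()          // true if an issued frame just completed
    {
        bool done = in_flight;
        in_flight = false;
        return done;
    }
    void ProcessFrameStartAsync() { in_flight = true; }
};

void RunPipelined(std::vector<Stage>& eops, int batch_size)
{
    int num_eops = static_cast<int>(eops.size());
    // batch_size issue iterations plus num_eops epilogue iterations to drain
    for (int frame_idx = 0; frame_idx < batch_size + num_eops; frame_idx++)
    {
        Stage& eop = eops[frame_idx % num_eops];
        if (eop.ProcessFrameWait())
        {
            // harvest outputs of frame (frame_idx - num_eops) here
        }
        if (frame_idx < batch_size)
        {
            // prepare inputs of frame_idx here, then issue it
            eop.ProcessFrameStartAsync();
        }
    }
}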
diff --git a/tidl_api/src/subgraph_runtime_impl.h b/tidl_api/src/subgraph_runtime_impl.h
new file mode 100644
index 0000000..a792757
--- /dev/null
+++ b/tidl_api/src/subgraph_runtime_impl.h
@@ -0,0 +1,94 @@
+/******************************************************************************
+ * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *    * Neither the name of Texas Instruments Incorporated nor the
+ *      names of its contributors may be used to endorse or promote products
+ *      derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+//! @file subgraph_runtime_impl.h
+
+#pragma once
+#include <vector>
+#include <mutex>
+#include <condition_variable>
+#include "execution_object_pipeline.h"
+#include "subgraph_data_conv.h"
+
+
+namespace tidl {
+
+// Singleton ResM .h file
+// Resource manager for available EVE and DSP devices,
+//   - Allocates EVEs and DSPs
+//   - Constructs Executors (tidl_setup) and ExecutionObjects (tid_init)
+//   - Creates set of ExecutionPipelines (with or without DSP)
+//   - Allocating EOP on demand (acquire and free semantics)
+//   - Allocates input/output buffers
+class ResM {
+  public:
+    ResM();
+    ~ResM();
+    static ResM& Instance(uint32_t total_num_subgraphs = 1);
+
+    // how to get resources for subgraph_id
+    void InitSubgraph(uint32_t subgraph_id);
+    uint32_t GetNumEOPs(uint32_t subgraph_id);
+    ExecutionObjectPipeline* GetEOP(uint32_t subgraph_id);
+    void FreeEOP(uint32_t subgraph_id,
+                 ExecutionObjectPipeline* eop);
+    Configuration& GetConfiguration(uint32_t subgraph_id);
+    const SubgraphDataConv& GetInConv(uint32_t subgraph_id);
+    const SubgraphDataConv& GetOutConv(uint32_t subgraph_id);
+
+
+  private:
+    void Init(uint32_t num_subgraphs);
+
+    bool enable_trace_m;
+    uint32_t num_subgraphs_m;
+    uint32_t num_es_per_subgraph_m;
+    uint32_t num_eves_m;
+    uint32_t num_dsps_m;
+    uint32_t num_lg2_dsps_used_m;  // in partitioned execution case
+    std::mutex mutex_init_m;
+
+    // indexed by subgraph_id for resources
+    struct ResEOP {
+      ResEOP() : free_eop_index(0), is_used(), eops(nullptr) {}
+
+      uint32_t free_eop_index;
+      std::mutex mutex_eops;
+      std::condition_variable cv_eops;
+      std::vector<bool> is_used;
+      std::vector<ExecutionObjectPipeline*>* eops;
+    };
+    std::vector<Configuration> cs_m;
+    std::vector<Executor*> es_m;
+    std::vector<Executor*> e2s_m;
+    std::vector<ResEOP> *eops_m;
+    std::vector<SubgraphDataConv*> in_conv_m;
+    std::vector<SubgraphDataConv*> out_conv_m;
+};
+
+} // namespace tidl
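
A sketch of how a caller might drive the ResM interface declared above, following the acquire/free pattern TidlRunSubgraph uses. Whether GetEOP blocks when all pipelines are busy is an assumption based on the cv_eops member; this header is internal, so the include is shown only for context.

// Illustrative only: drives the ResM singleton declared above
#include <vector>
#include "subgraph_runtime_impl.h"   // internal header, shown for context

using namespace tidl;

void RunWithResM(uint32_t subgraph_id, int batch_size)
{
    ResM& res = ResM::Instance(1);   // one TIDL subgraph in this sketch
    res.InitSubgraph(subgraph_id);   // set up Executors/EOPs once

    // Use at most one EOP per frame in the batch
    int num_eops = static_cast<int>(res.GetNumEOPs(subgraph_id));
    if (num_eops > batch_size) num_eops = batch_size;

    std::vector<ExecutionObjectPipeline*> eops(num_eops);
    for (int i = 0; i < num_eops; i++)
        eops[i] = res.GetEOP(subgraph_id);   // round-robin acquire

    // ... pipelined ProcessFrameStartAsync()/ProcessFrameWait() loop,
    //     using res.GetInConv()/GetOutConv() for (de)quantization ...

    for (int i = 0; i < num_eops; i++)
        res.FreeEOP(subgraph_id, eops[i]);   // return EOPs to the pool
}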