-rw-r--r--   examples/mobilenet_subgraph/main.cpp    36
-rw-r--r--   tidl_api/inc/subgraph_runtime.h          7
-rw-r--r--   tidl_api/src/subgraph_runtime.cpp      124
-rw-r--r--   tidl_api/src/subgraph_runtime_impl.h     1
4 files changed, 106 insertions, 62 deletions
diff --git a/examples/mobilenet_subgraph/main.cpp b/examples/mobilenet_subgraph/main.cpp
index 8a77f65..2233d96 100644
--- a/examples/mobilenet_subgraph/main.cpp
+++ b/examples/mobilenet_subgraph/main.cpp
@@ -67,6 +67,11 @@ using namespace cv;
 #define NUM_DEFAULT_INPUTS 1
 #define DEFAULT_OBJECT_CLASSES_LIST_FILE "imagenet_objects.json"
 #define DEFAULT_OUTPUT_PROB_THRESHOLD 5
+#define MOBILENET_IN_C (3)
+#define MOBILENET_IN_H (224)
+#define MOBILENET_IN_W (224)
+#define MOBILENET_INPUT_SIZE (1*MOBILENET_IN_C*MOBILENET_IN_H*MOBILENET_IN_W)
+#define MOBILENET_OUTPUT_SIZE (1001)
 const char *default_inputs[NUM_DEFAULT_INPUTS] =
 {
     "../test/testvecs/input/objects/cat-pet-animal-domestic-104827.jpeg"
@@ -169,9 +174,9 @@ bool RunConfiguration(cmdline_opts_t& opts)
     {
         TidlInitSubgraph(1, 0);
         float **inputs = new float *[1];
-        inputs[0] = new float[1*3*224*224];
+        inputs[0] = new float[MOBILENET_INPUT_SIZE];
         float **outputs = new float *[1];
-        outputs[0] = new float[1001];
+        outputs[0] = new float[MOBILENET_OUTPUT_SIZE];

         for (int i = 0; i < 5; i ++)
         {
@@ -216,8 +221,8 @@ bool RunConfiguration(cmdline_opts_t& opts)
         float **outputs = new float *[batch_size];
         for (int i = 0; i < batch_size; i++)
         {
-            inputs[i] = new float[1*3*224*224];
-            outputs[i] = new float[1001];
+            inputs[i] = new float[MOBILENET_INPUT_SIZE];
+            outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
         }

         chrono::time_point<chrono::steady_clock> tloop0, tloop1;
@@ -273,8 +278,8 @@ bool RunConfiguration(cmdline_opts_t& opts)
         float **outputs = new float *[num_threads];
         for (int i = 0; i < num_threads; i++)
         {
-            inputs[i] = new float[1*3*224*224];
-            outputs[i] = new float[1001];
+            inputs[i] = new float[MOBILENET_INPUT_SIZE];
+            outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
         }
         vector<future<bool>> futures(num_threads);
         bool skip_outputs = false;
@@ -341,8 +346,8 @@ bool RunConfiguration(cmdline_opts_t& opts)
         vector<UserData> v_data(num_threads);
         for (int i = 0; i < num_threads; i++)
         {
-            inputs[i] = new float[1*3*224*224];
-            outputs[i] = new float[1001];
+            inputs[i] = new float[MOBILENET_INPUT_SIZE];
+            outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
             v_data[i].inputs = &inputs[i];
             v_data[i].outputs = &outputs[i];
         }
@@ -406,8 +411,8 @@ bool RunConfiguration(cmdline_opts_t& opts)
         float **outputs = new float *[num_threads * batch_size];
         for (int i = 0; i < num_threads * batch_size; i++)
         {
-            inputs[i] = new float[1*3*224*224];
-            outputs[i] = new float[1001];
+            inputs[i] = new float[MOBILENET_INPUT_SIZE];
+            outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
         }
         vector<future<bool>> futures(num_threads);
         bool skip_outputs = false;
@@ -482,13 +487,14 @@ bool ReadFrame(const cmdline_opts_t& opts, VideoCapture &cap, float** inputs,
                int batch_size)
 {
     Configuration c;
-    c.inNumChannels = 3;;
-    c.inWidth = 224;
-    c.inHeight = 224;
+    c.inNumChannels = MOBILENET_IN_C;
+    c.inWidth = MOBILENET_IN_W;
+    c.inHeight = MOBILENET_IN_H;
     c.preProcType = 2;
-    SubgraphDataConv in_conv{{0}, {true}, {128.0f}, {false}, {1,3,224,224}};
+    SubgraphDataConv in_conv{{0}, {true}, {128.0f}, {false},
+                             {1,MOBILENET_IN_C,MOBILENET_IN_H,MOBILENET_IN_W}};

-    char* frame_buffer = new char[3*224*224];
+    char* frame_buffer = new char[MOBILENET_INPUT_SIZE];
     assert (frame_buffer != nullptr);

     Mat image;
diff --git a/tidl_api/inc/subgraph_runtime.h b/tidl_api/inc/subgraph_runtime.h
index 65db5b5..c75d6b2 100644
--- a/tidl_api/inc/subgraph_runtime.h
+++ b/tidl_api/inc/subgraph_runtime.h
@@ -48,6 +48,13 @@ extern void TidlInitSubgraph(int total_subgraphs,
                              int subgraph_id
                              );

+//! @brief Top level API to free a TIDL subgraph on device
+//! @param total_subgraphs total number of TIDL subgraphs in whole inference
+//! @param subgraph_id index of current TIDL subgraph
+extern void TidlFreeSubgraph(int total_subgraphs,
+                             int subgraph_id
+                             );
+
 //! @brief Top level inference to run a TIDL subgraph
 //! @param total_subgraphs total number of TIDL subgraphs in whole inference
 //! @param subgraph_id index of current TIDL subgraph
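
For reference, a minimal caller of the new API might look like the sketch below. It is not part of this patch: only the TidlInitSubgraph/TidlFreeSubgraph signatures come from the header above, while the include path and the elided inference step via TidlRunSubgraph are assumptions based on the mobilenet_subgraph example.

    // Hypothetical host-side sketch; illustrative only.
    #include "subgraph_runtime.h"   // assumed include path for the TIDL subgraph runtime API

    int main()
    {
        const int total_subgraphs = 1;   // whole inference contains a single TIDL subgraph
        const int subgraph_id     = 0;

        TidlInitSubgraph(total_subgraphs, subgraph_id);   // create device resources up front

        // ... allocate input/output tensors and run inference via TidlRunSubgraph() ...

        TidlFreeSubgraph(total_subgraphs, subgraph_id);   // new API: release this subgraph's resources
        return 0;
    }
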
diff --git a/tidl_api/src/subgraph_runtime.cpp b/tidl_api/src/subgraph_runtime.cpp
index 24b378e..9d068b2 100644
--- a/tidl_api/src/subgraph_runtime.cpp
+++ b/tidl_api/src/subgraph_runtime.cpp
@@ -85,6 +85,11 @@ void TidlInitSubgraph(int total_subgraphs, int subgraph_id)
     res.InitSubgraph(subgraph_id);
 }

+void TidlFreeSubgraph(int total_subgraphs, int subgraph_id)
+{
+    ResM& res = ResM::Instance(total_subgraphs);
+    res.FreeSubgraph(subgraph_id);
+}

 void TidlRunSubgraph(int total_subgraphs,
                      int subgraph_id,
@@ -152,32 +157,44 @@ ResM::ResM() : enable_trace_m(false), num_subgraphs_m(0),

 ResM::~ResM()
 {
+    for (uint32_t i = 0; i < num_subgraphs_m; i++)
+        FreeSubgraph(i);
+
+    delete eops_m;
+    eops_m = nullptr;
+}
+
+void ResM::FreeSubgraph(uint32_t subgraph_id)
+{
+    assert(subgraph_id < num_subgraphs_m);
+
     if (eops_m != nullptr)
     {
-        for (const ResEOP& res_eop : *eops_m)
+        ResEOP& res_eop = (*eops_m)[subgraph_id];
+        if (res_eop.eops != nullptr)
         {
-            if (res_eop.eops != nullptr)
+            for (const ExecutionObjectPipeline* eop : *(res_eop.eops))
             {
-                for (const ExecutionObjectPipeline* eop : *(res_eop.eops))
-                {
-                    free(eop->GetInputBufferPtr());
-                    free(eop->GetOutputBufferPtr());
-                    delete eop;
-                }
+                free(eop->GetInputBufferPtr());
+                free(eop->GetOutputBufferPtr());
+                delete eop;
             }
+            delete res_eop.eops;
+            res_eop.eops = nullptr;
         }
-        delete eops_m;
-        eops_m = nullptr;
     }

-    for (const Executor* e : es_m)
-        if (e != nullptr) delete e;
-    for (const Executor* e : e2s_m)
-        if (e != nullptr) delete e;
-    for (SubgraphDataConv *dc : in_conv_m)
-        if (dc != nullptr) delete dc;
-    for (SubgraphDataConv *dc : out_conv_m)
-        if (dc != nullptr) delete dc;
+    delete es_m[subgraph_id];
+    es_m[subgraph_id] = nullptr;
+
+    delete e2s_m[subgraph_id];
+    e2s_m[subgraph_id] = nullptr;
+
+    delete in_conv_m[subgraph_id];
+    in_conv_m[subgraph_id] = nullptr;
+
+    delete out_conv_m[subgraph_id];
+    out_conv_m[subgraph_id] = nullptr;
 }

 ResM& ResM::Instance(uint32_t total_num_subgraphs)
@@ -290,42 +307,55 @@ void ResM::InitSubgraph(uint32_t subgraph_id)
     // uint32_t num_dsps_used = 0;
     if (num_eves_m > 0 && num_dsps_m > 0 && ! cs_m[subgraph_id].runFullNet)
     {
-        int32_t start_layer = net->numLayers -1;
-        int32_t end_layer = 0;
-        if (net->TIDLLayers[start_layer].layerType == (int32_t) TIDL_DataLayer)
-            start_layer -= 1;
-        if (net->TIDLLayers[end_layer].layerType == (int32_t) TIDL_DataLayer)
-            end_layer += 1;
-        int32_t i = start_layer;
-        for ( ; i > end_layer; i--)
+        if (cs_m[subgraph_id].layerIndex2LayerGroupId.empty())
         {
-            int32_t layer_type = net->TIDLLayers[i].layerType;
-            if (layer_type != (int32_t) TIDL_SoftMaxLayer &&
-                layer_type != (int32_t) TIDL_InnerProductLayer &&
-                layer_type != (int32_t) TIDL_PoolingLayer)
-                break;
-        }
-        i += 1;
-        if (i <= start_layer)
-        {
-            if (num_lg2_dsps_used_m < num_dsps_m)
+            int32_t start_layer = net->numLayers -1;
+            int32_t end_layer = 0;
+            if (net->TIDLLayers[start_layer].layerType == (int32_t) TIDL_DataLayer)
+                start_layer -= 1;
+            if (net->TIDLLayers[end_layer].layerType == (int32_t) TIDL_DataLayer)
+                end_layer += 1;
+            int32_t i = start_layer;
+            for ( ; i > end_layer; i--)
+            {
+                int32_t layer_type = net->TIDLLayers[i].layerType;
+                if (layer_type != (int32_t) TIDL_SoftMaxLayer &&
+                    layer_type != (int32_t) TIDL_InnerProductLayer &&
+                    layer_type != (int32_t) TIDL_PoolingLayer)
+                    break;
+            }
+            i += 1;
+            if (i <= start_layer)
             {
-                if (enable_trace_m)
-                    printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n",
-                           subgraph_id, i, start_layer);
-                while (i <= start_layer)
-                    cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2;
-                e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m));
-                num_lg2_dsps_used_m += 1;
-                if (num_subgraphs_m == 1) // Allocate all dsps if only one subgraph
+                if (num_lg2_dsps_used_m < num_dsps_m)
                 {
-                    while (num_lg2_dsps_used_m < num_dsps_m)
-                        e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m++));
+                    if (enable_trace_m)
+                        printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n",
+                               subgraph_id, i, start_layer);
+                    while (i <= start_layer)
+                        cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2;
                 }
             }
         }
-        delete net;
+        else
+        {
+            if (enable_trace_m)
+                printf("Subgraph %d: using layer2group map in config file for DSP\n",
+                       subgraph_id);
+        }
+
+        if (! cs_m[subgraph_id].layerIndex2LayerGroupId.empty())
+        {
+            e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m));
+            num_lg2_dsps_used_m += 1;
+            if (num_subgraphs_m == 1) // Allocate all dsps if only one subgraph
+            {
+                while (num_lg2_dsps_used_m < num_dsps_m)
+                    e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m++));
+            }
+        }
     }
+    delete net;

     if (e2_ids.empty())
         cs_m[subgraph_id].runFullNet = true;
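
The InitSubgraph() change above makes the tail-layer heuristic conditional: when the subgraph's configuration already carries a layerIndex2LayerGroupId mapping (supplied through the config file, per the trace message), that mapping is used as-is, and a DSP is still claimed for layer group 2 whenever the map ends up non-empty. A rough sketch of pre-populating such a map programmatically is shown below; the layer indices are made up, the configuration.h include path and tidl::Configuration namespace are assumptions, and only the member name and the "group 2 == DSP" convention come from this patch.

    // Illustrative sketch only; not part of this patch.
    #include "configuration.h"   // assumed header for tidl::Configuration

    // Pin a contiguous range of layer indices to layer group 2 (the DSP group in the
    // code above), so InitSubgraph() skips its SoftMax/InnerProduct/Pooling heuristic
    // and uses this mapping directly.
    static void PinLayersToDsp(tidl::Configuration& c, int first_layer, int last_layer)
    {
        for (int i = first_layer; i <= last_layer; i++)
            c.layerIndex2LayerGroupId[i] = 2;   // assumed to be an int -> int map
    }
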
diff --git a/tidl_api/src/subgraph_runtime_impl.h b/tidl_api/src/subgraph_runtime_impl.h
index 9738dbb..54dc12d 100644
--- a/tidl_api/src/subgraph_runtime_impl.h
+++ b/tidl_api/src/subgraph_runtime_impl.h
@@ -53,6 +53,7 @@ class ResM {

     // how to get resources for subgraph_id
     void InitSubgraph(uint32_t subgraph_id);
+    void FreeSubgraph(uint32_t subgraph_id);
     uint32_t GetNumEOPs(uint32_t subgraph_id);
     ExecutionObjectPipeline* GetEOP(uint32_t subgraph_id);
     void FreeEOP(uint32_t subgraph_id,