diff options
-rw-r--r-- | examples/mobilenet_subgraph/main.cpp | 36 | ||||
-rw-r--r-- | tidl_api/inc/subgraph_runtime.h | 7 | ||||
-rw-r--r-- | tidl_api/src/subgraph_runtime.cpp | 124 | ||||
-rw-r--r-- | tidl_api/src/subgraph_runtime_impl.h | 1 |
4 files changed, 106 insertions, 62 deletions
diff --git a/examples/mobilenet_subgraph/main.cpp b/examples/mobilenet_subgraph/main.cpp index 8a77f65..2233d96 100644 --- a/examples/mobilenet_subgraph/main.cpp +++ b/examples/mobilenet_subgraph/main.cpp | |||
@@ -67,6 +67,11 @@ using namespace cv; | |||
67 | #define NUM_DEFAULT_INPUTS 1 | 67 | #define NUM_DEFAULT_INPUTS 1 |
68 | #define DEFAULT_OBJECT_CLASSES_LIST_FILE "imagenet_objects.json" | 68 | #define DEFAULT_OBJECT_CLASSES_LIST_FILE "imagenet_objects.json" |
69 | #define DEFAULT_OUTPUT_PROB_THRESHOLD 5 | 69 | #define DEFAULT_OUTPUT_PROB_THRESHOLD 5 |
70 | #define MOBILENET_IN_C (3) | ||
71 | #define MOBILENET_IN_H (224) | ||
72 | #define MOBILENET_IN_W (224) | ||
73 | #define MOBILENET_INPUT_SIZE (1*MOBILENET_IN_C*MOBILENET_IN_H*MOBILENET_IN_W) | ||
74 | #define MOBILENET_OUTPUT_SIZE (1001) | ||
70 | const char *default_inputs[NUM_DEFAULT_INPUTS] = | 75 | const char *default_inputs[NUM_DEFAULT_INPUTS] = |
71 | { | 76 | { |
72 | "../test/testvecs/input/objects/cat-pet-animal-domestic-104827.jpeg" | 77 | "../test/testvecs/input/objects/cat-pet-animal-domestic-104827.jpeg" |
@@ -169,9 +174,9 @@ bool RunConfiguration(cmdline_opts_t& opts) | |||
169 | { | 174 | { |
170 | TidlInitSubgraph(1, 0); | 175 | TidlInitSubgraph(1, 0); |
171 | float **inputs = new float *[1]; | 176 | float **inputs = new float *[1]; |
172 | inputs[0] = new float[1*3*224*224]; | 177 | inputs[0] = new float[MOBILENET_INPUT_SIZE]; |
173 | float **outputs = new float *[1]; | 178 | float **outputs = new float *[1]; |
174 | outputs[0] = new float[1001]; | 179 | outputs[0] = new float[MOBILENET_OUTPUT_SIZE]; |
175 | 180 | ||
176 | for (int i = 0; i < 5; i ++) | 181 | for (int i = 0; i < 5; i ++) |
177 | { | 182 | { |
@@ -216,8 +221,8 @@ bool RunConfiguration(cmdline_opts_t& opts) | |||
216 | float **outputs = new float *[batch_size]; | 221 | float **outputs = new float *[batch_size]; |
217 | for (int i = 0; i < batch_size; i++) | 222 | for (int i = 0; i < batch_size; i++) |
218 | { | 223 | { |
219 | inputs[i] = new float[1*3*224*224]; | 224 | inputs[i] = new float[MOBILENET_INPUT_SIZE]; |
220 | outputs[i] = new float[1001]; | 225 | outputs[i] = new float[MOBILENET_OUTPUT_SIZE]; |
221 | } | 226 | } |
222 | 227 | ||
223 | chrono::time_point<chrono::steady_clock> tloop0, tloop1; | 228 | chrono::time_point<chrono::steady_clock> tloop0, tloop1; |
@@ -273,8 +278,8 @@ bool RunConfiguration(cmdline_opts_t& opts) | |||
273 | float **outputs = new float *[num_threads]; | 278 | float **outputs = new float *[num_threads]; |
274 | for (int i = 0; i < num_threads; i++) | 279 | for (int i = 0; i < num_threads; i++) |
275 | { | 280 | { |
276 | inputs[i] = new float[1*3*224*224]; | 281 | inputs[i] = new float[MOBILENET_INPUT_SIZE]; |
277 | outputs[i] = new float[1001]; | 282 | outputs[i] = new float[MOBILENET_OUTPUT_SIZE]; |
278 | } | 283 | } |
279 | vector<future<bool>> futures(num_threads); | 284 | vector<future<bool>> futures(num_threads); |
280 | bool skip_outputs = false; | 285 | bool skip_outputs = false; |
@@ -341,8 +346,8 @@ bool RunConfiguration(cmdline_opts_t& opts) | |||
341 | vector<UserData> v_data(num_threads); | 346 | vector<UserData> v_data(num_threads); |
342 | for (int i = 0; i < num_threads; i++) | 347 | for (int i = 0; i < num_threads; i++) |
343 | { | 348 | { |
344 | inputs[i] = new float[1*3*224*224]; | 349 | inputs[i] = new float[MOBILENET_INPUT_SIZE]; |
345 | outputs[i] = new float[1001]; | 350 | outputs[i] = new float[MOBILENET_OUTPUT_SIZE]; |
346 | v_data[i].inputs = &inputs[i]; | 351 | v_data[i].inputs = &inputs[i]; |
347 | v_data[i].outputs = &outputs[i]; | 352 | v_data[i].outputs = &outputs[i]; |
348 | } | 353 | } |
@@ -406,8 +411,8 @@ bool RunConfiguration(cmdline_opts_t& opts) | |||
406 | float **outputs = new float *[num_threads * batch_size]; | 411 | float **outputs = new float *[num_threads * batch_size]; |
407 | for (int i = 0; i < num_threads * batch_size; i++) | 412 | for (int i = 0; i < num_threads * batch_size; i++) |
408 | { | 413 | { |
409 | inputs[i] = new float[1*3*224*224]; | 414 | inputs[i] = new float[MOBILENET_INPUT_SIZE]; |
410 | outputs[i] = new float[1001]; | 415 | outputs[i] = new float[MOBILENET_OUTPUT_SIZE]; |
411 | } | 416 | } |
412 | vector<future<bool>> futures(num_threads); | 417 | vector<future<bool>> futures(num_threads); |
413 | bool skip_outputs = false; | 418 | bool skip_outputs = false; |
@@ -482,13 +487,14 @@ bool ReadFrame(const cmdline_opts_t& opts, VideoCapture &cap, float** inputs, | |||
482 | int batch_size) | 487 | int batch_size) |
483 | { | 488 | { |
484 | Configuration c; | 489 | Configuration c; |
485 | c.inNumChannels = 3;; | 490 | c.inNumChannels = MOBILENET_IN_C; |
486 | c.inWidth = 224; | 491 | c.inWidth = MOBILENET_IN_W; |
487 | c.inHeight = 224; | 492 | c.inHeight = MOBILENET_IN_H; |
488 | c.preProcType = 2; | 493 | c.preProcType = 2; |
489 | SubgraphDataConv in_conv{{0}, {true}, {128.0f}, {false}, {1,3,224,224}}; | 494 | SubgraphDataConv in_conv{{0}, {true}, {128.0f}, {false}, |
495 | {1,MOBILENET_IN_C,MOBILENET_IN_H,MOBILENET_IN_W}}; | ||
490 | 496 | ||
491 | char* frame_buffer = new char[3*224*224]; | 497 | char* frame_buffer = new char[MOBILENET_INPUT_SIZE]; |
492 | assert (frame_buffer != nullptr); | 498 | assert (frame_buffer != nullptr); |
493 | 499 | ||
494 | Mat image; | 500 | Mat image; |
diff --git a/tidl_api/inc/subgraph_runtime.h b/tidl_api/inc/subgraph_runtime.h index 65db5b5..c75d6b2 100644 --- a/tidl_api/inc/subgraph_runtime.h +++ b/tidl_api/inc/subgraph_runtime.h | |||
@@ -48,6 +48,13 @@ extern void TidlInitSubgraph(int total_subgraphs, | |||
48 | int subgraph_id | 48 | int subgraph_id |
49 | ); | 49 | ); |
50 | 50 | ||
51 | //! @brief Top level API to free a TIDL subgraph on device | ||
52 | //! @param total_subgraphs total number of TIDL subgraphs in whole inference | ||
53 | //! @param subgraph_id index of current TIDL subgraph | ||
54 | extern void TidlFreeSubgraph(int total_subgraphs, | ||
55 | int subgraph_id | ||
56 | ); | ||
57 | |||
51 | //! @brief Top level inference to run a TIDL subgraph | 58 | //! @brief Top level inference to run a TIDL subgraph |
52 | //! @param total_subgraphs total number of TIDL subgraphs in whole inference | 59 | //! @param total_subgraphs total number of TIDL subgraphs in whole inference |
53 | //! @param subgraph_id index of current TIDL subgraph | 60 | //! @param subgraph_id index of current TIDL subgraph |
diff --git a/tidl_api/src/subgraph_runtime.cpp b/tidl_api/src/subgraph_runtime.cpp index 24b378e..9d068b2 100644 --- a/tidl_api/src/subgraph_runtime.cpp +++ b/tidl_api/src/subgraph_runtime.cpp | |||
@@ -85,6 +85,11 @@ void TidlInitSubgraph(int total_subgraphs, int subgraph_id) | |||
85 | res.InitSubgraph(subgraph_id); | 85 | res.InitSubgraph(subgraph_id); |
86 | } | 86 | } |
87 | 87 | ||
88 | void TidlFreeSubgraph(int total_subgraphs, int subgraph_id) | ||
89 | { | ||
90 | ResM& res = ResM::Instance(total_subgraphs); | ||
91 | res.FreeSubgraph(subgraph_id); | ||
92 | } | ||
88 | 93 | ||
89 | void TidlRunSubgraph(int total_subgraphs, | 94 | void TidlRunSubgraph(int total_subgraphs, |
90 | int subgraph_id, | 95 | int subgraph_id, |
@@ -152,32 +157,44 @@ ResM::ResM() : enable_trace_m(false), num_subgraphs_m(0), | |||
152 | 157 | ||
153 | ResM::~ResM() | 158 | ResM::~ResM() |
154 | { | 159 | { |
160 | for (uint32_t i = 0; i < num_subgraphs_m; i++) | ||
161 | FreeSubgraph(i); | ||
162 | |||
163 | delete eops_m; | ||
164 | eops_m = nullptr; | ||
165 | } | ||
166 | |||
167 | void ResM::FreeSubgraph(uint32_t subgraph_id) | ||
168 | { | ||
169 | assert(subgraph_id < num_subgraphs_m); | ||
170 | |||
155 | if (eops_m != nullptr) | 171 | if (eops_m != nullptr) |
156 | { | 172 | { |
157 | for (const ResEOP& res_eop : *eops_m) | 173 | ResEOP& res_eop = (*eops_m)[subgraph_id]; |
174 | if (res_eop.eops != nullptr) | ||
158 | { | 175 | { |
159 | if (res_eop.eops != nullptr) | 176 | for (const ExecutionObjectPipeline* eop : *(res_eop.eops)) |
160 | { | 177 | { |
161 | for (const ExecutionObjectPipeline* eop : *(res_eop.eops)) | 178 | free(eop->GetInputBufferPtr()); |
162 | { | 179 | free(eop->GetOutputBufferPtr()); |
163 | free(eop->GetInputBufferPtr()); | 180 | delete eop; |
164 | free(eop->GetOutputBufferPtr()); | ||
165 | delete eop; | ||
166 | } | ||
167 | } | 181 | } |
182 | delete res_eop.eops; | ||
183 | res_eop.eops = nullptr; | ||
168 | } | 184 | } |
169 | delete eops_m; | ||
170 | eops_m = nullptr; | ||
171 | } | 185 | } |
172 | 186 | ||
173 | for (const Executor* e : es_m) | 187 | delete es_m[subgraph_id]; |
174 | if (e != nullptr) delete e; | 188 | es_m[subgraph_id] = nullptr; |
175 | for (const Executor* e : e2s_m) | 189 | |
176 | if (e != nullptr) delete e; | 190 | delete e2s_m[subgraph_id]; |
177 | for (SubgraphDataConv *dc : in_conv_m) | 191 | e2s_m[subgraph_id] = nullptr; |
178 | if (dc != nullptr) delete dc; | 192 | |
179 | for (SubgraphDataConv *dc : out_conv_m) | 193 | delete in_conv_m[subgraph_id]; |
180 | if (dc != nullptr) delete dc; | 194 | in_conv_m[subgraph_id] = nullptr; |
195 | |||
196 | delete out_conv_m[subgraph_id]; | ||
197 | out_conv_m[subgraph_id] = nullptr; | ||
181 | } | 198 | } |
182 | 199 | ||
183 | ResM& ResM::Instance(uint32_t total_num_subgraphs) | 200 | ResM& ResM::Instance(uint32_t total_num_subgraphs) |
@@ -290,42 +307,55 @@ void ResM::InitSubgraph(uint32_t subgraph_id) | |||
290 | // uint32_t num_dsps_used = 0; | 307 | // uint32_t num_dsps_used = 0; |
291 | if (num_eves_m > 0 && num_dsps_m > 0 && ! cs_m[subgraph_id].runFullNet) | 308 | if (num_eves_m > 0 && num_dsps_m > 0 && ! cs_m[subgraph_id].runFullNet) |
292 | { | 309 | { |
293 | int32_t start_layer = net->numLayers -1; | 310 | if (cs_m[subgraph_id].layerIndex2LayerGroupId.empty()) |
294 | int32_t end_layer = 0; | ||
295 | if (net->TIDLLayers[start_layer].layerType == (int32_t) TIDL_DataLayer) | ||
296 | start_layer -= 1; | ||
297 | if (net->TIDLLayers[end_layer].layerType == (int32_t) TIDL_DataLayer) | ||
298 | end_layer += 1; | ||
299 | int32_t i = start_layer; | ||
300 | for ( ; i > end_layer; i--) | ||
301 | { | 311 | { |
302 | int32_t layer_type = net->TIDLLayers[i].layerType; | 312 | int32_t start_layer = net->numLayers -1; |
303 | if (layer_type != (int32_t) TIDL_SoftMaxLayer && | 313 | int32_t end_layer = 0; |
304 | layer_type != (int32_t) TIDL_InnerProductLayer && | 314 | if (net->TIDLLayers[start_layer].layerType == (int32_t) TIDL_DataLayer) |
305 | layer_type != (int32_t) TIDL_PoolingLayer) | 315 | start_layer -= 1; |
306 | break; | 316 | if (net->TIDLLayers[end_layer].layerType == (int32_t) TIDL_DataLayer) |
307 | } | 317 | end_layer += 1; |
308 | i += 1; | 318 | int32_t i = start_layer; |
309 | if (i <= start_layer) | 319 | for ( ; i > end_layer; i--) |
310 | { | 320 | { |
311 | if (num_lg2_dsps_used_m < num_dsps_m) | 321 | int32_t layer_type = net->TIDLLayers[i].layerType; |
322 | if (layer_type != (int32_t) TIDL_SoftMaxLayer && | ||
323 | layer_type != (int32_t) TIDL_InnerProductLayer && | ||
324 | layer_type != (int32_t) TIDL_PoolingLayer) | ||
325 | break; | ||
326 | } | ||
327 | i += 1; | ||
328 | if (i <= start_layer) | ||
312 | { | 329 | { |
313 | if (enable_trace_m) | 330 | if (num_lg2_dsps_used_m < num_dsps_m) |
314 | printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n", | ||
315 | subgraph_id, i, start_layer); | ||
316 | while (i <= start_layer) | ||
317 | cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2; | ||
318 | e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m)); | ||
319 | num_lg2_dsps_used_m += 1; | ||
320 | if (num_subgraphs_m == 1) // Allocate all dsps if only one subgraph | ||
321 | { | 331 | { |
322 | while (num_lg2_dsps_used_m < num_dsps_m) | 332 | if (enable_trace_m) |
323 | e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m++)); | 333 | printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n", |
334 | subgraph_id, i, start_layer); | ||
335 | while (i <= start_layer) | ||
336 | cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2; | ||
324 | } | 337 | } |
325 | } | 338 | } |
326 | } | 339 | } |
327 | delete net; | 340 | else |
341 | { | ||
342 | if (enable_trace_m) | ||
343 | printf("Subgraph %d: using layer2group map in config file for DSP\n", | ||
344 | subgraph_id); | ||
345 | } | ||
346 | |||
347 | if (! cs_m[subgraph_id].layerIndex2LayerGroupId.empty()) | ||
348 | { | ||
349 | e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m)); | ||
350 | num_lg2_dsps_used_m += 1; | ||
351 | if (num_subgraphs_m == 1) // Allocate all dsps if only one subgraph | ||
352 | { | ||
353 | while (num_lg2_dsps_used_m < num_dsps_m) | ||
354 | e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m++)); | ||
355 | } | ||
356 | } | ||
328 | } | 357 | } |
358 | delete net; | ||
329 | 359 | ||
330 | if (e2_ids.empty()) | 360 | if (e2_ids.empty()) |
331 | cs_m[subgraph_id].runFullNet = true; | 361 | cs_m[subgraph_id].runFullNet = true; |
diff --git a/tidl_api/src/subgraph_runtime_impl.h b/tidl_api/src/subgraph_runtime_impl.h index 9738dbb..54dc12d 100644 --- a/tidl_api/src/subgraph_runtime_impl.h +++ b/tidl_api/src/subgraph_runtime_impl.h | |||
@@ -53,6 +53,7 @@ class ResM { | |||
53 | 53 | ||
54 | // how to get resources for subgraph_id | 54 | // how to get resources for subgraph_id |
55 | void InitSubgraph(uint32_t subgraph_id); | 55 | void InitSubgraph(uint32_t subgraph_id); |
56 | void FreeSubgraph(uint32_t subgraph_id); | ||
56 | uint32_t GetNumEOPs(uint32_t subgraph_id); | 57 | uint32_t GetNumEOPs(uint32_t subgraph_id); |
57 | ExecutionObjectPipeline* GetEOP(uint32_t subgraph_id); | 58 | ExecutionObjectPipeline* GetEOP(uint32_t subgraph_id); |
58 | void FreeEOP(uint32_t subgraph_id, | 59 | void FreeEOP(uint32_t subgraph_id, |