Subgraph: use Layer2Group map in config file
author Yuan Zhao <yuanzhao@ti.com>
Mon, 25 Nov 2019 01:58:14 +0000 (19:58 -0600)
committer Yuan Zhao <yuanzhao@ti.com>
Mon, 25 Nov 2019 04:20:25 +0000 (22:20 -0600)
- If a Layer2Group map exists in the subgraph config file, use it.
  Otherwise, try to derive the map from the network layer types.
- Added TidlFreeSubgraph() for subgraph resource de-allocation
- Code changes based on review comments.
- MCT-1223

examples/mobilenet_subgraph/main.cpp
tidl_api/inc/subgraph_runtime.h
tidl_api/src/subgraph_runtime.cpp
tidl_api/src/subgraph_runtime_impl.h

index 8a77f6576eed068ecfc3b20d506c476b26ac3aa7..2233d9669c254244e78963bd73cad8dc845f9e38 100644 (file)
@@ -67,6 +67,11 @@ using namespace cv;
 #define NUM_DEFAULT_INPUTS  1
 #define DEFAULT_OBJECT_CLASSES_LIST_FILE "imagenet_objects.json"
 #define DEFAULT_OUTPUT_PROB_THRESHOLD  5
+#define MOBILENET_IN_C         (3)
+#define MOBILENET_IN_H         (224)
+#define MOBILENET_IN_W         (224)
+#define MOBILENET_INPUT_SIZE   (1*MOBILENET_IN_C*MOBILENET_IN_H*MOBILENET_IN_W)
+#define MOBILENET_OUTPUT_SIZE  (1001)
 const char *default_inputs[NUM_DEFAULT_INPUTS] =
 {
     "../test/testvecs/input/objects/cat-pet-animal-domestic-104827.jpeg"
@@ -169,9 +174,9 @@ bool RunConfiguration(cmdline_opts_t& opts)
     {
         TidlInitSubgraph(1, 0);
         float **inputs = new float *[1];
-        inputs[0] = new float[1*3*224*224];
+        inputs[0] = new float[MOBILENET_INPUT_SIZE];
         float **outputs = new float *[1];
-        outputs[0] = new float[1001];
+        outputs[0] = new float[MOBILENET_OUTPUT_SIZE];
 
         for (int i = 0; i < 5; i ++)
         {
@@ -216,8 +221,8 @@ bool RunConfiguration(cmdline_opts_t& opts)
             float **outputs = new float *[batch_size];
             for (int i = 0; i < batch_size; i++)
             {
-                inputs[i]  = new float[1*3*224*224];
-                outputs[i] = new float[1001];
+                inputs[i]  = new float[MOBILENET_INPUT_SIZE];
+                outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
             }
 
             chrono::time_point<chrono::steady_clock> tloop0, tloop1;
@@ -273,8 +278,8 @@ bool RunConfiguration(cmdline_opts_t& opts)
         float **outputs = new float *[num_threads];
         for (int i = 0; i < num_threads; i++)
         {
-            inputs[i]  = new float[1*3*224*224];
-            outputs[i] = new float[1001];
+            inputs[i]  = new float[MOBILENET_INPUT_SIZE];
+            outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
         }
         vector<future<bool>> futures(num_threads);
         bool skip_outputs = false;
@@ -341,8 +346,8 @@ bool RunConfiguration(cmdline_opts_t& opts)
         vector<UserData> v_data(num_threads);
         for (int i = 0; i < num_threads; i++)
         {
-            inputs[i]  = new float[1*3*224*224];
-            outputs[i] = new float[1001];
+            inputs[i]  = new float[MOBILENET_INPUT_SIZE];
+            outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
             v_data[i].inputs  = &inputs[i];
             v_data[i].outputs = &outputs[i];
         }
@@ -406,8 +411,8 @@ bool RunConfiguration(cmdline_opts_t& opts)
         float **outputs = new float *[num_threads * batch_size];
         for (int i = 0; i < num_threads * batch_size; i++)
         {
-            inputs[i]  = new float[1*3*224*224];
-            outputs[i] = new float[1001];
+            inputs[i]  = new float[MOBILENET_INPUT_SIZE];
+            outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
         }
         vector<future<bool>> futures(num_threads);
         bool skip_outputs = false;
@@ -482,13 +487,14 @@ bool ReadFrame(const cmdline_opts_t& opts, VideoCapture &cap, float** inputs,
                int batch_size)
 {
     Configuration c;
-    c.inNumChannels = 3;;
-    c.inWidth = 224;
-    c.inHeight = 224;
+    c.inNumChannels = MOBILENET_IN_C;
+    c.inWidth = MOBILENET_IN_W;
+    c.inHeight = MOBILENET_IN_H;
     c.preProcType = 2;
-    SubgraphDataConv in_conv{{0}, {true}, {128.0f}, {false}, {1,3,224,224}};
+    SubgraphDataConv in_conv{{0}, {true}, {128.0f}, {false},
+                             {1,MOBILENET_IN_C,MOBILENET_IN_H,MOBILENET_IN_W}};
 
-    char* frame_buffer = new char[3*224*224];
+    char* frame_buffer = new char[MOBILENET_INPUT_SIZE];
     assert (frame_buffer != nullptr);
 
     Mat image;
index 65db5b5dc2e1c14dc77a039080b172d0bb66a1d8..c75d6b2b9461e74bafac13c14af7f0886fe29570 100644 (file)
@@ -48,6 +48,13 @@ extern void TidlInitSubgraph(int total_subgraphs,
                              int subgraph_id
                             );
 
+//! @brief Top level API to free a TIDL subgraph on device
+//! @param total_subgraphs  total number of TIDL subgraphs in whole inference
+//! @param subgraph_id  index of current TIDL subgraph
+extern void TidlFreeSubgraph(int total_subgraphs,
+                             int subgraph_id
+                            );
+
 //! @brief Top level inference to run a TIDL subgraph
 //! @param total_subgraphs  total number of TIDL subgraphs in whole inference
 //! @param subgraph_id  index of current TIDL subgraph
index 24b378e241b4d6b7ac5e881aa35969bbaf6c22d5..9d068b2db7c9ab1cc6a0b41aba746cf89cd0a6de 100644 (file)
@@ -85,6 +85,11 @@ void TidlInitSubgraph(int total_subgraphs, int subgraph_id)
   res.InitSubgraph(subgraph_id);
 }
 
+void TidlFreeSubgraph(int total_subgraphs, int subgraph_id)
+{
+  ResM& res = ResM::Instance(total_subgraphs);
+  res.FreeSubgraph(subgraph_id);
+}
 
 void TidlRunSubgraph(int total_subgraphs,
                      int subgraph_id,
@@ -152,32 +157,44 @@ ResM::ResM() : enable_trace_m(false), num_subgraphs_m(0),
 
 ResM::~ResM()
 {
+  for (uint32_t i = 0; i < num_subgraphs_m; i++)
+    FreeSubgraph(i);
+
+  delete eops_m;
+  eops_m = nullptr;
+}
+
+void ResM::FreeSubgraph(uint32_t subgraph_id)
+{
+  assert(subgraph_id < num_subgraphs_m);
+
   if (eops_m != nullptr)
   {
-    for (const ResEOP& res_eop : *eops_m)
+    ResEOP& res_eop = (*eops_m)[subgraph_id];
+    if (res_eop.eops != nullptr)
     {
-      if (res_eop.eops != nullptr)
+      for (const ExecutionObjectPipeline* eop : *(res_eop.eops))
       {
-        for (const ExecutionObjectPipeline* eop : *(res_eop.eops))
-        {
-          free(eop->GetInputBufferPtr());
-          free(eop->GetOutputBufferPtr());
-          delete eop;
-        }
+        free(eop->GetInputBufferPtr());
+        free(eop->GetOutputBufferPtr());
+        delete eop;
       }
+      delete res_eop.eops;
+      res_eop.eops = nullptr;
     }
-    delete eops_m;
-    eops_m = nullptr;
   }
 
-  for (const Executor* e : es_m)
-    if (e != nullptr) delete e;
-  for (const Executor* e : e2s_m)
-    if (e != nullptr) delete e;
-  for (SubgraphDataConv *dc : in_conv_m)
-    if (dc != nullptr) delete dc;
-  for (SubgraphDataConv *dc : out_conv_m)
-    if (dc != nullptr) delete dc;
+  delete es_m[subgraph_id];
+  es_m[subgraph_id] = nullptr;
+
+  delete e2s_m[subgraph_id];
+  e2s_m[subgraph_id] = nullptr;
+
+  delete in_conv_m[subgraph_id];
+  in_conv_m[subgraph_id] = nullptr;
+
+  delete out_conv_m[subgraph_id];
+  out_conv_m[subgraph_id] = nullptr;
 }
 
 ResM& ResM::Instance(uint32_t total_num_subgraphs)
@@ -290,42 +307,55 @@ void ResM::InitSubgraph(uint32_t subgraph_id)
     // uint32_t num_dsps_used = 0;
     if (num_eves_m > 0 && num_dsps_m > 0 && ! cs_m[subgraph_id].runFullNet)
     {
-      int32_t start_layer = net->numLayers -1;
-      int32_t end_layer = 0;
-      if (net->TIDLLayers[start_layer].layerType == (int32_t) TIDL_DataLayer)
-        start_layer -= 1;
-      if (net->TIDLLayers[end_layer].layerType == (int32_t) TIDL_DataLayer)
-        end_layer += 1;
-      int32_t i = start_layer;
-      for ( ; i > end_layer; i--)
+      if (cs_m[subgraph_id].layerIndex2LayerGroupId.empty())
       {
-        int32_t layer_type = net->TIDLLayers[i].layerType;
-        if (layer_type != (int32_t) TIDL_SoftMaxLayer &&
-            layer_type != (int32_t) TIDL_InnerProductLayer &&
-            layer_type != (int32_t) TIDL_PoolingLayer)
-          break;
-      }
-      i += 1;
-      if (i <= start_layer)
-      {
-        if (num_lg2_dsps_used_m < num_dsps_m)
+        int32_t start_layer = net->numLayers -1;
+        int32_t end_layer = 0;
+        if (net->TIDLLayers[start_layer].layerType == (int32_t) TIDL_DataLayer)
+          start_layer -= 1;
+        if (net->TIDLLayers[end_layer].layerType == (int32_t) TIDL_DataLayer)
+          end_layer += 1;
+        int32_t i = start_layer;
+        for ( ; i > end_layer; i--)
+        {
+          int32_t layer_type = net->TIDLLayers[i].layerType;
+          if (layer_type != (int32_t) TIDL_SoftMaxLayer &&
+              layer_type != (int32_t) TIDL_InnerProductLayer &&
+              layer_type != (int32_t) TIDL_PoolingLayer)
+            break;
+        }
+        i += 1;
+        if (i <= start_layer)
         {
-          if (enable_trace_m)
-            printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n",
-                   subgraph_id, i, start_layer);
-          while (i <= start_layer)
-            cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2;
-          e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m));
-          num_lg2_dsps_used_m += 1;
-          if (num_subgraphs_m == 1)  // Allocate all dsps if only one subgraph
+          if (num_lg2_dsps_used_m < num_dsps_m)
           {
-            while (num_lg2_dsps_used_m < num_dsps_m)
-              e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m++));
+            if (enable_trace_m)
+              printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n",
+                     subgraph_id, i, start_layer);
+            while (i <= start_layer)
+              cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2;
           }
         }
       }
-      delete net;
+      else
+      {
+        if (enable_trace_m)
+          printf("Subgraph %d: using layer2group map in config file for DSP\n",
+                 subgraph_id);
+      }
+
+      if (! cs_m[subgraph_id].layerIndex2LayerGroupId.empty())
+      {
+        e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m));
+        num_lg2_dsps_used_m += 1;
+        if (num_subgraphs_m == 1)  // Allocate all dsps if only one subgraph
+        {
+          while (num_lg2_dsps_used_m < num_dsps_m)
+            e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m++));
+        }
+      }
     }
+    delete net;
 
     if (e2_ids.empty())
       cs_m[subgraph_id].runFullNet = true;
index 9738dbbac7cbf65671d263880043050582c4926b..54dc12dd2837cfbdbf20ae80d4a70aa23574cd7b 100644 (file)
@@ -53,6 +53,7 @@ class ResM {
 
     // how to get resources for subgraph_id
     void                     InitSubgraph(uint32_t subgraph_id);
+    void                     FreeSubgraph(uint32_t subgraph_id);
     uint32_t                 GetNumEOPs(uint32_t subgraph_id);
     ExecutionObjectPipeline* GetEOP(uint32_t subgraph_id);
     void                     FreeEOP(uint32_t subgraph_id,