Subgraph: add top-level API TidlRunSubgraph
author    Yuan Zhao <yuanzhao@ti.com>  Thu, 31 Oct 2019 02:56:39 +0000 (21:56 -0500)
committer Yuan Zhao <yuanzhao@ti.com>  Thu, 31 Oct 2019 02:56:39 +0000 (21:56 -0500)
- TidlRunSubgraph() should be the interface function that TVM/TFLite
  calls to offload a subgraph to TIDL
- MCT-1222

tidl_api/Makefile
tidl_api/inc/subgraph_runtime.h
tidl_api/src/subgraph_runtime.cpp

tidl_api/Makefile
index 4dc298b74f345a7cfddd458fb961dc530ceb1335..ca9187859866599c9081b9da64f4effba84d856d 100644
 PYTHON_INCLUDE_DIR ?= $(wildcard $(TARGET_ROOTDIR)/usr/include/python3*)
 
 LIB_NAME = tidl_api.a
+SHARED_LIB_NAME = libtidl_api.so
 LIB_IMGUTIL_NAME = tidl_imgutil.a
+SHARED_LIB_IMGUTIL_NAME = libtidl_imgutil.so
 PY_LIB_NAME = tidl.so
 
-all: $(LIB_NAME) $(LIB_IMGUTIL_NAME) $(PY_LIB_NAME)
+all: $(LIB_NAME) $(LIB_IMGUTIL_NAME) $(PY_LIB_NAME) \
+     $(SHARED_LIB_NAME) $(SHARED_LIB_IMGUTIL_NAME)
 
 include make.inc
 include make.buildid
@@ -91,12 +94,18 @@ obj/%.o: src/%.cpp $(HEADERS)
 $(LIB_NAME): $(HOST_OBJ_FILES)
        $(AR) cr $@ $(HOST_OBJ_FILES)
 
+$(SHARED_LIB_NAME): $(HOST_OBJ_FILES)
+       $(CXX) -shared $(HOST_OBJ_FILES) -o $@
+
 $(PY_LIB_NAME): $(HOST_OBJ_PYBIND_FILES) $(LIB_NAME)
        $(CXX) $(CXXFLAGS) -Wl,-Bsymbolic -shared -lOpenCL -locl_util $^ -o $@
 
 $(LIB_IMGUTIL_NAME): $(HOST_OBJ_IMGUTIL_FILES)
        $(AR) cr $@ $(HOST_OBJ_IMGUTIL_FILES)
 
+$(SHARED_LIB_IMGUTIL_NAME): $(HOST_OBJ_IMGUTIL_FILES)
+       $(CXX) -shared $(HOST_OBJ_IMGUTIL_FILES) -o $@
+
 clean::
        $(RM) -f $(LIB_NAME) $(PY_LIB_NAME)
        $(RM) -f $(LIB_IMGUTIL_NAME)
tidl_api/inc/subgraph_runtime.h
index a38973eee8a7a2aa4785c2d9a7be8963ec8e1e88..c153485099acbc59efdc3de9ac332db0c21f8234 100644
 #include "execution_object_pipeline.h"
 #include "subgraph_data_conv.h"
 
+extern "C" {
+
+void TidlRunSubgraph(int total_subgraphs,
+                     int subgraph_id,
+                     int num_inputs,
+                     int num_outputs,
+                     float **inputTensors,
+                     float **outputTensors
+                    );
+
+}  // extern "C"
 
 namespace tidl {
 
@@ -42,51 +53,30 @@ namespace tidl {
 // Auto-generated code from Relay/TVM compilation step after
 // partitioning and lowering to backend implementation
 
-// TODO: need to figure out exact arguments and format
-extern void tidl::RunSubgraphImpl(int subgraph_id,
-                                  const std::vector<float*>&,
-                                  const std::vector<float*>&);
-
-void tidlRunSubgraph(int subgraph_id,
+void TVM_TidlFunction(int total_subgraphs, int subgraph_id,
                      int num_input_tensors, int num_output_tensors,
                      PackedArgs args)
 {
-  std::vector<float *> in_data, out_data;
+  float** in_data  = new float*[num_input_tensors];
+  float** out_data = new float*[num_output_tensors];
 
   for (int i = 0; i < num_input_tensors + num_output_tensors; i++)
     if (i < num_input_tensors)
-      in_data.push_back(args.data[i]);
+      in_data[i] = args.data[i];
     else
-      out_data.push_back(args.data[i]);
+      out_data[i - num_input_tensors] = args.data[i];
 
-  tidl::RunSubgraphImpl(subgraph_id, in_data, out_data);
-}
-#endif
+  // Call into TidlRunSubgraph() in libtidl.so, resolved at runtime:
+  //   handle   = dlopen("libtidl.so", RTLD_NOW);
+  //   TidlFunc = dlsym(handle, "TidlRunSubgraph");
+  (*TidlFunc)(total_subgraphs, subgraph_id,
+              num_input_tensors, num_output_tensors,
+              in_data, out_data);
 
-
-#if 0
-// user application code
-// subgraph_id will be used to find TIDL config file
-// e.g. subgraph_1.cfg, subgraph_2.cfg, etc
-void RunSubgraphImpl(int subgraph_id,
-                     int total_num_subgraphs,
-                     const std::vector<float*>& ext_in_data,
-                     const std::vector<float*>& ext_out_data)
-{
-  ResM& res = ResM::Instance(total_num_subgraphs);
-  const ExecutionObjectPipeline& eop = res.GetEOP(subgraph_id);
-  const SubgraphDataConv& in_conv    = res.GetInConv(subgraph_id);
-  const SubgraphDataConv& out_conv   = res.GetOutConv(subgraph_id);
-
-  in_data = eop.GetInputBufferPtr();
-  in_conv.ScaleQuant(ext_in_data, in_data);
-  eop.ProcessFrameStartAsync();
-  eop.ProcessFrameWait();
-  out_data = eop.GetOutputBufferPtr();
-  out_conv.ScaleDeQuant(out_data, ext_out_data);
-  res.FreeEOP(subgraph_id, eop);
+  delete [] in_data;
+  delete [] out_data;
 }
-#endif 
+#endif
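
PackedArgs itself is not part of this change; judging from the indexing above (args.data[i] handed out as a float* per tensor), a minimal sketch of its shape would be:

    // Hypothetical sketch: the real PackedArgs comes from the auto-generated
    // TVM glue code and is not shown in this commit.
    struct PackedArgs
    {
      // data[0 .. num_inputs-1] are input tensors,
      // data[num_inputs .. num_inputs+num_outputs-1] are output tensors.
      float** data;
    };
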
 
 
 // Singleton ResM .h file
tidl_api/src/subgraph_runtime.cpp
index 9f68c62157da3ab45eefb912648ea0ee54371ada..ad5a11abdccfbbf3c6cbfab98f3580857366f263 100644
 // Auto-generated code from Relay/TVM compilation step after
 // partitioning and lowering to backend implementation
 
-// TODO: need to figure out exact arguments and format
-extern void tidl::RunSubgraphImpl(int subgraph_id,
-                                  const std::vector<float*>&,
-                                  const std::vector<float*>&);
-
-void tidlRunSubgraph(int subgraph_id,
+void TVM_TidlFunction(int total_subgraphs, int subgraph_id,
                      int num_input_tensors, int num_output_tensors,
                      PackedArgs args)
 {
-  std::vector<float *> in_data, out_data;
+  float** in_data  = new float*[num_input_tensors];
+  float** out_data = new float*[num_output_tensors];
 
   for (int i = 0; i < num_input_tensors + num_output_tensors; i++)
     if (i < num_input_tensors)
-      in_data.push_back(args.data[i]);
+      in_data[i] = args.data[i];
     else
-      out_data.push_back(args.data[i]);
+      out_data[i - num_input_tensors] = args.data[i];
+
+  // Call into TidlRunSubgraph() in libtidl.so, resolved at runtime:
+  //   handle   = dlopen("libtidl.so", RTLD_NOW);
+  //   TidlFunc = dlsym(handle, "TidlRunSubgraph");
+  (*TidlFunc)(total_subgraphs, subgraph_id,
+              num_input_tensors, num_output_tensors,
+              in_data, out_data);
 
-  tidl::RunSubgraphImpl(subgraph_id, in_data, out_data);
+  delete [] in_data;
+  delete [] out_data;
 }
 #endif
 
 
-#if 0
-// user application code
-// subgraph_id will be used to find TIDL config file
-// e.g. subgraph_1.cfg, subgraph_2.cfg, etc
-void RunSubgraphImpl(int subgraph_id,
-                     int total_num_subgraphs,
-                     const std::vector<float*>& ext_in_data,
-                     const std::vector<float*>& ext_out_data)
+// Singleton ResM .cpp
+using namespace tidl;
+
+
+void TidlRunSubgraph(int total_subgraphs,
+                     int subgraph_id,
+                     int num_inputs,
+                     int num_outputs,
+                     float **inputTensors,
+                     float **outputTensors
+                    )
 {
-  ResM& res = ResM::Instance(total_num_subgraphs);
-  const ExecutionObjectPipeline& eop = res.GetEOP(subgraph_id);
-  const SubgraphDataConv& in_conv    = res.GetInConv(subgraph_id);
-  const SubgraphDataConv& out_conv   = res.GetOutConv(subgraph_id);
-
-  in_data = eop.GetInputBufferPtr();
-  in_conv.ScaleQuant(ext_in_data, in_data);
-  eop.ProcessFrameStartAsync();
-  eop.ProcessFrameWait();
-  out_data = eop.GetOutputBufferPtr();
-  out_conv.ScaleDeQuant(out_data, ext_out_data);
+  ResM& res = ResM::Instance(total_subgraphs);
+  ExecutionObjectPipeline* eop     = res.GetEOP(subgraph_id);
+  const SubgraphDataConv& in_conv  = res.GetInConv(subgraph_id);
+  const SubgraphDataConv& out_conv = res.GetOutConv(subgraph_id);
+
+  std::vector<float *> in_data_v, out_data_v;
+  for (int i = 0; i < num_inputs; i++)
+    in_data_v.emplace_back(inputTensors[i]);
+  for (int i = 0; i < num_outputs; i++)
+    out_data_v.emplace_back(outputTensors[i]);
+  char* in_data = eop->GetInputBufferPtr();
+  in_conv.ScaleQuant(in_data_v, (uint8_t *) in_data);
+
+  eop->ProcessFrameStartAsync();
+  eop->ProcessFrameWait();
+
+  char* out_data = eop->GetOutputBufferPtr();
+  out_conv.ScaleDequant((const uint8_t *) out_data, out_data_v);
   res.FreeEOP(subgraph_id, eop);
 }
-#endif
-
-
 
-// Singleton ResM .cpp
-using namespace tidl;
 
 typedef Loki::SingletonHolder <tidl::ResM, Loki::CreateUsingNew,
 Loki::DefaultLifetime, Loki::ClassLevelLockable> tidlSingleResM;
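
For an application that links against libtidl_api.so directly instead of resolving the symbol with dlsym(), the call reduces to packing the external tensor pointers and invoking the API. A minimal sketch, assuming a single subgraph configured by subgraph0.cfg (the naming GetEOP() uses below) and placeholder tensor buffers:

    extern "C" void TidlRunSubgraph(int total_subgraphs, int subgraph_id,
                                    int num_inputs, int num_outputs,
                                    float **inputTensors, float **outputTensors);

    // Run one frame through subgraph 0: TidlRunSubgraph() quantizes the
    // input, executes on TIDL, and dequantizes the result into 'scores'.
    void RunOneFrame(float* preprocessed, float* scores)
    {
      float* inputs[]  = { preprocessed };
      float* outputs[] = { scores };
      TidlRunSubgraph(/* total_subgraphs */ 1, /* subgraph_id */ 0,
                      /* num_inputs */ 1, /* num_outputs */ 1,
                      inputs, outputs);
    }
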
@@ -190,7 +199,7 @@ ExecutionObjectPipeline* ResM::GetEOP(uint32_t subgraph_id)
     std::string cfg_file = "subgraph" + std::to_string(subgraph_id) + ".cfg";
     bool status = cs_m[subgraph_id].ReadFromFile(cfg_file);
     assert(status);
-    
+
     // Check if last few layers can be offloaded to DSPs
     //       and DSPs are available
     DeviceIds e_ids, e2_ids;
@@ -225,7 +234,7 @@ ExecutionObjectPipeline* ResM::GetEOP(uint32_t subgraph_id)
         if (num_lg2_dsps_used_m < num_dsps_m)
         {
           if (enable_trace_m)
-            printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n", 
+            printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n",
                    subgraph_id, i, start_layer);
           while (i <= start_layer)
             cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2;