aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYuan Zhao2019-10-30 21:56:39 -0500
committerYuan Zhao2019-10-30 21:56:39 -0500
commitc1ed48fa12caedf24a5e83177c343852fdbf7ac9 (patch)
tree56ada47a6b4f333399c901de7f41b332e02dd6fd
parente0b7c38d199674f34ea1fb2c182744851785dd79 (diff)
downloadtidl-api-c1ed48fa12caedf24a5e83177c343852fdbf7ac9.tar.gz
tidl-api-c1ed48fa12caedf24a5e83177c343852fdbf7ac9.tar.xz
tidl-api-c1ed48fa12caedf24a5e83177c343852fdbf7ac9.zip
Subgraph: add top level API TidlRunSubgraph
- TidlRunSubgraph() should be the interface function that TVM/TFLite calls to offload a subgraph to TIDL - MCT-1222
-rw-r--r--tidl_api/Makefile11
-rw-r--r--tidl_api/inc/subgraph_runtime.h60
-rw-r--r--tidl_api/src/subgraph_runtime.cpp81
3 files changed, 80 insertions, 72 deletions
diff --git a/tidl_api/Makefile b/tidl_api/Makefile
index 4dc298b..ca91878 100644
--- a/tidl_api/Makefile
+++ b/tidl_api/Makefile
@@ -26,10 +26,13 @@
26PYTHON_INCLUDE_DIR ?= $(wildcard $(TARGET_ROOTDIR)/usr/include/python3*) 26PYTHON_INCLUDE_DIR ?= $(wildcard $(TARGET_ROOTDIR)/usr/include/python3*)
27 27
28LIB_NAME = tidl_api.a 28LIB_NAME = tidl_api.a
29SHARED_LIB_NAME = libtidl_api.so
29LIB_IMGUTIL_NAME = tidl_imgutil.a 30LIB_IMGUTIL_NAME = tidl_imgutil.a
31SHARED_LIB_IMGUTIL_NAME = libtidl_imgutil.so
30PY_LIB_NAME = tidl.so 32PY_LIB_NAME = tidl.so
31 33
32all: $(LIB_NAME) $(LIB_IMGUTIL_NAME) $(PY_LIB_NAME) 34all: $(LIB_NAME) $(LIB_IMGUTIL_NAME) $(PY_LIB_NAME) \
35 $(SHARED_LIB_NAME) $(SHARED_LIB_IMGUTIL_NAME)
33 36
34include make.inc 37include make.inc
35include make.buildid 38include make.buildid
@@ -91,12 +94,18 @@ obj/%.o: src/%.cpp $(HEADERS)
91$(LIB_NAME): $(HOST_OBJ_FILES) 94$(LIB_NAME): $(HOST_OBJ_FILES)
92 $(AR) cr $@ $(HOST_OBJ_FILES) 95 $(AR) cr $@ $(HOST_OBJ_FILES)
93 96
97$(SHARED_LIB_NAME): $(HOST_OBJ_FILES)
98 $(CXX) -shared $(HOST_OBJ_FILES) -o $@
99
94$(PY_LIB_NAME): $(HOST_OBJ_PYBIND_FILES) $(LIB_NAME) 100$(PY_LIB_NAME): $(HOST_OBJ_PYBIND_FILES) $(LIB_NAME)
95 $(CXX) $(CXXFLAGS) -Wl,-Bsymbolic -shared -lOpenCL -locl_util $^ -o $@ 101 $(CXX) $(CXXFLAGS) -Wl,-Bsymbolic -shared -lOpenCL -locl_util $^ -o $@
96 102
97$(LIB_IMGUTIL_NAME): $(HOST_OBJ_IMGUTIL_FILES) 103$(LIB_IMGUTIL_NAME): $(HOST_OBJ_IMGUTIL_FILES)
98 $(AR) cr $@ $(HOST_OBJ_IMGUTIL_FILES) 104 $(AR) cr $@ $(HOST_OBJ_IMGUTIL_FILES)
99 105
106$(SHARED_LIB_IMGUTIL_NAME): $(HOST_OBJ_IMGUTIL_FILES)
107 $(CXX) -shared $(HOST_OBJ_IMGUTIL_FILES) -o $@
108
100clean:: 109clean::
101 $(RM) -f $(LIB_NAME) $(PY_LIB_NAME) 110 $(RM) -f $(LIB_NAME) $(PY_LIB_NAME)
102 $(RM) -f $(LIB_IMGUTIL_NAME) 111 $(RM) -f $(LIB_IMGUTIL_NAME)
diff --git a/tidl_api/inc/subgraph_runtime.h b/tidl_api/inc/subgraph_runtime.h
index a38973e..c153485 100644
--- a/tidl_api/inc/subgraph_runtime.h
+++ b/tidl_api/inc/subgraph_runtime.h
@@ -35,6 +35,17 @@
35#include "execution_object_pipeline.h" 35#include "execution_object_pipeline.h"
36#include "subgraph_data_conv.h" 36#include "subgraph_data_conv.h"
37 37
38extern "C" {
39
40void TidlRunSubgraph(int total_subgraphs,
41 int subgraph_id,
42 int num_inputs,
43 int num_outputs,
44 float **inputTensors,
45 float **outputTensors
46 );
47
48} // extern "C"
38 49
39namespace tidl { 50namespace tidl {
40 51
@@ -42,51 +53,30 @@ namespace tidl {
42// Auto-generated code from Relay/TVM compilation step after 53// Auto-generated code from Relay/TVM compilation step after
43// partitioning and lowering to backend implementation 54// partitioning and lowering to backend implementation
44 55
45// TODO: need to figure out exact arguments and format 56void TVM_TidlFunction(int total_subgraphs, int subgraph_id,
46extern void tidl::RunSubgraphImpl(int subgraph_id,
47 const std::vector<float*>&,
48 const std::vector<float*>&);
49
50void tidlRunSubgraph(int subgraph_id,
51 int num_input_tensors, int num_output_tensors, 57 int num_input_tensors, int num_output_tensors,
52 PackedArgs args) 58 PackedArgs args)
53{ 59{
54 std::vector<float *> in_data, out_data; 60 float** in_data = new float*[num_input_tensors];
61 float** out_data = new float*[num_output_tensors];
55 62
56 for (int i = 0; i < num_input_tensors + num_output_tensors; i++) 63 for (int i = 0; i < num_input_tensors + num_output_tensors; i++)
57 if (i < num_input_tensors) 64 if (i < num_input_tensors)
58 in_data.push_back(args.data[i]); 65 in_data[i] = args.data[i];
59 else 66 else
60 out_data.push_back(args.data[i]); 67 out_data[i - num_input_tensors] = args.data[i];
61 68
62 tidl::RunSubgraphImpl(subgraph_id, in_data, out_data); 69 // call into this function in libtidl.so
63} 70 // dlopen("libtidl.so")
64#endif 71 // TidlFunc = dlsym("TidlRunSubgraph");
72 (*TidlFunc)(total_subgraphs, subgraph_id,
73 num_input_tensors, num_output_tensors,
74 in_data, out_data);
65 75
66 76 delete [] in_data;
67#if 0 77 delete [] out_data;
68// user application code
69// subgraph_id will be used to find TIDL config file
70// e.g. subgraph_1.cfg, subgraph_2.cfg, etc
71void RunSubgraphImpl(int subgraph_id,
72 int total_num_subgraphs,
73 const std::vector<float*>& ext_in_data,
74 const std::vector<float*>& ext_out_data)
75{
76 ResM& res = ResM::Instance(total_num_subgraphs);
77 const ExecutionObjectPipeline& eop = res.GetEOP(subgraph_id);
78 const SubgraphDataConv& in_conv = res.GetInConv(subgraph_id);
79 const SubgraphDataConv& out_conv = res.GetOutConv(subgraph_id);
80
81 in_data = eop.GetInputBufferPtr();
82 in_conv.ScaleQuant(ext_in_data, in_data);
83 eop.ProcessFrameStartAsync();
84 eop.ProcessFrameWait();
85 out_data = eop.GetOutputBufferPtr();
86 out_conv.ScaleDeQuant(out_data, ext_out_data);
87 res.FreeEOP(subgraph_id, eop);
88} 78}
89#endif 79#endif
90 80
91 81
92// Singleton ResM .h file 82// Singleton ResM .h file
diff --git a/tidl_api/src/subgraph_runtime.cpp b/tidl_api/src/subgraph_runtime.cpp
index 9f68c62..ad5a11a 100644
--- a/tidl_api/src/subgraph_runtime.cpp
+++ b/tidl_api/src/subgraph_runtime.cpp
@@ -38,56 +38,65 @@
38// Auto-generated code from Relay/TVM compilation step after 38// Auto-generated code from Relay/TVM compilation step after
39// partitioning and lowering to backend implementation 39// partitioning and lowering to backend implementation
40 40
41// TODO: need to figure out exact arguments and format 41void TVM_TidlFunction(int total_subgraphs, int subgraph_id,
42extern void tidl::RunSubgraphImpl(int subgraph_id,
43 const std::vector<float*>&,
44 const std::vector<float*>&);
45
46void tidlRunSubgraph(int subgraph_id,
47 int num_input_tensors, int num_output_tensors, 42 int num_input_tensors, int num_output_tensors,
48 PackedArgs args) 43 PackedArgs args)
49{ 44{
50 std::vector<float *> in_data, out_data; 45 float** in_data = new float*[num_input_tensors];
46 float** out_data = new float*[num_output_tensors];
51 47
52 for (int i = 0; i < num_input_tensors + num_output_tensors; i++) 48 for (int i = 0; i < num_input_tensors + num_output_tensors; i++)
53 if (i < num_input_tensors) 49 if (i < num_input_tensors)
54 in_data.push_back(args.data[i]); 50 in_data[i] = args.data[i];
55 else 51 else
56 out_data.push_back(args.data[i]); 52 out_data[i - num_input_tensors] = args.data[i];
53
54 // call into this function in libtidl.so
55 // dlopen("libtidl.so")
56 // TidlFunc = dlsym("TidlRunSubgraph");
57 (*TidlFunc)(total_subgraphs, subgraph_id,
58 num_input_tensors, num_output_tensors,
59 in_data, out_data);
57 60
58 tidl::RunSubgraphImpl(subgraph_id, in_data, out_data); 61 delete [] in_data;
62 delete [] out_data;
59} 63}
60#endif 64#endif
61 65
62 66
63#if 0 67// Singleton ResM .cpp
64// user application code 68using namespace tidl;
65// subgraph_id will be used to find TIDL config file 69
66// e.g. subgraph_1.cfg, subgraph_2.cfg, etc 70
67void RunSubgraphImpl(int subgraph_id, 71void TidlRunSubgraph(int total_subgraphs,
68 int total_num_subgraphs, 72 int subgraph_id,
69 const std::vector<float*>& ext_in_data, 73 int num_inputs,
70 const std::vector<float*>& ext_out_data) 74 int num_outputs,
75 float **inputTensors,
76 float **outputTensors
77 )
71{ 78{
72 ResM& res = ResM::Instance(total_num_subgraphs); 79 ResM& res = ResM::Instance(total_subgraphs);
73 const ExecutionObjectPipeline& eop = res.GetEOP(subgraph_id); 80 ExecutionObjectPipeline* eop = res.GetEOP(subgraph_id);
74 const SubgraphDataConv& in_conv = res.GetInConv(subgraph_id); 81 const SubgraphDataConv& in_conv = res.GetInConv(subgraph_id);
75 const SubgraphDataConv& out_conv = res.GetOutConv(subgraph_id); 82 const SubgraphDataConv& out_conv = res.GetOutConv(subgraph_id);
76 83
77 in_data = eop.GetInputBufferPtr(); 84 std::vector<float *> in_data_v, out_data_v;
78 in_conv.ScaleQuant(ext_in_data, in_data); 85 for (int i = 0; i < num_inputs; i++)
79 eop.ProcessFrameStartAsync(); 86 in_data_v.emplace_back(inputTensors[i]);
80 eop.ProcessFrameWait(); 87 for (int i = 0; i < num_outputs; i++)
81 out_data = eop.GetOutputBufferPtr(); 88 out_data_v.emplace_back(outputTensors[i]);
82 out_conv.ScaleDeQuant(out_data, ext_out_data); 89 char* in_data = eop->GetInputBufferPtr();
90 in_conv.ScaleQuant(in_data_v, (uint8_t *) in_data);
91
92 eop->ProcessFrameStartAsync();
93 eop->ProcessFrameWait();
94
95 char* out_data = eop->GetOutputBufferPtr();
96 out_conv.ScaleDequant((const uint8_t *) out_data, out_data_v);
83 res.FreeEOP(subgraph_id, eop); 97 res.FreeEOP(subgraph_id, eop);
84} 98}
85#endif
86
87
88 99
89// Singleton ResM .cpp
90using namespace tidl;
91 100
92typedef Loki::SingletonHolder <tidl::ResM, Loki::CreateUsingNew, 101typedef Loki::SingletonHolder <tidl::ResM, Loki::CreateUsingNew,
93Loki::DefaultLifetime, Loki::ClassLevelLockable> tidlSingleResM; 102Loki::DefaultLifetime, Loki::ClassLevelLockable> tidlSingleResM;
@@ -190,7 +199,7 @@ ExecutionObjectPipeline* ResM::GetEOP(uint32_t subgraph_id)
190 std::string cfg_file = "subgraph" + std::to_string(subgraph_id) + ".cfg"; 199 std::string cfg_file = "subgraph" + std::to_string(subgraph_id) + ".cfg";
191 bool status = cs_m[subgraph_id].ReadFromFile(cfg_file); 200 bool status = cs_m[subgraph_id].ReadFromFile(cfg_file);
192 assert(status); 201 assert(status);
193 202
194 // Check if last few layers can be offloaded to DSPs 203 // Check if last few layers can be offloaded to DSPs
195 // and DSPs are available 204 // and DSPs are available
196 DeviceIds e_ids, e2_ids; 205 DeviceIds e_ids, e2_ids;
@@ -225,7 +234,7 @@ ExecutionObjectPipeline* ResM::GetEOP(uint32_t subgraph_id)
225 if (num_lg2_dsps_used_m < num_dsps_m) 234 if (num_lg2_dsps_used_m < num_dsps_m)
226 { 235 {
227 if (enable_trace_m) 236 if (enable_trace_m)
228 printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n", 237 printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n",
229 subgraph_id, i, start_layer); 238 subgraph_id, i, start_layer);
230 while (i <= start_layer) 239 while (i <= start_layer)
231 cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2; 240 cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2;