 examples/mobilenet_subgraph/Makefile      |   2 +-
 examples/mobilenet_subgraph/main.cpp      |  58 +++++-
 examples/mobilenet_subgraph/subgraph0.cfg |  11 +
 tidl_api/inc/configuration.h              |  27 +++
 tidl_api/inc/subgraph_data_conv.h         |  19 ++-
 tidl_api/inc/subgraph_runtime.h           |   9 +
 tidl_api/src/configuration_parser.cpp     |  18 +-
 tidl_api/src/subgraph_data_conv.cpp       | 190 ++++++++++++++-----
 tidl_api/src/subgraph_runtime.cpp         |  74 +++++--
 9 files changed, 349 insertions(+), 59 deletions(-)
diff --git a/examples/mobilenet_subgraph/Makefile b/examples/mobilenet_subgraph/Makefile
index ffeb69d..68f5d9d 100644
--- a/examples/mobilenet_subgraph/Makefile
+++ b/examples/mobilenet_subgraph/Makefile
@@ -24,7 +24,7 @@
 # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 # THE POSSIBILITY OF SUCH DAMAGE.
 
-EXE = imagenet
+EXE = mobilenet_subgraph
 
 include ../make.common
 
diff --git a/examples/mobilenet_subgraph/main.cpp b/examples/mobilenet_subgraph/main.cpp
index 5534df3..e4e499a 100644
--- a/examples/mobilenet_subgraph/main.cpp
+++ b/examples/mobilenet_subgraph/main.cpp
@@ -39,6 +39,7 @@
 #include <queue>
 #include <vector>
 #include <chrono>
+#include <future>
 
 #include "executor.h"
 #include "execution_object.h"
@@ -145,6 +146,7 @@ bool RunConfiguration(cmdline_opts_t& opts)
     cout << "\n##### Batch size 1 testing ######\n" << endl;
     try
     {
+        TidlInitSubgraph(1, 0);
         float **inputs = new float *[1];
         inputs[0] = new float[1*3*224*224];
         float **outputs = new float *[1];
@@ -222,6 +224,60 @@ bool RunConfiguration(cmdline_opts_t& opts)
         status = false;
     }
 
+    // This only tests multithreaded inference;
+    // async/future may not be the most efficient multithreading method,
+    // a thread pool might have better performance
+    cout << "\n##### Multithreaded inference testing #####\n" << endl;
+    int num_threads = 8;
+    int num_iters = 8;
+    try
+    {
+        float **inputs  = new float *[num_threads];
+        float **outputs = new float *[num_threads];
+        for (int i = 0; i < num_threads; i++)
+        {
+            inputs[i]  = new float[1*3*224*224];
+            outputs[i] = new float[1001];
+        }
+        vector<future<bool>> futures(num_threads);
+
+        chrono::time_point<chrono::steady_clock> tloop0, tloop1;
+        tloop0 = chrono::steady_clock::now();
+
+        for (int i = 0; i < num_iters + num_threads; i++)
+        {
+            int index = i % num_threads;
+            if (i >= num_threads)
+            {
+                if (futures[index].get())
+                    WriteFrameOutput(outputs[index], opts);
+            }
+
+            if (i < num_iters)
+            {
+                ReadFrame(opts, cap, &inputs[index], 1);
+                futures[index] = std::async(std::launch::async,
+                                     [inputs, outputs](int index) {
+                                         TidlRunSubgraph(1, 0, 1, 1, 1, &inputs[index], &outputs[index]);
+                                         return true;
+                                     },
+                                     index);
+            }
+        }
+
+        tloop1 = chrono::steady_clock::now();
+        chrono::duration<float> elapsed = tloop1 - tloop0;
+        cout << "Multithreaded (num_threads=" << num_threads
+             << ") loop time (including read/write/opencv/print/etc): "
+             << setw(6) << setprecision(4)
+             << (elapsed.count() * 1000) << "ms" << endl;
+    }
+    catch (tidl::Exception &e)
+    {
+        cerr << e.what() << endl;
+        status = false;
+    }
+
     return status;
 }
 
@@ -234,7 +290,7 @@ bool ReadFrame(const cmdline_opts_t& opts, VideoCapture &cap, float** inputs,
     c.inWidth = 224;
     c.inHeight = 224;
     c.preProcType = 2;
-    SubgraphDataConv in_conv{{true}, {128.0f}, {false}, {1,3,224,224}};
+    SubgraphDataConv in_conv{{0}, {true}, {128.0f}, {false}, {1,3,224,224}};
 
     char* frame_buffer = new char[3*224*224];
     assert (frame_buffer != nullptr);
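
The multithreaded test above keeps up to num_threads frames in flight with a fixed ring of futures: slot i % num_threads is harvested exactly num_threads iterations after it was launched, so ReadFrame()/WriteFrameOutput() stay on the calling thread while inferences overlap. A minimal sketch of the same pattern in isolation, with the TIDL call replaced by a hypothetical Process() placeholder:

    #include <future>
    #include <vector>

    bool Process(int index) { return true; }  // hypothetical stand-in for TidlRunSubgraph()

    void PipelinedLoop(int slots, int iters)
    {
        std::vector<std::future<bool>> futures(slots);
        for (int i = 0; i < iters + slots; i++)
        {
            int index = i % slots;
            if (i >= slots)           // harvest the task launched `slots` iterations ago
                futures[index].get();
            if (i < iters)            // refill the slot with the next frame
                futures[index] = std::async(std::launch::async, Process, index);
        }
    }

Running the loop for iters + slots iterations drains the ring: the last `slots` passes only harvest, so every future is consumed before the loop exits.
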
diff --git a/examples/mobilenet_subgraph/subgraph0.cfg b/examples/mobilenet_subgraph/subgraph0.cfg
index 17c20bf..404c70d 100644
--- a/examples/mobilenet_subgraph/subgraph0.cfg
+++ b/examples/mobilenet_subgraph/subgraph0.cfg
@@ -7,3 +7,14 @@ paramsBinFile = "../test/testvecs/config/tidl_models/tidl_param_mobilenet_1_22
 inWidth = 224
 inHeight = 224
 inNumChannels = 3
+# Each of the following entries is a space-separated list,
+# corresponding to the vector of inputs or the vector of outputs
+# Quant_value = float_value * scaleF2Q
+inConvType = 0
+inIsSigned = 1
+inScaleF2Q = 128.0
+inIsNCHW = 0
+outConvType = 0
+outIsSigned = 0
+outScaleF2Q = 255.0
+outIsNCHW = 1
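
Each of the new in*/out* keys takes one value per boundary tensor, so a subgraph with two inputs would carry two space-separated values per key. A hypothetical fragment (not part of this commit) illustrating the list form:

    inConvType = 0 0
    inIsSigned = 1 0
    inScaleF2Q = 128.0 255.0
    inIsNCHW   = 0 0

Following Quant_value = float_value * scaleF2Q, a signed input with scaleF2Q = 128.0 maps the float value 0.5 to round(0.5 * 128) = 64, inside the signed 8-bit range [-128, 127].
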
diff --git a/tidl_api/inc/configuration.h b/tidl_api/inc/configuration.h
index 0a1c77c..c76ba7f 100644
--- a/tidl_api/inc/configuration.h
+++ b/tidl_api/inc/configuration.h
@@ -32,6 +32,7 @@
 
 #include <string>
 #include <map>
+#include <vector>
 #include <iostream>
 
 namespace tidl {
@@ -145,6 +146,32 @@ class Configuration
     //! Margin added to the average in percentage.
     int quantMargin;
 
+    //! Data conversion type at subgraph inputs,
+    //! 0: float <-> Q, 1: float <-> float, 2: Q <-> Q
+    std::vector<int> inConvType;
+
+    //! Whether data is signed at subgraph inputs
+    std::vector<int> inIsSigned;
+
+    //! scaleF2Q factor at subgraph inputs
+    std::vector<float> inScaleF2Q;
+
+    //! Whether the external tensor is in NCHW layout at subgraph inputs
+    std::vector<int> inIsNCHW;
+
+    //! Data conversion type at subgraph outputs,
+    //! 0: float <-> Q, 1: float <-> float, 2: Q <-> Q
+    std::vector<int> outConvType;
+
+    //! Whether data is signed at subgraph outputs
+    std::vector<int> outIsSigned;
+
+    //! scaleF2Q factor at subgraph outputs
+    std::vector<float> outScaleF2Q;
+
+    //! Whether the external tensor is in NCHW layout at subgraph outputs
+    std::vector<int> outIsNCHW;
+
     //! Default constructor.
     Configuration();
 
diff --git a/tidl_api/inc/subgraph_data_conv.h b/tidl_api/inc/subgraph_data_conv.h
index 6b7c4b1..dee53e5 100644
--- a/tidl_api/inc/subgraph_data_conv.h
+++ b/tidl_api/inc/subgraph_data_conv.h
@@ -74,15 +74,23 @@ namespace tidl {
 class SubgraphDataConv
 {
   public:
+    enum ConvType {
+        FLOAT_Q     = 0,  // conversion between float <-> Q
+        FLOAT_FLOAT = 1,  // conversion between float <-> float
+        Q_Q         = 2   // conversion between Q <-> Q
+    };
+
     //! @brief Creates a SubgraphDataConv.
     //! @param None
     SubgraphDataConv() {}
 
-    SubgraphDataConv(const std::vector<bool>& is_signed,
+    SubgraphDataConv(const std::vector<int>& conv_type,
+                     const std::vector<bool>& is_signed,
                      const std::vector<float>& scaleQ,
                      const std::vector<bool>& is_NCHW,
                      const std::vector<int>& dims
-                    ) : is_signed_m(is_signed), scaleQ_m(scaleQ),
+                    ) : conv_type_m(conv_type),
+                        is_signed_m(is_signed), scaleQ_m(scaleQ),
                         is_NCHW_m(is_NCHW), dims_m(dims)
     {}
 
@@ -115,10 +123,13 @@ class SubgraphDataConv
     void ScaleDequant(const uint8_t *in, std::vector<float*>& out) const;
 
   private:
-    //! if tensor needs to be evaluated as signed char
+    //! data type conversion, 0: float <-> Q, 1: float <-> float, 2: Q <-> Q
+    std::vector<int> conv_type_m;
+
+    //! if tensor needs to be evaluated as signed char (if float <-> Q)
     std::vector<bool> is_signed_m;
 
-    //! Q value for Quantization and Dequantization
+    //! Q value for Quantization and Dequantization (if float <-> Q)
     std::vector<float> scaleQ_m;
 
     //! the format of external tensors, NCHW or NHWC
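
Existing call sites gain a leading vector with one ConvType entry per boundary tensor, as the main.cpp change above shows. A sketch of constructing converters matching the example's cfg values; output_buffer and eop_output are hypothetical names for caller-owned buffers:

    // input: one float tensor -> signed Q, scaleF2Q 128.0, external layout NHWC
    SubgraphDataConv in_conv{{SubgraphDataConv::FLOAT_Q},   // same as the plain {0} in main.cpp
                             {true}, {128.0f}, {false}, {1,3,224,224}};
    // output: one Q tensor -> float, scaleF2Q 255.0, external layout NCHW
    SubgraphDataConv out_conv{{SubgraphDataConv::FLOAT_Q},
                              {false}, {255.0f}, {true}, {1,1,1,1001}};
    std::vector<float*> outs{output_buffer};  // output_buffer: float[1001]
    out_conv.ScaleDequant(eop_output, outs);  // eop_output: const uint8_t* device output
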
diff --git a/tidl_api/inc/subgraph_runtime.h b/tidl_api/inc/subgraph_runtime.h
index 37e771d..b4fc2b7 100644
--- a/tidl_api/inc/subgraph_runtime.h
+++ b/tidl_api/inc/subgraph_runtime.h
@@ -32,6 +32,15 @@
 
 extern "C" {
 
+//! @brief Top level API to initialize a TIDL subgraph on device.
+//!        If not invoked ahead of time, TidlRunSubgraph() will call this
+//!        function before any inference
+//! @param total_subgraphs total number of TIDL subgraphs in whole inference
+//! @param subgraph_id index of current TIDL subgraph
+extern void TidlInitSubgraph(int total_subgraphs,
+                             int subgraph_id
+                            );
+
 //! @brief Top level inference to run a TIDL subgraph
 //! @param total_subgraphs total number of TIDL subgraphs in whole inference
 //! @param subgraph_id index of current TIDL subgraph
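
TidlRunSubgraph() initializes the subgraph lazily, so without this call the first inference absorbs the one-time device setup; invoking TidlInitSubgraph() ahead of time matters when the first frame is being timed, or when several threads race to run first, as in the main.cpp test above. A sketch for a single subgraph, assuming inputs/outputs are allocated as in the batch-size-1 test:

    TidlInitSubgraph(1, 0);  // 1 subgraph in total; set up subgraph 0 before timing starts
    TidlRunSubgraph(1, 0, 1, 1, 1, inputs, outputs);  // first frame no longer pays init cost
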
diff --git a/tidl_api/src/configuration_parser.cpp b/tidl_api/src/configuration_parser.cpp
index f457560..3ad0d2c 100644
--- a/tidl_api/src/configuration_parser.cpp
+++ b/tidl_api/src/configuration_parser.cpp
@@ -52,6 +52,7 @@ struct ConfigParser : qi::grammar<Iterator, ascii::space_type>
     ConfigParser(Configuration &x) : ConfigParser::base_type(entry)
    {
         using qi::int_;
+        using qi::float_;
         using qi::bool_;
         using qi::lit;
         using qi::lexeme;
@@ -66,6 +67,10 @@ struct ConfigParser : qi::grammar<Iterator, ascii::space_type>
         path %= lexeme[+(char_ - '"')];
         q_path = qi::omit[*char_('"')] >> path >> qi::omit[*char_('"')];
 
+        // Rules for parsing subgraph data conversion information
+        intvec = int_ >> *int_;
+        floatvec = float_ >> *float_;
+
         // Grammar for parsing configuration file
         entry %=
             lit("layerIndex2LayerGroupId") >> '=' >>
@@ -85,7 +90,15 @@ struct ConfigParser : qi::grammar<Iterator, ascii::space_type>
             int_[ph::ref(x.quantHistoryParam1)= _1] |
             lit("quantHistoryParam2") >> '=' >>
             int_[ph::ref(x.quantHistoryParam2)= _1] |
-            lit("quantMargin") >> '=' >> int_[ph::ref(x.quantMargin)= _1]
+            lit("quantMargin") >> '=' >> int_[ph::ref(x.quantMargin)= _1] |
+            lit("inConvType") >> '=' >> intvec[ph::ref(x.inConvType) = _1] |
+            lit("inIsSigned") >> '=' >> intvec[ph::ref(x.inIsSigned) = _1] |
+            lit("inScaleF2Q") >> '=' >> floatvec[ph::ref(x.inScaleF2Q) = _1] |
+            lit("inIsNCHW") >> '=' >> intvec[ph::ref(x.inIsNCHW) = _1] |
+            lit("outConvType") >> '=' >> intvec[ph::ref(x.outConvType) = _1] |
+            lit("outIsSigned") >> '=' >> intvec[ph::ref(x.outIsSigned) = _1] |
+            lit("outScaleF2Q") >> '=' >> floatvec[ph::ref(x.outScaleF2Q) = _1] |
+            lit("outIsNCHW") >> '=' >> intvec[ph::ref(x.outIsNCHW) = _1]
             ;
     }
 
@@ -95,6 +108,9 @@ struct ConfigParser : qi::grammar<Iterator, ascii::space_type>
 
     qi::rule<Iterator, std::pair<int, int>(), ascii::space_type> id2group;
     qi::rule<Iterator, std::map<int, int>(), ascii::space_type> id2groups;
+
+    qi::rule<Iterator, std::vector<int>(), ascii::space_type> intvec;
+    qi::rule<Iterator, std::vector<float>(), ascii::space_type> floatvec;
 };
 
 bool Configuration::ReadFromFile(const std::string &file_name)
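
The new intvec/floatvec rules accept one or more whitespace-separated values (int_ >> *int_), so a single scalar and a list both parse into the same vector-valued attribute. A short sketch of exercising the new keys through the public API, assuming the subgraph0.cfg contents shown earlier:

    tidl::Configuration c;
    bool ok = c.ReadFromFile("subgraph0.cfg");
    assert(ok);
    // for that file: c.inConvType == {0}, c.inScaleF2Q == {128.0f},
    // c.outScaleF2Q == {255.0f}; a multi-tensor subgraph yields longer vectors
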
diff --git a/tidl_api/src/subgraph_data_conv.cpp b/tidl_api/src/subgraph_data_conv.cpp
index d8cc11f..6366360 100644
--- a/tidl_api/src/subgraph_data_conv.cpp
+++ b/tidl_api/src/subgraph_data_conv.cpp
@@ -26,6 +26,8 @@
  * THE POSSIBILITY OF SUCH DAMAGE.
  *****************************************************************************/
 
+#include <cassert>
+#include <cstring>
 #include "subgraph_data_conv.h"
 
 using namespace tidl;
@@ -75,32 +77,91 @@ const
     int offset = 0;
     for (uint32_t d = 0; d < is_NCHW_m.size(); d++)
     {
-        float Q = scaleQ_m[d];
         int N = dims_m[4 * d + 0];
         int C = dims_m[4 * d + 1];
         int H = dims_m[4 * d + 2];
         int W = dims_m[4 * d + 3];
-        int vmin = is_signed_m[d] ? -128 : 0;
-        int vmax = is_signed_m[d] ? 127 : 255;
-        float *in_d = in[d];
-        if (is_NCHW_m[d]) // no need to transpose external tensor
+
+        if (conv_type_m[d] == ConvType::FLOAT_Q)
         {
-            for (int i = 0; i < N * C * H * W; i++)
-                out[offset + i] = QuantizeValue(in_d[i], Q, vmin, vmax);
+            float Q = scaleQ_m[d];
+            int vmin = is_signed_m[d] ? -128 : 0;
+            int vmax = is_signed_m[d] ? 127 : 255;
+            float *in_d = in[d];
+            if (is_NCHW_m[d] || (C == 1) || (H*W == 1))
+            {
+                // no need to transpose external tensor
+                for (int i = 0; i < N * C * H * W; i++)
+                    out[offset + i] = QuantizeValue(in_d[i], Q, vmin, vmax);
+            }
+            else
+            {
+                // need to transpose external tensor
+                for (int n = 0; n < N; n++)
+                    for (int c = 0; c < C; c++)
+                        for (int h = 0; h < H; h++)
+                            for (int w = 0; w < W; w++)
+                            {
+                                int nchw = GetIndex(n, c, h, w, N, C, H, W);
+                                int nhwc = GetIndex(n, h, w, c, N, H, W, C);
+                                out[offset + nchw] = QuantizeValue(in_d[nhwc], Q, vmin, vmax);
+                            }
+            }
         }
-        else // need to transpose external tensor
+        else if (conv_type_m[d] == ConvType::FLOAT_FLOAT)
         {
-            for (int n = 0; n < N; n++)
-                for (int c = 0; c < C; c++)
-                    for (int h = 0; h < H; h++)
-                        for (int w = 0; w < W; w++)
-                        {
-                            int nchw = GetIndex(n, c, h, w, N, C, H, W);
-                            int nhwc = GetIndex(n, h, w, c, N, H, W, C);
-                            out[offset + nchw] = QuantizeValue(in_d[nhwc], Q, vmin, vmax);
-                        }
+            assert((W & 0x3) == 0);  // last dimension is in bytes
+            int f_W = W / 4;         // number of float elements
+            float *in_d = in[d];
+            float *out_d = (float *) (out + offset);
+            if (is_NCHW_m[d] || (C == 1) || (H*W == 1))
+            {
+                // no need to transpose external tensor
+                memcpy(out_d, in_d, N * C * H * W);  // W is in bytes
+            }
+            else
+            {
+                // need to transpose external tensor
+                for (int n = 0; n < N; n++)
+                    for (int c = 0; c < C; c++)
+                        for (int h = 0; h < H; h++)
+                            for (int w = 0; w < f_W; w++)
+                            {
+                                int nchw = GetIndex(n, c, h, w, N, C, H, f_W);
+                                int nhwc = GetIndex(n, h, w, c, N, H, f_W, C);
+                                out_d[nchw] = in_d[nhwc];
+                            }
+            }
         }
-        offset += N * C * H * W;
+        else if (conv_type_m[d] == ConvType::Q_Q)
+        {
+            uint8_t *in_d = (uint8_t *) in[d];
+            uint8_t *out_d = (out + offset);
+            if (is_NCHW_m[d] || (C == 1) || (H*W == 1))
+            {
+                // no need to transpose external tensor
+                memcpy(out_d, in_d, N * C * H * W);
+            }
+            else
+            {
+                // need to transpose external tensor
+                for (int n = 0; n < N; n++)
+                    for (int c = 0; c < C; c++)
+                        for (int h = 0; h < H; h++)
+                            for (int w = 0; w < W; w++)
+                            {
+                                int nchw = GetIndex(n, c, h, w, N, C, H, W);
+                                int nhwc = GetIndex(n, h, w, c, N, H, W, C);
+                                out_d[nchw] = in_d[nhwc];
+                            }
+            }
+        }
+        else
+        {
+            assert(false);
+        }
+
+        offset += N * C * H * W;  // accumulate in bytes
     }
 }
 
@@ -111,31 +172,90 @@ const
     int offset = 0;
     for (uint32_t d = 0; d < is_NCHW_m.size(); d++)
     {
-        float Q = scaleQ_m[d];
-        float Q_inv = 1.0f / Q;
         int N = dims_m[4 * d + 0];
         int C = dims_m[4 * d + 1];
         int H = dims_m[4 * d + 2];
         int W = dims_m[4 * d + 3];
-        bool S = is_signed_m[d];
-        float *out_d = out[d];
-        if (is_NCHW_m[d]) // no need to transpose external tensor
+
+        if (conv_type_m[d] == ConvType::FLOAT_Q)
+        {
+            float Q = scaleQ_m[d];
+            float Q_inv = 1.0f / Q;
+            bool S = is_signed_m[d];
+            float *out_d = out[d];
+            if (is_NCHW_m[d] || (C == 1) || (H*W == 1))
+            {
+                // no need to transpose external tensor
+                for (int i = 0; i < N * C * H * W; i++)
+                    out_d[i] = DequantizeValue(in[offset + i], Q_inv, S);
+            }
+            else
+            {
+                // need to transpose external tensor
+                for (int n = 0; n < N; n++)
+                    for (int c = 0; c < C; c++)
+                        for (int h = 0; h < H; h++)
+                            for (int w = 0; w < W; w++)
+                            {
+                                int nchw = GetIndex(n, c, h, w, N, C, H, W);
+                                int nhwc = GetIndex(n, h, w, c, N, H, W, C);
+                                out_d[nhwc] = DequantizeValue(in[offset + nchw], Q_inv, S);
+                            }
+            }
+        }
+        else if (conv_type_m[d] == ConvType::FLOAT_FLOAT)
         {
-            for (int i = 0; i < N * C * H * W; i++)
-                out_d[i] = DequantizeValue(in[offset + i], Q_inv, S);
+            assert((W & 0x3) == 0);  // last dimension is in bytes
+            int f_W = W / 4;         // number of float elements
+            float *in_d = (float *) (in + offset);
+            float *out_d = out[d];
+            if (is_NCHW_m[d] || (C == 1) || (H*W == 1))
+            {
+                // no need to transpose external tensor
+                memcpy(out_d, in_d, N * C * H * W);  // W is in bytes
+            }
+            else
+            {
+                // need to transpose external tensor
+                for (int n = 0; n < N; n++)
+                    for (int c = 0; c < C; c++)
+                        for (int h = 0; h < H; h++)
+                            for (int w = 0; w < f_W; w++)
+                            {
+                                int nchw = GetIndex(n, c, h, w, N, C, H, f_W);
+                                int nhwc = GetIndex(n, h, w, c, N, H, f_W, C);
+                                out_d[nhwc] = in_d[nchw];
+                            }
+            }
         }
-        else // need to transpose external tensor
+        else if (conv_type_m[d] == ConvType::Q_Q)
         {
-            for (int n = 0; n < N; n++)
-                for (int c = 0; c < C; c++)
-                    for (int h = 0; h < H; h++)
-                        for (int w = 0; w < W; w++)
-                        {
-                            int nchw = GetIndex(n, c, h, w, N, C, H, W);
-                            int nhwc = GetIndex(n, h, w, c, N, H, W, C);
-                            out_d[nhwc] = DequantizeValue(in[offset + nchw], Q_inv, S);
-                        }
+            uint8_t *in_d = (uint8_t *) (in + offset);
+            uint8_t *out_d = (uint8_t *) out[d];
+            if (is_NCHW_m[d] || (C == 1) || (H*W == 1))
+            {
+                // no need to transpose external tensor
+                memcpy(out_d, in_d, N * C * H * W);
+            }
+            else
+            {
+                // need to transpose external tensor
+                for (int n = 0; n < N; n++)
+                    for (int c = 0; c < C; c++)
+                        for (int h = 0; h < H; h++)
+                            for (int w = 0; w < W; w++)
+                            {
+                                int nchw = GetIndex(n, c, h, w, N, C, H, W);
+                                int nhwc = GetIndex(n, h, w, c, N, H, W, C);
+                                out_d[nhwc] = in_d[nchw];
+                            }
+            }
         }
+        else
+        {
+            assert(false);
+        }
+
         offset += N * C * H * W;
     }
 }
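
QuantizeValue(), DequantizeValue(), and GetIndex() are outside this diff; based on the scale factors and clamp ranges above, a plausible reading of what they do is sketched below (an assumption, not the actual helpers):

    #include <cmath>
    #include <cstdint>

    // assumed: scale by Q, round, clamp to the 8-bit range chosen by signedness
    static inline uint8_t QuantizeValue(float v, float Q, int vmin, int vmax)
    {
        int q = (int) std::round(v * Q);  // Quant_value = float_value * scaleF2Q
        if (q < vmin) q = vmin;
        if (q > vmax) q = vmax;
        return (uint8_t) q;               // signed values stored as two's complement
    }

    // assumed: reinterpret as signed if S, then undo the scale
    static inline float DequantizeValue(uint8_t v, float Q_inv, bool S)
    {
        int q = S ? (int)(int8_t) v : (int) v;
        return q * Q_inv;                 // float_value = Quant_value / scaleF2Q
    }

GetIndex(a, b, c, d, A, B, C, D) is presumably the row-major flattening ((a*B + b)*C + c)*D + d, so the transpose branches simply compute the NCHW and NHWC flattenings of the same element and copy between them.
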
diff --git a/tidl_api/src/subgraph_runtime.cpp b/tidl_api/src/subgraph_runtime.cpp
index 09905fc..342acd8 100644
--- a/tidl_api/src/subgraph_runtime.cpp
+++ b/tidl_api/src/subgraph_runtime.cpp
@@ -74,6 +74,13 @@ void TVM_TidlFunction(int total_subgraphs, int subgraph_id,
 using namespace tidl;
 
 
+void TidlInitSubgraph(int total_subgraphs, int subgraph_id)
+{
+  ResM& res = ResM::Instance(total_subgraphs);
+  res.InitSubgraph(subgraph_id);
+}
+
+
 void TidlRunSubgraph(int total_subgraphs,
                      int subgraph_id,
                      int batch_size,
@@ -199,15 +206,8 @@ void ResM::Init(uint32_t num_subgraphs)
     es_m.resize(num_subgraphs_m, nullptr);
     e2s_m.resize(num_subgraphs_m, nullptr);
     eops_m = new std::vector<ResEOP>(num_subgraphs_m);
-
-    // TODO: this should come from parsing config file
-    for (uint32_t i = 0; i < num_subgraphs_m; i++)
-    {
-      in_conv_m.push_back(new SubgraphDataConv(
-                              {true}, {128.0f}, {false}, {1,3,224,224}));
-      out_conv_m.push_back(new SubgraphDataConv(
-                              {false}, {255.0f}, {true}, {1,1,1,1001}));
-    }
+    in_conv_m.resize(num_subgraphs_m, nullptr);
+    out_conv_m.resize(num_subgraphs_m, nullptr);
   }
 }
 
@@ -219,19 +219,63 @@ void ResM::InitSubgraph(uint32_t subgraph_id)
 
   std::unique_lock<std::mutex> lock(res_eop.mutex_eops);
 
+  // Constructing EOPs if not already constructed
   if (res_eop.eops == nullptr)
   {
    if (enable_trace_m)
      printf("Subgraph %d: initializing E/EOPs with %d cores\n",
             subgraph_id, num_es_per_subgraph_m);
 
-    // Constructing EOPs if not already constructed
-    // Each subgraph -> num_eves_per_subgraph_m EOPs
-    // Each EOP -> use_count
+    // Read config file
     std::string cfg_file = "subgraph" + std::to_string(subgraph_id) + ".cfg";
     bool status = cs_m[subgraph_id].ReadFromFile(cfg_file);
     assert(status);
 
+    // Read the network
+    sTIDL_Network_t *net = new sTIDL_Network_t;
+    status = ReadNetworkBinary(cs_m[subgraph_id].netBinFile,
+                               reinterpret_cast<char *>(net));
+    assert(status);
+
+    // Get data conversion info from the configuration,
+    // get input/output tensor dimensions from the network,
+    // then construct data converters at the subgraph boundaries
+    std::vector<int> inDims, outDims;
+    for (int32_t layer = 0; layer < net->numLayers; layer++)
+    {
+      if (net->TIDLLayers[layer].layerType != (int32_t) TIDL_DataLayer)
+        continue;
+      if (net->TIDLLayers[layer].numInBufs <= 0)
+      {
+        for (int d = 0; d < 4; d++)
+          inDims.push_back(net->TIDLLayers[layer].outData[0].dimValues[d]);
+      }
+      if (net->TIDLLayers[layer].numOutBufs <= 0)
+      {
+        for (int d = 0; d < 4; d++)
+          outDims.push_back(net->TIDLLayers[layer].inData[0].dimValues[d]);
+      }
+    }
+    assert(cs_m[subgraph_id].inIsNCHW.size() * 4 == inDims.size());
+    assert(cs_m[subgraph_id].outIsNCHW.size() * 4 == outDims.size());
+    std::vector<bool> inIsSigned, outIsSigned, inIsNCHW, outIsNCHW;
+    for (int v : cs_m[subgraph_id].inIsSigned)   inIsSigned.push_back(v != 0);
+    for (int v : cs_m[subgraph_id].inIsNCHW)     inIsNCHW.push_back(v != 0);
+    for (int v : cs_m[subgraph_id].outIsSigned)  outIsSigned.push_back(v != 0);
+    for (int v : cs_m[subgraph_id].outIsNCHW)    outIsNCHW.push_back(v != 0);
+    in_conv_m[subgraph_id] = new SubgraphDataConv(
+                                      cs_m[subgraph_id].inConvType,
+                                      inIsSigned,
+                                      cs_m[subgraph_id].inScaleF2Q,
+                                      inIsNCHW,
+                                      inDims);
+    out_conv_m[subgraph_id] = new SubgraphDataConv(
+                                      cs_m[subgraph_id].outConvType,
+                                      outIsSigned,
+                                      cs_m[subgraph_id].outScaleF2Q,
+                                      outIsNCHW,
+                                      outDims);
+
     // Check if last few layers can be offloaded to DSPs
     // and DSPs are available
     DeviceIds e_ids, e2_ids;
@@ -241,10 +285,6 @@ void ResM::InitSubgraph(uint32_t subgraph_id)
     // uint32_t num_dsps_used = 0;
     if (num_eves_m > 0 && num_dsps_m > 0 && ! cs_m[subgraph_id].runFullNet)
     {
-      sTIDL_Network_t *net = new sTIDL_Network_t;
-      bool status = ReadNetworkBinary(cs_m[subgraph_id].netBinFile,
-                                      reinterpret_cast<char *>(net));
-      assert(status);
       int32_t start_layer = net->numLayers -1;
       int32_t end_layer = 0;
       if (net->TIDLLayers[start_layer].layerType == (int32_t) TIDL_DataLayer)
@@ -286,7 +326,7 @@ void ResM::InitSubgraph(uint32_t subgraph_id)
     cs_m[subgraph_id].runFullNet = true;
     cs_m[subgraph_id].enableApiTrace = enable_trace_m;
 
-    // Constructing Es and EOPs
+    // Constructing Es and EOPs, each subgraph -> num_eves_per_subgraph_m EOPs
     res_eop.eops = new std::vector<ExecutionObjectPipeline*>;
     uint32_t buffer_factor = 2;  // double buffering factor
     if (num_eves_m > 0)