aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYuan Zhao2019-12-07 23:44:50 -0600
committerYuan Zhao2019-12-07 23:44:50 -0600
commit21faf4b36a0ae9ee5b5c6aaf90d067728ff4f78b (patch)
treedd75d11f3b1e4bb43ce21af84b9577b725a4f11c
parent092a2ed047b02d9668fddebe61d4e5920bfbfb3f (diff)
parent18c76b41aa84071bbb6c95a7bbacbfd690921a3b (diff)
downloadtidl-api-21faf4b36a0ae9ee5b5c6aaf90d067728ff4f78b.tar.gz
tidl-api-21faf4b36a0ae9ee5b5c6aaf90d067728ff4f78b.tar.xz
tidl-api-21faf4b36a0ae9ee5b5c6aaf90d067728ff4f78b.zip
Merge branch 'release/v01.04.00'v01.04.00
-rw-r--r--docs/source/changelog.rst9
-rwxr-xr-xexamples/classification/detect_eves_dsps.sh7
-rwxr-xr-xexamples/mcbench/scripts/all_5728.sh1
-rwxr-xr-xexamples/mcbench/scripts/all_5729.sh14
-rwxr-xr-xexamples/mcbench/scripts/all_5749.sh1
-rw-r--r--examples/mobilenet_subgraph/Makefile44
-rw-r--r--examples/mobilenet_subgraph/imagenet_objects.json1005
-rw-r--r--examples/mobilenet_subgraph/main.cpp617
-rw-r--r--examples/mobilenet_subgraph/subgraph0.cfg20
-rw-r--r--examples/mobilenet_subgraph/thread_pool.cpp144
-rw-r--r--examples/mobilenet_subgraph/thread_pool.h77
-rw-r--r--tidl_api/Makefile14
-rw-r--r--tidl_api/inc/configuration.h27
-rw-r--r--tidl_api/inc/subgraph_data_conv.h144
-rw-r--r--tidl_api/inc/subgraph_runtime.h119
-rw-r--r--tidl_api/make.buildid4
-rw-r--r--tidl_api/src/configuration_parser.cpp18
-rw-r--r--tidl_api/src/subgraph_data_conv.cpp262
-rw-r--r--tidl_api/src/subgraph_runtime.cpp489
-rw-r--r--tidl_api/src/subgraph_runtime_impl.h96
20 files changed, 3107 insertions, 5 deletions
diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
index ed33397..38910d8 100644
--- a/docs/source/changelog.rst
+++ b/docs/source/changelog.rst
@@ -2,6 +2,12 @@
2Changelog 2Changelog
3######### 3#########
4 4
51.4.0 [Processor Linux SDK 6.2]
6===============================
7**Added**
8
9#. Subgraph execution support.
10
51.3.3 [Processor Linux SDK 6.1] 111.3.3 [Processor Linux SDK 6.1]
6=============================== 12===============================
7**Added** 13**Added**
@@ -10,6 +16,9 @@ Changelog
10 16
11#. Added MobileNet v2 support. 17#. Added MobileNet v2 support.
12 18
19#. Added environment variables to control pre-allocated network memory sizes.
20 TIDL_{PARAM,NETWORK}_HEAP_SIZE_{DSP,EVE}
21
13**Changed** 22**Changed**
14 23
15#. Trace data now also include dataQ/minValue/maxValue info. 24#. Trace data now also include dataQ/minValue/maxValue info.
diff --git a/examples/classification/detect_eves_dsps.sh b/examples/classification/detect_eves_dsps.sh
new file mode 100755
index 0000000..aafd904
--- /dev/null
+++ b/examples/classification/detect_eves_dsps.sh
@@ -0,0 +1,7 @@
1# Script to detect number of available EVEs, DSPs, and CMEM memory
2export numeve=$(/usr/share/ti/examples/opencl/platforms/platforms | grep "Embedded Vision Engine" | wc -l)
3export numdsp=$(find /proc/device-tree/ocp/ -name "dsp_system*" | wc -l)
4export cmemsize=$(cat /proc/cmem | grep "Block 0: Pool 0:" | cut -d ' ' -f8)
5echo "Number of EVEs:${numeve}"
6echo "Number of DSPs:${numdsp}"
7echo "CMEM size:${cmemsize}"
diff --git a/examples/mcbench/scripts/all_5728.sh b/examples/mcbench/scripts/all_5728.sh
index ed996be..c286297 100755
--- a/examples/mcbench/scripts/all_5728.sh
+++ b/examples/mcbench/scripts/all_5728.sh
@@ -1,3 +1,4 @@
1# Set of TIDL benchmarking test cases for AM5728 SoC, with 2xDSP and no EVE devices
1./mcbench -g 1 -d 2 -e 0 -c ../test/testvecs/config/infer/tidl_config_mobileNet1.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y 2./mcbench -g 1 -d 2 -e 0 -c ../test/testvecs/config/infer/tidl_config_mobileNet1.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y
2./mcbench -g 1 -d 2 -e 0 -c ../test/testvecs/config/infer/tidl_config_mobileNet2.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y 3./mcbench -g 1 -d 2 -e 0 -c ../test/testvecs/config/infer/tidl_config_mobileNet2.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y
3./mcbench -g 1 -d 2 -e 0 -c ../test/testvecs/config/infer/tidl_config_squeeze1_1.txt -f 50 -i ../test/testvecs/input/preproc_1_227x227_multi.y 4./mcbench -g 1 -d 2 -e 0 -c ../test/testvecs/config/infer/tidl_config_squeeze1_1.txt -f 50 -i ../test/testvecs/input/preproc_1_227x227_multi.y
diff --git a/examples/mcbench/scripts/all_5729.sh b/examples/mcbench/scripts/all_5729.sh
new file mode 100755
index 0000000..defec72
--- /dev/null
+++ b/examples/mcbench/scripts/all_5729.sh
@@ -0,0 +1,14 @@
1# Set of TIDL benchmarking test cases for AM5729 SoC, with 2xDSP and 4xEVE
2export TIDL_NETWORK_HEAP_SIZE_EVE=56623104
3export TIDL_NETWORK_HEAP_SIZE_DSP=56623104
4./mcbench -g 1 -d 2 -e 4 -c ../test/testvecs/config/infer/tidl_config_j11_v2.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
5./mcbench -g 1 -d 2 -e 4 -c ../test/testvecs/config/infer/tidl_config_j11_v2_dense.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
6
7export TIDL_NETWORK_HEAP_SIZE_EVE=67108864
8export TIDL_NETWORK_HEAP_SIZE_DSP=8388608
9./mcbench -g 2 -d 1 -e 4 -c ../test/testvecs/config/infer/tidl_config_mobileNet1_lg2.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y
10./mcbench -g 2 -d 2 -e 4 -c ../test/testvecs/config/infer/tidl_config_mobileNet1_lg2.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y
11./mcbench -g 2 -d 2 -e 4 -c ../test/testvecs/config/infer/tidl_config_mobileNet2_lg2.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y
12./mcbench -g 2 -d 2 -e 4 -c ../test/testvecs/config/infer/tidl_config_inceptionNetv1_lg2.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
13./mcbench -g 2 -d 2 -e 4 -c ../test/testvecs/config/infer/tidl_config_j11_v2_lg2.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
14./mcbench -g 2 -d 2 -e 4 -c ../test/testvecs/config/infer/tidl_config_j11_v2_dense_lg2.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
diff --git a/examples/mcbench/scripts/all_5749.sh b/examples/mcbench/scripts/all_5749.sh
index 8956373..7f8879a 100755
--- a/examples/mcbench/scripts/all_5749.sh
+++ b/examples/mcbench/scripts/all_5749.sh
@@ -1,3 +1,4 @@
1# Set of TIDL benchmarking test cases for AM5749 SoC, with 2xDSP and 2xEVE
1./mcbench -g 1 -d 0 -e 2 -c ../test/testvecs/config/infer/tidl_config_mobileNet1.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y 2./mcbench -g 1 -d 0 -e 2 -c ../test/testvecs/config/infer/tidl_config_mobileNet1.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y
2./mcbench -g 1 -d 2 -e 0 -c ../test/testvecs/config/infer/tidl_config_mobileNet1.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y 3./mcbench -g 1 -d 2 -e 0 -c ../test/testvecs/config/infer/tidl_config_mobileNet1.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y
3./mcbench -g 2 -d 1 -e 2 -c ../test/testvecs/config/infer/tidl_config_mobileNet1_lg2.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y 4./mcbench -g 2 -d 1 -e 2 -c ../test/testvecs/config/infer/tidl_config_mobileNet1_lg2.txt -f 50 -i ../test/testvecs/input/preproc_2_224x224_multi.y
diff --git a/examples/mobilenet_subgraph/Makefile b/examples/mobilenet_subgraph/Makefile
new file mode 100644
index 0000000..e4a5173
--- /dev/null
+++ b/examples/mobilenet_subgraph/Makefile
@@ -0,0 +1,44 @@
1# Copyright (c) 2018 Texas Instruments Incorporated - http://www.ti.com/
2# All rights reserved.
3#
4# Redistribution and use in source and binary forms, with or without
5# modification, are permitted provided that the following conditions are met:
6# * Redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer.
8# * Redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution.
11# * Neither the name of Texas Instruments Incorporated nor the
12# names of its contributors may be used to endorse or promote products
13# derived from this software without specific prior written permission.
14#
15# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
25# THE POSSIBILITY OF SUCH DAMAGE.
26
27EXE = mobilenet_subgraph
28
29include ../make.common
30
31# overwrite LIBS, -ltidl_api should be able to pull -lOpenCL
32LIBS += -lopencv_highgui -lopencv_imgcodecs -lopencv_videoio\
33 -lopencv_imgproc -lopencv_core
34LIBS += -ljson-c
35
36LIBS += -L$(TIDL_API_DIR) -ltidl_api -ltidl_imgutil
37
38SOURCES = main.cpp ../common/object_classes.cpp ../common/utils.cpp \
39 ../common/video_utils.cpp thread_pool.cpp
40
41$(EXE): $(HEADERS) $(SOURCES)
42 $(CXX) $(CXXFLAGS) $(SOURCES) \
43 $(LDFLAGS) $(LIBS) -o $@
44
diff --git a/examples/mobilenet_subgraph/imagenet_objects.json b/examples/mobilenet_subgraph/imagenet_objects.json
new file mode 100644
index 0000000..7a5289b
--- /dev/null
+++ b/examples/mobilenet_subgraph/imagenet_objects.json
@@ -0,0 +1,1005 @@
1{
2 "network": "imagenet",
3 "objects": [
4 { "label": "tench" },
5 { "label": "goldfish" },
6 { "label": "great_white_shark" },
7 { "label": "tiger_shark" },
8 { "label": "hammerhead" },
9 { "label": "electric_ray" },
10 { "label": "stingray" },
11 { "label": "cock" },
12 { "label": "hen" },
13 { "label": "ostrich" },
14 { "label": "brambling" },
15 { "label": "goldfinch" },
16 { "label": "house_finch" },
17 { "label": "junco" },
18 { "label": "indigo_bunting" },
19 { "label": "robin" },
20 { "label": "bulbul" },
21 { "label": "jay" },
22 { "label": "magpie" },
23 { "label": "chickadee" },
24 { "label": "water_ouzel" },
25 { "label": "kite" },
26 { "label": "bald_eagle" },
27 { "label": "vulture" },
28 { "label": "great_grey_owl" },
29 { "label": "European_fire_salamander" },
30 { "label": "common_newt" },
31 { "label": "eft" },
32 { "label": "spotted_salamander" },
33 { "label": "axolotl" },
34 { "label": "bullfrog" },
35 { "label": "tree_frog" },
36 { "label": "tailed_frog" },
37 { "label": "loggerhead" },
38 { "label": "leatherback_turtle" },
39 { "label": "mud_turtle" },
40 { "label": "terrapin" },
41 { "label": "box_turtle" },
42 { "label": "banded_gecko" },
43 { "label": "common_iguana" },
44 { "label": "American_chameleon" },
45 { "label": "whiptail" },
46 { "label": "agama" },
47 { "label": "frilled_lizard" },
48 { "label": "alligator_lizard" },
49 { "label": "Gila_monster" },
50 { "label": "green_lizard" },
51 { "label": "African_chameleon" },
52 { "label": "Komodo_dragon" },
53 { "label": "African_crocodile" },
54 { "label": "American_alligator" },
55 { "label": "triceratops" },
56 { "label": "thunder_snake" },
57 { "label": "ringneck_snake" },
58 { "label": "hognose_snake" },
59 { "label": "green_snake" },
60 { "label": "king_snake" },
61 { "label": "garter_snake" },
62 { "label": "water_snake" },
63 { "label": "vine_snake" },
64 { "label": "night_snake" },
65 { "label": "boa_constrictor" },
66 { "label": "rock_python" },
67 { "label": "Indian_cobra" },
68 { "label": "green_mamba" },
69 { "label": "sea_snake" },
70 { "label": "horned_viper" },
71 { "label": "diamondback" },
72 { "label": "sidewinder" },
73 { "label": "trilobite" },
74 { "label": "harvestman" },
75 { "label": "scorpion" },
76 { "label": "black_and_gold_garden_spider" },
77 { "label": "barn_spider" },
78 { "label": "garden_spider" },
79 { "label": "black_widow" },
80 { "label": "tarantula" },
81 { "label": "wolf_spider" },
82 { "label": "tick" },
83 { "label": "centipede" },
84 { "label": "black_grouse" },
85 { "label": "ptarmigan" },
86 { "label": "ruffed_grouse" },
87 { "label": "prairie_chicken" },
88 { "label": "peacock" },
89 { "label": "quail" },
90 { "label": "partridge" },
91 { "label": "African_grey" },
92 { "label": "macaw" },
93 { "label": "sulphur-crested_cockatoo" },
94 { "label": "lorikeet" },
95 { "label": "coucal" },
96 { "label": "bee_eater" },
97 { "label": "hornbill" },
98 { "label": "hummingbird" },
99 { "label": "jacamar" },
100 { "label": "toucan" },
101 { "label": "drake" },
102 { "label": "red-breasted_merganser" },
103 { "label": "goose" },
104 { "label": "black_swan" },
105 { "label": "tusker" },
106 { "label": "echidna" },
107 { "label": "platypus" },
108 { "label": "wallaby" },
109 { "label": "koala" },
110 { "label": "wombat" },
111 { "label": "jellyfish" },
112 { "label": "sea_anemone" },
113 { "label": "brain_coral" },
114 { "label": "flatworm" },
115 { "label": "nematode" },
116 { "label": "conch" },
117 { "label": "snail" },
118 { "label": "slug" },
119 { "label": "sea_slug" },
120 { "label": "chiton" },
121 { "label": "chambered_nautilus" },
122 { "label": "Dungeness_crab" },
123 { "label": "rock_crab" },
124 { "label": "fiddler_crab" },
125 { "label": "king_crab" },
126 { "label": "American_lobster" },
127 { "label": "spiny_lobster" },
128 { "label": "crayfish" },
129 { "label": "hermit_crab" },
130 { "label": "isopod" },
131 { "label": "white_stork" },
132 { "label": "black_stork" },
133 { "label": "spoonbill" },
134 { "label": "flamingo" },
135 { "label": "little_blue_heron" },
136 { "label": "American_egret" },
137 { "label": "bittern" },
138 { "label": "crane" },
139 { "label": "limpkin" },
140 { "label": "European_gallinule" },
141 { "label": "American_coot" },
142 { "label": "bustard" },
143 { "label": "ruddy_turnstone" },
144 { "label": "red-backed_sandpiper" },
145 { "label": "redshank" },
146 { "label": "dowitcher" },
147 { "label": "oystercatcher" },
148 { "label": "pelican" },
149 { "label": "king_penguin" },
150 { "label": "albatross" },
151 { "label": "grey_whale" },
152 { "label": "killer_whale" },
153 { "label": "dugong" },
154 { "label": "sea_lion" },
155 { "label": "Chihuahua" },
156 { "label": "Japanese_spaniel" },
157 { "label": "Maltese_dog" },
158 { "label": "Pekinese" },
159 { "label": "Shih-Tzu" },
160 { "label": "Blenheim_spaniel" },
161 { "label": "papillon" },
162 { "label": "toy_terrier" },
163 { "label": "Rhodesian_ridgeback" },
164 { "label": "Afghan_hound" },
165 { "label": "basset" },
166 { "label": "beagle" },
167 { "label": "bloodhound" },
168 { "label": "bluetick" },
169 { "label": "black-and-tan_coonhound" },
170 { "label": "Walker_hound" },
171 { "label": "English_foxhound" },
172 { "label": "redbone" },
173 { "label": "borzoi" },
174 { "label": "Irish_wolfhound" },
175 { "label": "Italian_greyhound" },
176 { "label": "whippet" },
177 { "label": "Ibizan_hound" },
178 { "label": "Norwegian_elkhound" },
179 { "label": "otterhound" },
180 { "label": "Saluki" },
181 { "label": "Scottish_deerhound" },
182 { "label": "Weimaraner" },
183 { "label": "Staffordshire_bullterrier" },
184 { "label": "American_Staffordshire_terrier" },
185 { "label": "Bedlington_terrier" },
186 { "label": "Border_terrier" },
187 { "label": "Kerry_blue_terrier" },
188 { "label": "Irish_terrier" },
189 { "label": "Norfolk_terrier" },
190 { "label": "Norwich_terrier" },
191 { "label": "Yorkshire_terrier" },
192 { "label": "wire-haired_fox_terrier" },
193 { "label": "Lakeland_terrier" },
194 { "label": "Sealyham_terrier" },
195 { "label": "Airedale" },
196 { "label": "cairn" },
197 { "label": "Australian_terrier" },
198 { "label": "Dandie_Dinmont" },
199 { "label": "Boston_bull" },
200 { "label": "miniature_schnauzer" },
201 { "label": "giant_schnauzer" },
202 { "label": "standard_schnauzer" },
203 { "label": "Scotch_terrier" },
204 { "label": "Tibetan_terrier" },
205 { "label": "silky_terrier" },
206 { "label": "soft-coated_wheaten_terrier" },
207 { "label": "West_Highland_white_terrier" },
208 { "label": "Lhasa" },
209 { "label": "flat-coated_retriever" },
210 { "label": "curly-coated_retriever" },
211 { "label": "golden_retriever" },
212 { "label": "Labrador_retriever" },
213 { "label": "Chesapeake_Bay_retriever" },
214 { "label": "German_short-haired_pointer" },
215 { "label": "vizsla" },
216 { "label": "English_setter" },
217 { "label": "Irish_setter" },
218 { "label": "Gordon_setter" },
219 { "label": "Brittany_spaniel" },
220 { "label": "clumber" },
221 { "label": "English_springer" },
222 { "label": "Welsh_springer_spaniel" },
223 { "label": "cocker_spaniel" },
224 { "label": "Sussex_spaniel" },
225 { "label": "Irish_water_spaniel" },
226 { "label": "kuvasz" },
227 { "label": "schipperke" },
228 { "label": "groenendael" },
229 { "label": "malinois" },
230 { "label": "briard" },
231 { "label": "kelpie" },
232 { "label": "komondor" },
233 { "label": "Old_English_sheepdog" },
234 { "label": "Shetland_sheepdog" },
235 { "label": "collie" },
236 { "label": "Border_collie" },
237 { "label": "Bouvier_des_Flandres" },
238 { "label": "Rottweiler" },
239 { "label": "German_shepherd" },
240 { "label": "Doberman" },
241 { "label": "miniature_pinscher" },
242 { "label": "Greater_Swiss_Mountain_dog" },
243 { "label": "Bernese_mountain_dog" },
244 { "label": "Appenzeller" },
245 { "label": "EntleBucher" },
246 { "label": "boxer" },
247 { "label": "bull_mastiff" },
248 { "label": "Tibetan_mastiff" },
249 { "label": "French_bulldog" },
250 { "label": "Great_Dane" },
251 { "label": "Saint_Bernard" },
252 { "label": "Eskimo_dog" },
253 { "label": "malamute" },
254 { "label": "Siberian_husky" },
255 { "label": "dalmatian" },
256 { "label": "affenpinscher" },
257 { "label": "basenji" },
258 { "label": "pug" },
259 { "label": "Leonberg" },
260 { "label": "Newfoundland" },
261 { "label": "Great_Pyrenees" },
262 { "label": "Samoyed" },
263 { "label": "Pomeranian" },
264 { "label": "chow" },
265 { "label": "keeshond" },
266 { "label": "Brabancon_griffon" },
267 { "label": "Pembroke" },
268 { "label": "Cardigan" },
269 { "label": "toy_poodle" },
270 { "label": "miniature_poodle" },
271 { "label": "standard_poodle" },
272 { "label": "Mexican_hairless" },
273 { "label": "timber_wolf" },
274 { "label": "white_wolf" },
275 { "label": "red_wolf" },
276 { "label": "coyote" },
277 { "label": "dingo" },
278 { "label": "dhole" },
279 { "label": "African_hunting_dog" },
280 { "label": "hyena" },
281 { "label": "red_fox" },
282 { "label": "kit_fox" },
283 { "label": "Arctic_fox" },
284 { "label": "grey_fox" },
285 { "label": "tabby" },
286 { "label": "tiger_cat" },
287 { "label": "Persian_cat" },
288 { "label": "Siamese_cat" },
289 { "label": "Egyptian_cat" },
290 { "label": "cougar" },
291 { "label": "lynx" },
292 { "label": "leopard" },
293 { "label": "snow_leopard" },
294 { "label": "jaguar" },
295 { "label": "lion" },
296 { "label": "tiger" },
297 { "label": "cheetah" },
298 { "label": "brown_bear" },
299 { "label": "American_black_bear" },
300 { "label": "ice_bear" },
301 { "label": "sloth_bear" },
302 { "label": "mongoose" },
303 { "label": "meerkat" },
304 { "label": "tiger_beetle" },
305 { "label": "ladybug" },
306 { "label": "ground_beetle" },
307 { "label": "long-horned_beetle" },
308 { "label": "leaf_beetle" },
309 { "label": "dung_beetle" },
310 { "label": "rhinoceros_beetle" },
311 { "label": "weevil" },
312 { "label": "fly" },
313 { "label": "bee" },
314 { "label": "ant" },
315 { "label": "grasshopper" },
316 { "label": "cricket" },
317 { "label": "walking_stick" },
318 { "label": "cockroach" },
319 { "label": "mantis" },
320 { "label": "cicada" },
321 { "label": "leafhopper" },
322 { "label": "lacewing" },
323 { "label": "dragonfly" },
324 { "label": "damselfly" },
325 { "label": "admiral" },
326 { "label": "ringlet" },
327 { "label": "monarch" },
328 { "label": "cabbage_butterfly" },
329 { "label": "sulphur_butterfly" },
330 { "label": "lycaenid" },
331 { "label": "starfish" },
332 { "label": "sea_urchin" },
333 { "label": "sea_cucumber" },
334 { "label": "wood_rabbit" },
335 { "label": "hare" },
336 { "label": "Angora" },
337 { "label": "hamster" },
338 { "label": "porcupine" },
339 { "label": "fox_squirrel" },
340 { "label": "marmot" },
341 { "label": "beaver" },
342 { "label": "guinea_pig" },
343 { "label": "sorrel" },
344 { "label": "zebra" },
345 { "label": "hog" },
346 { "label": "wild_boar" },
347 { "label": "warthog" },
348 { "label": "hippopotamus" },
349 { "label": "ox" },
350 { "label": "water_buffalo" },
351 { "label": "bison" },
352 { "label": "ram" },
353 { "label": "bighorn" },
354 { "label": "ibex" },
355 { "label": "hartebeest" },
356 { "label": "impala" },
357 { "label": "gazelle" },
358 { "label": "Arabian_camel" },
359 { "label": "llama" },
360 { "label": "weasel" },
361 { "label": "mink" },
362 { "label": "polecat" },
363 { "label": "black-footed_ferret" },
364 { "label": "otter" },
365 { "label": "skunk" },
366 { "label": "badger" },
367 { "label": "armadillo" },
368 { "label": "three-toed_sloth" },
369 { "label": "orangutan" },
370 { "label": "gorilla" },
371 { "label": "chimpanzee" },
372 { "label": "gibbon" },
373 { "label": "siamang" },
374 { "label": "guenon" },
375 { "label": "patas" },
376 { "label": "baboon" },
377 { "label": "macaque" },
378 { "label": "langur" },
379 { "label": "colobus" },
380 { "label": "proboscis_monkey" },
381 { "label": "marmoset" },
382 { "label": "capuchin" },
383 { "label": "howler_monkey" },
384 { "label": "titi" },
385 { "label": "spider_monkey" },
386 { "label": "squirrel_monkey" },
387 { "label": "Madagascar_cat" },
388 { "label": "indri" },
389 { "label": "Indian_elephant" },
390 { "label": "African_elephant" },
391 { "label": "lesser_panda" },
392 { "label": "giant_panda" },
393 { "label": "barracouta" },
394 { "label": "eel" },
395 { "label": "coho" },
396 { "label": "rock_beauty" },
397 { "label": "anemone_fish" },
398 { "label": "sturgeon" },
399 { "label": "gar" },
400 { "label": "lionfish" },
401 { "label": "puffer" },
402 { "label": "abacus" },
403 { "label": "abaya" },
404 { "label": "academic_gown" },
405 { "label": "accordion" },
406 { "label": "acoustic_guitar" },
407 { "label": "aircraft_carrier" },
408 { "label": "airliner" },
409 { "label": "airship" },
410 { "label": "altar" },
411 { "label": "ambulance" },
412 { "label": "amphibian" },
413 { "label": "analog_clock" },
414 { "label": "apiary" },
415 { "label": "apron" },
416 { "label": "ashcan" },
417 { "label": "assault_rifle" },
418 { "label": "backpack" },
419 { "label": "bakery" },
420 { "label": "balance_beam" },
421 { "label": "balloon" },
422 { "label": "ballpoint" },
423 { "label": "Band_Aid" },
424 { "label": "banjo" },
425 { "label": "bannister" },
426 { "label": "barbell" },
427 { "label": "barber_chair" },
428 { "label": "barbershop" },
429 { "label": "barn" },
430 { "label": "barometer" },
431 { "label": "barrel" },
432 { "label": "barrow" },
433 { "label": "baseball" },
434 { "label": "basketball" },
435 { "label": "bassinet" },
436 { "label": "bassoon" },
437 { "label": "bathing_cap" },
438 { "label": "bath_towel" },
439 { "label": "bathtub" },
440 { "label": "beach_wagon" },
441 { "label": "beacon" },
442 { "label": "beaker" },
443 { "label": "bearskin" },
444 { "label": "beer_bottle" },
445 { "label": "beer_glass" },
446 { "label": "bell_cote" },
447 { "label": "bib" },
448 { "label": "bicycle-built-for-two" },
449 { "label": "bikini" },
450 { "label": "binder" },
451 { "label": "binoculars" },
452 { "label": "birdhouse" },
453 { "label": "boathouse" },
454 { "label": "bobsled" },
455 { "label": "bolo_tie" },
456 { "label": "bonnet" },
457 { "label": "bookcase" },
458 { "label": "bookshop" },
459 { "label": "bottlecap" },
460 { "label": "bow" },
461 { "label": "bow_tie" },
462 { "label": "brass" },
463 { "label": "brassiere" },
464 { "label": "breakwater" },
465 { "label": "breastplate" },
466 { "label": "broom" },
467 { "label": "bucket" },
468 { "label": "buckle" },
469 { "label": "bulletproof_vest" },
470 { "label": "bullet_train" },
471 { "label": "butcher_shop" },
472 { "label": "cab" },
473 { "label": "caldron" },
474 { "label": "candle" },
475 { "label": "cannon" },
476 { "label": "canoe" },
477 { "label": "can_opener" },
478 { "label": "cardigan" },
479 { "label": "car_mirror" },
480 { "label": "carousel" },
481 { "label": "carpenter's_kit" },
482 { "label": "carton" },
483 { "label": "car_wheel" },
484 { "label": "cash_machine" },
485 { "label": "cassette" },
486 { "label": "cassette_player" },
487 { "label": "castle" },
488 { "label": "catamaran" },
489 { "label": "CD_player" },
490 { "label": "cello" },
491 { "label": "cellular_telephone" },
492 { "label": "chain" },
493 { "label": "chainlink_fence" },
494 { "label": "chain_mail" },
495 { "label": "chain_saw" },
496 { "label": "chest" },
497 { "label": "chiffonier" },
498 { "label": "chime" },
499 { "label": "china_cabinet" },
500 { "label": "Christmas_stocking" },
501 { "label": "church" },
502 { "label": "cinema" },
503 { "label": "cleaver" },
504 { "label": "cliff_dwelling" },
505 { "label": "cloak" },
506 { "label": "clog" },
507 { "label": "cocktail_shaker" },
508 { "label": "coffee_mug" },
509 { "label": "coffeepot" },
510 { "label": "coil" },
511 { "label": "combination_lock" },
512 { "label": "computer_keyboard" },
513 { "label": "confectionery" },
514 { "label": "container_ship" },
515 { "label": "convertible" },
516 { "label": "corkscrew" },
517 { "label": "cornet" },
518 { "label": "cowboy_boot" },
519 { "label": "cowboy_hat" },
520 { "label": "cradle" },
521 { "label": "crane" },
522 { "label": "crash_helmet" },
523 { "label": "crate" },
524 { "label": "crib" },
525 { "label": "Crock_Pot" },
526 { "label": "croquet_ball" },
527 { "label": "crutch" },
528 { "label": "cuirass" },
529 { "label": "dam" },
530 { "label": "desk" },
531 { "label": "desktop_computer" },
532 { "label": "dial_telephone" },
533 { "label": "diaper" },
534 { "label": "digital_clock" },
535 { "label": "digital_watch" },
536 { "label": "dining_table" },
537 { "label": "dishrag" },
538 { "label": "dishwasher" },
539 { "label": "disk_brake" },
540 { "label": "dock" },
541 { "label": "dogsled" },
542 { "label": "dome" },
543 { "label": "doormat" },
544 { "label": "drilling_platform" },
545 { "label": "drum" },
546 { "label": "drumstick" },
547 { "label": "dumbbell" },
548 { "label": "Dutch_oven" },
549 { "label": "electric_fan" },
550 { "label": "electric_guitar" },
551 { "label": "electric_locomotive" },
552 { "label": "entertainment_center" },
553 { "label": "envelope" },
554 { "label": "espresso_maker" },
555 { "label": "face_powder" },
556 { "label": "feather_boa" },
557 { "label": "file" },
558 { "label": "fireboat" },
559 { "label": "fire_engine" },
560 { "label": "fire_screen" },
561 { "label": "flagpole" },
562 { "label": "flute" },
563 { "label": "folding_chair" },
564 { "label": "football_helmet" },
565 { "label": "forklift" },
566 { "label": "fountain" },
567 { "label": "fountain_pen" },
568 { "label": "four-poster" },
569 { "label": "freight_car" },
570 { "label": "French_horn" },
571 { "label": "frying_pan" },
572 { "label": "fur_coat" },
573 { "label": "garbage_truck" },
574 { "label": "gasmask" },
575 { "label": "gas_pump" },
576 { "label": "goblet" },
577 { "label": "go-kart" },
578 { "label": "golf_ball" },
579 { "label": "golfcart" },
580 { "label": "gondola" },
581 { "label": "gong" },
582 { "label": "gown" },
583 { "label": "grand_piano" },
584 { "label": "greenhouse" },
585 { "label": "grille" },
586 { "label": "grocery_store" },
587 { "label": "guillotine" },
588 { "label": "hair_slide" },
589 { "label": "hair_spray" },
590 { "label": "half_track" },
591 { "label": "hammer" },
592 { "label": "hamper" },
593 { "label": "hand_blower" },
594 { "label": "hand-held_computer" },
595 { "label": "handkerchief" },
596 { "label": "hard_disc" },
597 { "label": "harmonica" },
598 { "label": "harp" },
599 { "label": "harvester" },
600 { "label": "hatchet" },
601 { "label": "holster" },
602 { "label": "home_theater" },
603 { "label": "honeycomb" },
604 { "label": "hook" },
605 { "label": "hoopskirt" },
606 { "label": "horizontal_bar" },
607 { "label": "horse_cart" },
608 { "label": "hourglass" },
609 { "label": "iPod" },
610 { "label": "iron" },
611 { "label": "jack-o'-lantern" },
612 { "label": "jean" },
613 { "label": "jeep" },
614 { "label": "jersey" },
615 { "label": "jigsaw_puzzle" },
616 { "label": "jinrikisha" },
617 { "label": "joystick" },
618 { "label": "kimono" },
619 { "label": "knee_pad" },
620 { "label": "knot" },
621 { "label": "lab_coat" },
622 { "label": "ladle" },
623 { "label": "lampshade" },
624 { "label": "laptop" },
625 { "label": "lawn_mower" },
626 { "label": "lens_cap" },
627 { "label": "letter_opener" },
628 { "label": "library" },
629 { "label": "lifeboat" },
630 { "label": "lighter" },
631 { "label": "limousine" },
632 { "label": "liner" },
633 { "label": "lipstick" },
634 { "label": "Loafer" },
635 { "label": "lotion" },
636 { "label": "loudspeaker" },
637 { "label": "loupe" },
638 { "label": "lumbermill" },
639 { "label": "magnetic_compass" },
640 { "label": "mailbag" },
641 { "label": "mailbox" },
642 { "label": "maillot" },
643 { "label": "maillot" },
644 { "label": "manhole_cover" },
645 { "label": "maraca" },
646 { "label": "marimba" },
647 { "label": "mask" },
648 { "label": "matchstick" },
649 { "label": "maypole" },
650 { "label": "maze" },
651 { "label": "measuring_cup" },
652 { "label": "medicine_chest" },
653 { "label": "megalith" },
654 { "label": "microphone" },
655 { "label": "microwave" },
656 { "label": "military_uniform" },
657 { "label": "milk_can" },
658 { "label": "minibus" },
659 { "label": "miniskirt" },
660 { "label": "minivan" },
661 { "label": "missile" },
662 { "label": "mitten" },
663 { "label": "mixing_bowl" },
664 { "label": "mobile_home" },
665 { "label": "Model_T" },
666 { "label": "modem" },
667 { "label": "monastery" },
668 { "label": "monitor" },
669 { "label": "moped" },
670 { "label": "mortar" },
671 { "label": "mortarboard" },
672 { "label": "mosque" },
673 { "label": "mosquito_net" },
674 { "label": "motor_scooter" },
675 { "label": "mountain_bike" },
676 { "label": "mountain_tent" },
677 { "label": "mouse" },
678 { "label": "mousetrap" },
679 { "label": "moving_van" },
680 { "label": "muzzle" },
681 { "label": "nail" },
682 { "label": "neck_brace" },
683 { "label": "necklace" },
684 { "label": "nipple" },
685 { "label": "notebook" },
686 { "label": "obelisk" },
687 { "label": "oboe" },
688 { "label": "ocarina" },
689 { "label": "odometer" },
690 { "label": "oil_filter" },
691 { "label": "organ" },
692 { "label": "oscilloscope" },
693 { "label": "overskirt" },
694 { "label": "oxcart" },
695 { "label": "oxygen_mask" },
696 { "label": "packet" },
697 { "label": "paddle" },
698 { "label": "paddlewheel" },
699 { "label": "padlock" },
700 { "label": "paintbrush" },
701 { "label": "pajama" },
702 { "label": "palace" },
703 { "label": "panpipe" },
704 { "label": "paper_towel" },
705 { "label": "parachute" },
706 { "label": "parallel_bars" },
707 { "label": "park_bench" },
708 { "label": "parking_meter" },
709 { "label": "passenger_car" },
710 { "label": "patio" },
711 { "label": "pay-phone" },
712 { "label": "pedestal" },
713 { "label": "pencil_box" },
714 { "label": "pencil_sharpener" },
715 { "label": "perfume" },
716 { "label": "Petri_dish" },
717 { "label": "photocopier" },
718 { "label": "pick" },
719 { "label": "pickelhaube" },
720 { "label": "picket_fence" },
721 { "label": "pickup" },
722 { "label": "pier" },
723 { "label": "piggy_bank" },
724 { "label": "pill_bottle" },
725 { "label": "pillow" },
726 { "label": "ping-pong_ball" },
727 { "label": "pinwheel" },
728 { "label": "pirate" },
729 { "label": "pitcher" },
730 { "label": "plane" },
731 { "label": "planetarium" },
732 { "label": "plastic_bag" },
733 { "label": "plate_rack" },
734 { "label": "plow" },
735 { "label": "plunger" },
736 { "label": "Polaroid_camera" },
737 { "label": "pole" },
738 { "label": "police_van" },
739 { "label": "poncho" },
740 { "label": "pool_table" },
741 { "label": "pop_bottle" },
742 { "label": "pot" },
743 { "label": "potter's_wheel" },
744 { "label": "power_drill" },
745 { "label": "prayer_rug" },
746 { "label": "printer" },
747 { "label": "prison" },
748 { "label": "projectile" },
749 { "label": "projector" },
750 { "label": "puck" },
751 { "label": "punching_bag" },
752 { "label": "purse" },
753 { "label": "quill" },
754 { "label": "quilt" },
755 { "label": "racer" },
756 { "label": "racket" },
757 { "label": "radiator" },
758 { "label": "radio" },
759 { "label": "radio_telescope" },
760 { "label": "rain_barrel" },
761 { "label": "recreational_vehicle" },
762 { "label": "reel" },
763 { "label": "reflex_camera" },
764 { "label": "refrigerator" },
765 { "label": "remote_control" },
766 { "label": "restaurant" },
767 { "label": "revolver" },
768 { "label": "rifle" },
769 { "label": "rocking_chair" },
770 { "label": "rotisserie" },
771 { "label": "rubber_eraser" },
772 { "label": "rugby_ball" },
773 { "label": "rule" },
774 { "label": "running_shoe" },
775 { "label": "safe" },
776 { "label": "safety_pin" },
777 { "label": "saltshaker" },
778 { "label": "sandal" },
779 { "label": "sarong" },
780 { "label": "sax" },
781 { "label": "scabbard" },
782 { "label": "scale" },
783 { "label": "school_bus" },
784 { "label": "schooner" },
785 { "label": "scoreboard" },
786 { "label": "screen" },
787 { "label": "screw" },
788 { "label": "screwdriver" },
789 { "label": "seat_belt" },
790 { "label": "sewing_machine" },
791 { "label": "shield" },
792 { "label": "shoe_shop" },
793 { "label": "shoji" },
794 { "label": "shopping_basket" },
795 { "label": "shopping_cart" },
796 { "label": "shovel" },
797 { "label": "shower_cap" },
798 { "label": "shower_curtain" },
799 { "label": "ski" },
800 { "label": "ski_mask" },
801 { "label": "sleeping_bag" },
802 { "label": "slide_rule" },
803 { "label": "sliding_door" },
804 { "label": "slot" },
805 { "label": "snorkel" },
806 { "label": "snowmobile" },
807 { "label": "snowplow" },
808 { "label": "soap_dispenser" },
809 { "label": "soccer_ball" },
810 { "label": "sock" },
811 { "label": "solar_dish" },
812 { "label": "sombrero" },
813 { "label": "soup_bowl" },
814 { "label": "space_bar" },
815 { "label": "space_heater" },
816 { "label": "space_shuttle" },
817 { "label": "spatula" },
818 { "label": "speedboat" },
819 { "label": "spider_web" },
820 { "label": "spindle" },
821 { "label": "sports_car" },
822 { "label": "spotlight" },
823 { "label": "stage" },
824 { "label": "steam_locomotive" },
825 { "label": "steel_arch_bridge" },
826 { "label": "steel_drum" },
827 { "label": "stethoscope" },
828 { "label": "stole" },
829 { "label": "stone_wall" },
830 { "label": "stopwatch" },
831 { "label": "stove" },
832 { "label": "strainer" },
833 { "label": "streetcar" },
834 { "label": "stretcher" },
835 { "label": "studio_couch" },
836 { "label": "stupa" },
837 { "label": "submarine" },
838 { "label": "suit" },
839 { "label": "sundial" },
840 { "label": "sunglass" },
841 { "label": "sunglasses" },
842 { "label": "sunscreen" },
843 { "label": "suspension_bridge" },
844 { "label": "swab" },
845 { "label": "sweatshirt" },
846 { "label": "swimming_trunks" },
847 { "label": "swing" },
848 { "label": "switch" },
849 { "label": "syringe" },
850 { "label": "table_lamp" },
851 { "label": "tank" },
852 { "label": "tape_player" },
853 { "label": "teapot" },
854 { "label": "teddy" },
855 { "label": "television" },
856 { "label": "tennis_ball" },
857 { "label": "thatch" },
858 { "label": "theater_curtain" },
859 { "label": "thimble" },
860 { "label": "thresher" },
861 { "label": "throne" },
862 { "label": "tile_roof" },
863 { "label": "toaster" },
864 { "label": "tobacco_shop" },
865 { "label": "toilet_seat" },
866 { "label": "torch" },
867 { "label": "totem_pole" },
868 { "label": "tow_truck" },
869 { "label": "toyshop" },
870 { "label": "tractor" },
871 { "label": "trailer_truck" },
872 { "label": "tray" },
873 { "label": "trench_coat" },
874 { "label": "tricycle" },
875 { "label": "trimaran" },
876 { "label": "tripod" },
877 { "label": "triumphal_arch" },
878 { "label": "trolleybus" },
879 { "label": "trombone" },
880 { "label": "tub" },
881 { "label": "turnstile" },
882 { "label": "typewriter_keyboard" },
883 { "label": "umbrella" },
884 { "label": "unicycle" },
885 { "label": "upright" },
886 { "label": "vacuum" },
887 { "label": "vase" },
888 { "label": "vault" },
889 { "label": "velvet" },
890 { "label": "vending_machine" },
891 { "label": "vestment" },
892 { "label": "viaduct" },
893 { "label": "violin" },
894 { "label": "volleyball" },
895 { "label": "waffle_iron" },
896 { "label": "wall_clock" },
897 { "label": "wallet" },
898 { "label": "wardrobe" },
899 { "label": "warplane" },
900 { "label": "washbasin" },
901 { "label": "washer" },
902 { "label": "water_bottle" },
903 { "label": "water_jug" },
904 { "label": "water_tower" },
905 { "label": "whiskey_jug" },
906 { "label": "whistle" },
907 { "label": "wig" },
908 { "label": "window_screen" },
909 { "label": "window_shade" },
910 { "label": "Windsor_tie" },
911 { "label": "wine_bottle" },
912 { "label": "wing" },
913 { "label": "wok" },
914 { "label": "wooden_spoon" },
915 { "label": "wool" },
916 { "label": "worm_fence" },
917 { "label": "wreck" },
918 { "label": "yawl" },
919 { "label": "yurt" },
920 { "label": "web_site" },
921 { "label": "comic_book" },
922 { "label": "crossword_puzzle" },
923 { "label": "street_sign" },
924 { "label": "traffic_light" },
925 { "label": "book_jacket" },
926 { "label": "menu" },
927 { "label": "plate" },
928 { "label": "guacamole" },
929 { "label": "consomme" },
930 { "label": "hot_pot" },
931 { "label": "trifle" },
932 { "label": "ice_cream" },
933 { "label": "ice_lolly" },
934 { "label": "French_loaf" },
935 { "label": "bagel" },
936 { "label": "pretzel" },
937 { "label": "cheeseburger" },
938 { "label": "hotdog" },
939 { "label": "mashed_potato" },
940 { "label": "head_cabbage" },
941 { "label": "broccoli" },
942 { "label": "cauliflower" },
943 { "label": "zucchini" },
944 { "label": "spaghetti_squash" },
945 { "label": "acorn_squash" },
946 { "label": "butternut_squash" },
947 { "label": "cucumber" },
948 { "label": "artichoke" },
949 { "label": "bell_pepper" },
950 { "label": "cardoon" },
951 { "label": "mushroom" },
952 { "label": "Granny_Smith" },
953 { "label": "strawberry" },
954 { "label": "orange" },
955 { "label": "lemon" },
956 { "label": "fig" },
957 { "label": "pineapple" },
958 { "label": "banana" },
959 { "label": "jackfruit" },
960 { "label": "custard_apple" },
961 { "label": "pomegranate" },
962 { "label": "hay" },
963 { "label": "carbonara" },
964 { "label": "chocolate_sauce" },
965 { "label": "dough" },
966 { "label": "meat_loaf" },
967 { "label": "pizza" },
968 { "label": "potpie" },
969 { "label": "burrito" },
970 { "label": "red_wine" },
971 { "label": "espresso" },
972 { "label": "cup" },
973 { "label": "eggnog" },
974 { "label": "alp" },
975 { "label": "bubble" },
976 { "label": "cliff" },
977 { "label": "coral_reef" },
978 { "label": "geyser" },
979 { "label": "lakeside" },
980 { "label": "promontory" },
981 { "label": "sandbar" },
982 { "label": "seashore" },
983 { "label": "valley" },
984 { "label": "volcano" },
985 { "label": "ballplayer" },
986 { "label": "groom" },
987 { "label": "scuba_diver" },
988 { "label": "rapeseed" },
989 { "label": "daisy" },
990 { "label": "yellow_lady's_slipper" },
991 { "label": "corn" },
992 { "label": "acorn" },
993 { "label": "hip" },
994 { "label": "buckeye" },
995 { "label": "coral_fungus" },
996 { "label": "agaric" },
997 { "label": "gyromitra" },
998 { "label": "stinkhorn" },
999 { "label": "earthstar" },
1000 { "label": "hen-of-the-woods" },
1001 { "label": "bolete" },
1002 { "label": "ear" },
1003 { "label": "toilet_tissue" }
1004 ]
1005}
diff --git a/examples/mobilenet_subgraph/main.cpp b/examples/mobilenet_subgraph/main.cpp
new file mode 100644
index 0000000..2233d96
--- /dev/null
+++ b/examples/mobilenet_subgraph/main.cpp
@@ -0,0 +1,617 @@
1/******************************************************************************
2 * Copyright (c) 2019, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29#include <signal.h>
30#include <iostream>
31#include <iomanip>
32#include <fstream>
33#include <cassert>
34#include <string>
35#include <functional>
36#include <algorithm>
37#include <time.h>
38#include <unistd.h>
39
40#include <queue>
41#include <vector>
42#include <chrono>
43#include <future>
44
45#include "executor.h"
46#include "execution_object.h"
47#include "execution_object_pipeline.h"
48#include "subgraph_runtime.h"
49#include "subgraph_data_conv.h"
50#include "configuration.h"
51#include "../common/object_classes.h"
52#include "imgutil.h"
53#include "../common/video_utils.h"
54#include "thread_pool.h"
55
56#include "opencv2/core.hpp"
57#include "opencv2/imgproc.hpp"
58#include "opencv2/highgui.hpp"
59#include "opencv2/videoio.hpp"
60
61using namespace std;
62using namespace tidl;
63using namespace cv;
64
65#define NUM_VIDEO_FRAMES 300
66#define DEFAULT_CONFIG "j11_v2"
67#define NUM_DEFAULT_INPUTS 1
68#define DEFAULT_OBJECT_CLASSES_LIST_FILE "imagenet_objects.json"
69#define DEFAULT_OUTPUT_PROB_THRESHOLD 5
70#define MOBILENET_IN_C (3)
71#define MOBILENET_IN_H (224)
72#define MOBILENET_IN_W (224)
73#define MOBILENET_INPUT_SIZE (1*MOBILENET_IN_C*MOBILENET_IN_H*MOBILENET_IN_W)
74#define MOBILENET_OUTPUT_SIZE (1001)
75const char *default_inputs[NUM_DEFAULT_INPUTS] =
76{
77 "../test/testvecs/input/objects/cat-pet-animal-domestic-104827.jpeg"
78};
79std::unique_ptr<ObjectClasses> object_classes;
80typedef struct {
81 float **inputs;
82 float **outputs;
83} UserData;
84
85bool RunConfiguration(cmdline_opts_t& opts);
86bool ReadFrame(const cmdline_opts_t& opts, VideoCapture &cap, float** inputs,
87 int batch_size);
88bool WriteFrameOutput(float *out, const cmdline_opts_t& opts);
89void DisplayHelp();
90void SubgraphUserFunc(void *user_data);
91
// Number of results printed per test section before the rest are elided
const int num_printed_outputs = 4;

// Returns true when the result with running index i should not be printed.
//   i            - running index of the result within the current section
//   offset       - index of the first result in the section
//   skip_outputs - sticky flag owned by the caller; once set it stays set
// The first time i reaches num_printed_outputs + offset a one-line notice is
// printed and the flag is latched.
bool SkipOutputs(int i, int offset, bool &skip_outputs)
{
    const bool limit_reached = (i >= num_printed_outputs + offset);
    if (!skip_outputs && limit_reached)
    {
        std::cout << " ... skippping outputs ..." << std::endl;
        skip_outputs = true;
    }
    return skip_outputs;
}
106
107int main(int argc, char *argv[])
108{
109 // Catch ctrl-c to ensure a clean exit
110 signal(SIGABRT, exit);
111 signal(SIGTERM, exit);
112
113 // If there are no devices capable of offloading TIDL on the SoC, exit
114 uint32_t num_eves = Executor::GetNumDevices(DeviceType::EVE);
115 uint32_t num_dsps = Executor::GetNumDevices(DeviceType::DSP);
116 if (num_eves == 0 && num_dsps == 0)
117 {
118 cout << "TI DL not supported on this SoC." << endl;
119 return EXIT_SUCCESS;
120 }
121
122 // Process arguments
123 cmdline_opts_t opts;
124 opts.config = DEFAULT_CONFIG;
125 opts.object_classes_list_file = DEFAULT_OBJECT_CLASSES_LIST_FILE;
126 opts.output_prob_threshold = DEFAULT_OUTPUT_PROB_THRESHOLD;
127 if (num_eves != 0) { opts.num_eves = 1; opts.num_dsps = 0; }
128 else { opts.num_eves = 0; opts.num_dsps = 1; }
129 if (! ProcessArgs(argc, argv, opts))
130 {
131 DisplayHelp();
132 exit(EXIT_SUCCESS);
133 }
134 assert(opts.num_dsps != 0 || opts.num_eves != 0);
135 if (opts.num_frames == 0)
136 opts.num_frames = (opts.is_camera_input || opts.is_video_input) ?
137 NUM_VIDEO_FRAMES : 1;
138 if (opts.input_file.empty())
139 cout << "Input: " << default_inputs[0] << endl;
140 else
141 cout << "Input: " << opts.input_file << endl;
142
143 // Get object classes list
144 object_classes = std::unique_ptr<ObjectClasses>(
145 new ObjectClasses(opts.object_classes_list_file));
146 if (object_classes->GetNumClasses() == 0)
147 {
148 cout << "No object classes defined for this config." << endl;
149 return EXIT_FAILURE;
150 }
151
152 // Run network
153 bool status = RunConfiguration(opts);
154 if (!status)
155 {
156 cout << "imagenet FAILED" << endl;
157 return EXIT_FAILURE;
158 }
159
160 cout << "imagenet PASSED" << endl;
161 return EXIT_SUCCESS;
162}
163
164bool RunConfiguration(cmdline_opts_t& opts)
165{
166 bool status = true;
167
168 // setup camera/video input/output
169 VideoCapture cap;
170 if (! SetVideoInputOutput(cap, opts, "ImageNet")) return false;
171
172 cout << "\n##### Batch size 1 testing ######\n" << endl;
173 try
174 {
175 TidlInitSubgraph(1, 0);
176 float **inputs = new float *[1];
177 inputs[0] = new float[MOBILENET_INPUT_SIZE];
178 float **outputs = new float *[1];
179 outputs[0] = new float[MOBILENET_OUTPUT_SIZE];
180
181 for (int i = 0; i < 5; i ++)
182 {
183 chrono::time_point<chrono::steady_clock> tloop0, tloop1;
184 tloop0 = chrono::steady_clock::now();
185
186 ReadFrame(opts, cap, inputs, 1);
187 TidlRunSubgraph(1, 0, 1, 1, 1, inputs, outputs);
188 WriteFrameOutput(outputs[0], opts);
189
190 tloop1 = chrono::steady_clock::now();
191 chrono::duration<float> elapsed = tloop1 - tloop0;
192 cout << "Frame " << i
193 << " time (including read/write/opencv/print/etc): "
194 << setw(6) << setprecision(4)
195 << (elapsed.count() * 1000) << "ms" << endl;
196 }
197
198 delete [] inputs[0];
199 delete [] inputs;
200 delete [] outputs[0];
201 delete [] outputs;
202 }
203 catch (tidl::Exception &e)
204 {
205 cerr << e.what() << endl;
206 status = false;
207 }
208
209 // If not doing multi-threaded processing, multiply by 2 or more
210 // for a larger batch to amortize batch initilization/tear down cost
211 int preferred_batch_size = TidlGetPreferredBatchSize(1);
212 for (int multiple = 1; multiple <= 16; multiple *= 2)
213 {
214 int batch_size = preferred_batch_size * multiple;
215 cout << "\n##### Batch size " << batch_size << " testing ######\n"
216 << endl;
217 bool skip_outputs = false;
218 try
219 {
220 float **inputs = new float *[batch_size];
221 float **outputs = new float *[batch_size];
222 for (int i = 0; i < batch_size; i++)
223 {
224 inputs[i] = new float[MOBILENET_INPUT_SIZE];
225 outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
226 }
227
228 chrono::time_point<chrono::steady_clock> tloop0, tloop1;
229 tloop0 = chrono::steady_clock::now();
230
231 ReadFrame(opts, cap, inputs, batch_size);
232 TidlRunSubgraph(1, 0, batch_size, 1, 1, inputs, outputs);
233 for (int i = 0; i < batch_size; i++)
234 {
235 if (! SkipOutputs(i, 0, skip_outputs))
236 {
237 cout << "Frame " << i << " of " << batch_size
238 << " output:" << endl;
239 WriteFrameOutput(outputs[i], opts);
240 }
241 }
242
243 tloop1 = chrono::steady_clock::now();
244 chrono::duration<float> elapsed = tloop1 - tloop0;
245 cout << "Batch size " << batch_size
246 << " time: "
247 << setw(6) << setprecision(4)
248 << (elapsed.count() * 1000) << "ms, fps = "
249 << setw(6) << setprecision(4)
250 << (batch_size / elapsed.count())
251 << endl;
252
253 for (int i = 0; i < batch_size; i++)
254 {
255 delete [] inputs[i];
256 delete [] outputs[i];
257 }
258 delete [] inputs;
259 delete [] outputs;
260 }
261 catch (tidl::Exception &e)
262 {
263 cerr << e.what() << endl;
264 status = false;
265 }
266 }
267
268 // This is to test the multithreaded inference with async/future
269 // async/future has slightly worse threading performance than
270 // thread pool, however, it is much easier to program
271 cout << "\n##### Multithreaded inference testing (async/future) #####\n"
272 << endl;
273 int num_threads = TidlGetPreferredBatchSize(1) * 2;
274 int num_iters = 100;
275 try
276 {
277 float **inputs = new float *[num_threads];
278 float **outputs = new float *[num_threads];
279 for (int i = 0; i < num_threads; i++)
280 {
281 inputs[i] = new float[MOBILENET_INPUT_SIZE];
282 outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
283 }
284 vector<future<bool>> futures(num_threads);
285 bool skip_outputs = false;
286
287 chrono::time_point<chrono::steady_clock> tloop0, tloop1;
288 tloop0 = chrono::steady_clock::now();
289
290 for (int i = 0; i < num_iters + num_threads; i++)
291 {
292 int index = i % num_threads;
293 if (i >= num_threads)
294 {
295 if (futures[index].get())
296 {
297 if (! SkipOutputs(i, num_threads, skip_outputs))
298 WriteFrameOutput(outputs[index], opts);
299 }
300 }
301
302 if (i < num_iters)
303 {
304 ReadFrame(opts, cap, &inputs[index], 1);
305 futures[index] = std::async(std::launch::async,
306 [inputs, outputs](int index) {
307 TidlRunSubgraph(1, 0, 1, 1, 1,
308 &inputs[index], &outputs[index]);
309 return true;
310 },
311 index);
312 }
313 }
314
315 tloop1 = chrono::steady_clock::now();
316 chrono::duration<float> elapsed = tloop1 - tloop0;
317 cout << "Multithreaded (num_threads=" << num_threads
318 << ", batch_size=1) loop time (" << num_iters << " frames): "
319 << setw(6) << setprecision(4)
320 << (elapsed.count() * 1000) << "ms, fps = "
321 << setw(6) << setprecision(4)
322 << (num_iters / elapsed.count())
323 << endl;
324
325 for (int i = 0; i < num_threads; i++)
326 {
327 delete [] inputs[i];
328 delete [] outputs[i];
329 }
330 delete [] inputs;
331 delete [] outputs;
332 }
333 catch (tidl::Exception &e)
334 {
335 cerr << e.what() << endl;
336 status = false;
337 }
338
339 // This is to test the multithreaded inference with a thread pool
340 cout << "\n##### Multithreaded inference testing (thread pool) #####\n"
341 << endl;
342 try
343 {
344 float **inputs = new float *[num_threads];
345 float **outputs = new float *[num_threads];
346 vector<UserData> v_data(num_threads);
347 for (int i = 0; i < num_threads; i++)
348 {
349 inputs[i] = new float[MOBILENET_INPUT_SIZE];
350 outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
351 v_data[i].inputs = &inputs[i];
352 v_data[i].outputs = &outputs[i];
353 }
354 ThPool pool(num_threads, SubgraphUserFunc);
355 vector<int> th_ids(num_threads);
356 bool skip_outputs = false;
357
358 chrono::time_point<chrono::steady_clock> tloop0, tloop1;
359 tloop0 = chrono::steady_clock::now();
360
361 for (int i = 0; i < num_iters + num_threads; i++)
362 {
363 int index = i % num_threads;
364 if (i >= num_threads)
365 {
366 UserData *data = (UserData *) pool.Wait(th_ids[index]);
367 if (! SkipOutputs(i, num_threads, skip_outputs))
368 WriteFrameOutput(data->outputs[0], opts);
369 }
370
371 if (i < num_iters)
372 {
373 ReadFrame(opts, cap, &inputs[index], 1);
374 th_ids[index] = pool.RunAsync(&v_data[index]);
375 }
376 }
377
378 tloop1 = chrono::steady_clock::now();
379 chrono::duration<float> elapsed = tloop1 - tloop0;
380 cout << "Multithreaded (num_threads=" << num_threads
381 << ", batch_size=1) loop time (" << num_iters << " frames): "
382 << setw(6) << setprecision(4)
383 << (elapsed.count() * 1000) << "ms, fps = "
384 << setw(6) << setprecision(4)
385 << (num_iters / elapsed.count())
386 << endl;
387
388 for (int i = 0; i < num_threads; i++)
389 {
390 delete [] inputs[i];
391 delete [] outputs[i];
392 }
393 delete [] inputs;
394 delete [] outputs;
395 }
396 catch (tidl::Exception &e)
397 {
398 cerr << e.what() << endl;
399 status = false;
400 }
401
402 num_threads = 2;
403 int batch_size = preferred_batch_size;
404 // This is to test the multithreaded batch inference with async/future
405 // Ideally, batch_size * num_threads <= number of threads
406 cout << "\n##### Multithreaded batch inference testing (async/future)"
407 << " #####\n" << endl;
408 try
409 {
410 float **inputs = new float *[num_threads * batch_size];
411 float **outputs = new float *[num_threads * batch_size];
412 for (int i = 0; i < num_threads * batch_size; i++)
413 {
414 inputs[i] = new float[MOBILENET_INPUT_SIZE];
415 outputs[i] = new float[MOBILENET_OUTPUT_SIZE];
416 }
417 vector<future<bool>> futures(num_threads);
418 bool skip_outputs = false;
419
420 chrono::time_point<chrono::steady_clock> tloop0, tloop1;
421 tloop0 = chrono::steady_clock::now();
422
423 for (int i = 0; i < num_iters/batch_size + num_threads; i++)
424 {
425 int index = i % num_threads;
426 if (i >= num_threads)
427 {
428 if (futures[index].get())
429 if (! SkipOutputs(i*batch_size, num_threads*batch_size,
430 skip_outputs))
431 for (int b = 0; b < batch_size; b++)
432 WriteFrameOutput(outputs[index*batch_size+b], opts);
433 }
434
435 if (i < num_iters/batch_size)
436 {
437 ReadFrame(opts, cap, &inputs[index*batch_size], batch_size);
438 futures[index] = std::async(std::launch::async,
439 [inputs, outputs, batch_size](int index) {
440 TidlRunSubgraph(1, 0, batch_size, 1, 1,
441 &inputs[index*batch_size],
442 &outputs[index*batch_size]);
443 return true;
444 },
445 index);
446 }
447 }
448
449 tloop1 = chrono::steady_clock::now();
450 chrono::duration<float> elapsed = tloop1 - tloop0;
451 cout << "Multithreaded batch (num_threads=" << num_threads
452 << ", batch_size=" << batch_size
453 << ") loop time (" << num_iters << " frames): "
454 << setw(6) << setprecision(4)
455 << (elapsed.count() * 1000) << "ms, fps = "
456 << setw(6) << setprecision(4)
457 << (num_iters / elapsed.count())
458 << endl;
459
460 for (int i = 0; i < num_threads * batch_size; i++)
461 {
462 delete [] inputs[i];
463 delete [] outputs[i];
464 }
465 delete [] inputs;
466 delete [] outputs;
467 }
468 catch (tidl::Exception &e)
469 {
470 cerr << e.what() << endl;
471 status = false;
472 }
473
474
475 return status;
476}
477
478void SubgraphUserFunc(void *user_data)
479{
480 UserData *data = (UserData *) user_data;
481 //printf("data inputs = %p, outputs = %p\n", data->inputs, data->outputs);
482 TidlRunSubgraph(1, 0, 1, 1, 1, data->inputs, data->outputs);
483 //printf("TidlRunSubgraph finished\n");
484}
485
486bool ReadFrame(const cmdline_opts_t& opts, VideoCapture &cap, float** inputs,
487 int batch_size)
488{
489 Configuration c;
490 c.inNumChannels = MOBILENET_IN_C;
491 c.inWidth = MOBILENET_IN_W;
492 c.inHeight = MOBILENET_IN_H;
493 c.preProcType = 2;
494 SubgraphDataConv in_conv{{0}, {true}, {128.0f}, {false},
495 {1,MOBILENET_IN_C,MOBILENET_IN_H,MOBILENET_IN_W}};
496
497 char* frame_buffer = new char[MOBILENET_INPUT_SIZE];
498 assert (frame_buffer != nullptr);
499
500 Mat image;
501 if (! opts.is_camera_input && ! opts.is_video_input)
502 {
503 if (opts.input_file.empty())
504 image = cv::imread(default_inputs[0],
505 CV_LOAD_IMAGE_COLOR);
506 else
507 image = cv::imread(opts.input_file, CV_LOAD_IMAGE_COLOR);
508 if (image.empty())
509 {
510 cerr << "Unable to read input image" << endl;
511 return false;
512 }
513 }
514 else
515 {
516 Mat v_image;
517 if (! cap.grab()) return false;
518 if (! cap.retrieve(v_image)) return false;
519 int orig_width = v_image.cols;
520 int orig_height = v_image.rows;
521 // Crop camera/video input to center 256x256 input
522 if (orig_width > 256 && orig_height > 256)
523 {
524 image = Mat(v_image, Rect((orig_width-256)/2, (orig_height-256)/2,
525 256, 256));
526 }
527 else
528 image = v_image;
529 cv::imshow("ImageNet", image);
530 waitKey(2);
531 }
532
533 // TI DL image preprocessing, into frame_buffer
534 bool status = imgutil::PreprocessImage(image, frame_buffer, c);
535 for (int i = 0; i < batch_size; i++)
536 {
537 std::vector<float *> in_data_v{inputs[i]};
538 in_conv.ScaleDequant((const uint8_t *)frame_buffer, in_data_v);
539 }
540 delete [] frame_buffer;
541 return status;
542}
543
544// Display top 5 classified imagenet classes with probabilities 5% or higher
545bool WriteFrameOutput(float *out, const cmdline_opts_t& opts)
546{
547 const int k = 5;
548 int out_size = 1001;
549 // Tensorflow trained network outputs 1001 probabilities,
550 // with 0-index being background, thus we need to subtract 1 when
551 // reporting classified object from 1000 categories
552 int background_offset = out_size == 1001 ? 1 : 0;
553
554 // sort and get k largest values and corresponding indices
555 typedef pair<float, int> val_index;
556 auto cmp = [](val_index &left, val_index &right)
557 { return left.first > right.first; };
558 priority_queue<val_index, vector<val_index>, decltype(cmp)> queue(cmp);
559
560 // initialize priority queue with smallest value on top
561 for (int i = 0; i < k; i++)
562 queue.push(val_index(out[i], i));
563
564 // for rest output, if larger than current min, pop min, push new val
565 for (int i = k; i < out_size; i++)
566 {
567 if (out[i] > queue.top().first)
568 {
569 queue.pop();
570 queue.push(val_index(out[i], i));
571 }
572 }
573
574 // output top k values in reverse order: largest val first
575 vector<val_index> sorted;
576 while (! queue.empty())
577 {
578 sorted.push_back(queue.top());
579 queue.pop();
580 }
581
582 for (int i = k - 1; i >= 0; i--)
583 {
584 if (sorted[i].first * 100 < opts.output_prob_threshold) break;
585 int imagenet_index = sorted[i].second - background_offset;
586 cout << k-i << ": [" << imagenet_index << "] "
587 << object_classes->At(imagenet_index).label
588 << ", prob = " << setprecision(4)
589 << (sorted[i].first * 100) << "%" << endl;
590 }
591
592 return true;
593}
594
595void DisplayHelp()
596{
597 cout <<
598 "Usage: imagenet\n"
599 " Will run imagenet network to predict top 5 object"
600 " classes for the input.\n Use -c to run a"
601 " different imagenet network. Default is j11_v2.\n"
602 "Optional arguments:\n"
603 " -c <config> Valid configs: j11_bn, j11_prelu, j11_v2\n"
604 " -d <number> Number of dsp cores to use\n"
605 " -e <number> Number of eve cores to use\n"
606 " -i <image> Path to the image file as input\n"
607 " -i camera<number> Use camera as input\n"
608 " video input port: /dev/video<number>\n"
609 " -i <name>.{mp4,mov,avi} Use video file as input\n"
610 " -l <objects_list> Path to the object classes list file\n"
611 " -f <number> Number of frames to process\n"
612 " -p <number> Output probablity threshold in percentage\n"
613 " Default is 5 percent or higher.\n"
614 " -v Verbose output during execution\n"
615 " -h Help\n";
616}
617
diff --git a/examples/mobilenet_subgraph/subgraph0.cfg b/examples/mobilenet_subgraph/subgraph0.cfg
new file mode 100644
index 0000000..404c70d
--- /dev/null
+++ b/examples/mobilenet_subgraph/subgraph0.cfg
@@ -0,0 +1,20 @@
1numFrames = 1
2preProcType = 2
3inData = "../test/testvecs/input/preproc_2_224x224.y"
4outData = "./stats_tool_out.bin"
5netBinFile = "../test/testvecs/config/tidl_models/tidl_net_mobilenet_1_224.bin"
6paramsBinFile = "../test/testvecs/config/tidl_models/tidl_param_mobilenet_1_224.bin"
7inWidth = 224
8inHeight = 224
9inNumChannels = 3
10# Each of the following entries should be a space-separated list,
11# corresponding to the vector of inputs and the vector of outputs
12# Quant_value = float_value * scaleF2Q
13inConvType = 0
14inIsSigned = 1
15inScaleF2Q = 128.0
16inIsNCHW = 0
17outConvType = 0
18outIsSigned = 0
19outScaleF2Q = 255.0
20outIsNCHW = 1
diff --git a/examples/mobilenet_subgraph/thread_pool.cpp b/examples/mobilenet_subgraph/thread_pool.cpp
new file mode 100644
index 0000000..ee25aea
--- /dev/null
+++ b/examples/mobilenet_subgraph/thread_pool.cpp
@@ -0,0 +1,144 @@
1/******************************************************************************
2 * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29#include "thread_pool.h"
30
31using namespace std;
32using namespace tidl;
33
34void ThFunc(int th_id, ThPool* pool)
35{
36 while (true)
37 {
38 // wait on th_id
39 pool->WaitForWork(th_id);
40
41 // check stop condition
42 if (pool->Stop()) return;
43
44 // Run user func
45 pool->RunUserFunc(th_id);
46
47 // notify completition
48 pool->NotifyCompletion(th_id);
49 }
50}
51
52ThPool::ThPool(int num_threads, UserFunc user_func) :
53 num_threads_m(num_threads),
54 user_func_m(user_func),
55 stop_m(false),
56 pool_m(num_threads),
57 pool_state_m((1ULL << num_threads) - 1),
58 v_mutex_th_m(num_threads),
59 v_cv_th_work_m(num_threads),
60 v_cv_th_completion_m(num_threads),
61 v_user_data_m(num_threads, nullptr),
62 v_completion_data_m(num_threads, nullptr)
63{
64 for (int i = 0; i < num_threads_m; i++)
65 {
66 pool_m[i] = thread(ThFunc, i, this);
67 }
68}
69
70ThPool::~ThPool()
71{
72 stop_m = true;
73 for (auto& data : v_user_data_m) data = &stop_m;
74 for (auto& cv : v_cv_th_work_m) cv.notify_all();
75 for (auto& th : pool_m) th.join();
76}
77
78int ThPool::RunAsync(void *user_data)
79{
80 int th_id = -1;
81 {
82 std::unique_lock<std::mutex> lock(mutex_pool_m);
83 cv_pool_m.wait(lock, [this]{ return this->pool_state_m != 0; });
84 // find first 1 bit
85 for (int i = 0; i < num_threads_m; i++)
86 if (pool_state_m & (1 << i))
87 {
88 th_id = i;
89 break;
90 }
91 pool_state_m &= (~ (1 << th_id));
92 }
93
94 {
95 std::unique_lock<std::mutex> lock(v_mutex_th_m[th_id]);
96 v_user_data_m[th_id] = user_data;
97 }
98 v_cv_th_work_m[th_id].notify_all();
99 return th_id;
100}
101
102void* ThPool::Wait(int th_id)
103{
104 void *user_data = nullptr;
105
106 {
107 std::unique_lock<std::mutex> lock(v_mutex_th_m[th_id]);
108 v_cv_th_completion_m[th_id].wait(lock, [this, th_id]{
109 return this->v_completion_data_m[th_id] != nullptr; });
110 user_data = v_completion_data_m[th_id];
111 v_completion_data_m[th_id] = nullptr;
112 }
113
114 {
115 std::unique_lock<std::mutex> lock(mutex_pool_m);
116 pool_state_m |= (1 << th_id);
117 }
118 cv_pool_m.notify_all();
119
120 return user_data;
121}
122
123
124void ThPool::RunUserFunc(int th_id)
125{
126 user_func_m(v_user_data_m[th_id]);
127}
128
129void ThPool::WaitForWork(int th_id)
130{
131 std::unique_lock<std::mutex> lock(v_mutex_th_m[th_id]);
132 v_cv_th_work_m[th_id].wait(lock, [this, th_id]{
133 return this->v_user_data_m[th_id] != nullptr; });
134}
135
136void ThPool::NotifyCompletion(int th_id)
137{
138 {
139 std::unique_lock<std::mutex> lock(v_mutex_th_m[th_id]);
140 v_completion_data_m[th_id] = v_user_data_m[th_id];
141 v_user_data_m[th_id] = nullptr;
142 }
143 v_cv_th_completion_m[th_id].notify_all();
144}
diff --git a/examples/mobilenet_subgraph/thread_pool.h b/examples/mobilenet_subgraph/thread_pool.h
new file mode 100644
index 0000000..0a3f60d
--- /dev/null
+++ b/examples/mobilenet_subgraph/thread_pool.h
@@ -0,0 +1,77 @@
1/******************************************************************************
2 * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29#pragma once
30
31#include <vector>
32#include <mutex>
33#include <condition_variable>
34#include <thread>
35
// NOTE(review): file-scope using-directive kept as-is; code that includes
// this header may depend on it -- confirm before removing.
using namespace std;

namespace tidl {

#define TIDL_MAX_NUM_THREADS 32

// Function run by a pool thread on each submitted work item.
using UserFunc = void (*)(void *user_data);

// Pool of threads that all run the same UserFunc.  Work items are submitted
// with RunAsync() and collected with Wait() using the returned thread id.
class ThPool {
    public:
        ThPool(int num_threads, UserFunc user_func);
        ~ThPool();

        // Submits user_data; returns th_id that can be used for Wait()
        int   RunAsync(void* user_data);
        // Blocks until thread th_id completes; returns its completion data
        void* Wait(int th_id);

        // The following are run by the threaded function
        bool  Stop() { return stop_m; }
        void  RunUserFunc(int th_id);
        void  WaitForWork(int th_id);
        void  NotifyCompletion(int th_id);

    private:

        int                      num_threads_m;
        UserFunc                 user_func_m;
        bool                     stop_m;
        std::vector<std::thread> pool_m;
        std::mutex               mutex_pool_m;
        std::condition_variable  cv_pool_m;
        // bit vector for availability, up to 32 threads,
        // 1: avail, 0: not avail
        int32_t                  pool_state_m;

        // per-thread mutex and condition variables, indexed by th_id
        std::vector<std::mutex>              v_mutex_th_m;
        std::vector<std::condition_variable> v_cv_th_work_m;
        std::vector<std::condition_variable> v_cv_th_completion_m;

        // per-thread pending work item and completion data, indexed by th_id
        std::vector<void *> v_user_data_m;
        std::vector<void *> v_completion_data_m;
};

}  // namespace tidl
diff --git a/tidl_api/Makefile b/tidl_api/Makefile
index 8da13e4..a04e604 100644
--- a/tidl_api/Makefile
+++ b/tidl_api/Makefile
@@ -26,10 +26,13 @@
26PYTHON_INCLUDE_DIR ?= $(wildcard $(TARGET_ROOTDIR)/usr/include/python3*) 26PYTHON_INCLUDE_DIR ?= $(wildcard $(TARGET_ROOTDIR)/usr/include/python3*)
27 27
28LIB_NAME = tidl_api.a 28LIB_NAME = tidl_api.a
29SHARED_LIB_NAME = libtidl_api.so
29LIB_IMGUTIL_NAME = tidl_imgutil.a 30LIB_IMGUTIL_NAME = tidl_imgutil.a
31SHARED_LIB_IMGUTIL_NAME = libtidl_imgutil.so
30PY_LIB_NAME = tidl.so 32PY_LIB_NAME = tidl.so
31 33
32all: $(LIB_NAME) $(LIB_IMGUTIL_NAME) $(PY_LIB_NAME) 34all: $(LIB_NAME) $(LIB_IMGUTIL_NAME) $(PY_LIB_NAME) \
35 $(SHARED_LIB_NAME) $(SHARED_LIB_IMGUTIL_NAME)
33 36
34include make.inc 37include make.inc
35include make.buildid 38include make.buildid
@@ -40,7 +43,8 @@ AR = ar
40 43
41SRCS = ocl_device.cpp configuration_parser.cpp configuration.cpp\ 44SRCS = ocl_device.cpp configuration_parser.cpp configuration.cpp\
42 executor.cpp execution_object.cpp trace.cpp util.cpp \ 45 executor.cpp execution_object.cpp trace.cpp util.cpp \
43 execution_object_pipeline.cpp 46 execution_object_pipeline.cpp \
47 subgraph_runtime.cpp subgraph_data_conv.cpp
44SRCS_IMGUTIL = imgutil.cpp 48SRCS_IMGUTIL = imgutil.cpp
45SRCS_PYBIND = pybind_eo.cpp pybind_eop.cpp pybind_executor.cpp \ 49SRCS_PYBIND = pybind_eo.cpp pybind_eop.cpp pybind_executor.cpp \
46 pybind_configuration.cpp pybind_helpers.cpp 50 pybind_configuration.cpp pybind_helpers.cpp
@@ -90,12 +94,18 @@ obj/%.o: src/%.cpp $(HEADERS)
90$(LIB_NAME): $(HOST_OBJ_FILES) 94$(LIB_NAME): $(HOST_OBJ_FILES)
91 $(AR) cr $@ $(HOST_OBJ_FILES) 95 $(AR) cr $@ $(HOST_OBJ_FILES)
92 96
97$(SHARED_LIB_NAME): $(HOST_OBJ_FILES)
98 $(CXX) $(CXXFLAGS) -Wl,-Bsymbolic -shared -lOpenCL $(HOST_OBJ_FILES) -o $@
99
93$(PY_LIB_NAME): $(HOST_OBJ_PYBIND_FILES) $(LIB_NAME) 100$(PY_LIB_NAME): $(HOST_OBJ_PYBIND_FILES) $(LIB_NAME)
94 $(CXX) $(CXXFLAGS) -Wl,-Bsymbolic -shared -lOpenCL -locl_util $^ -o $@ 101 $(CXX) $(CXXFLAGS) -Wl,-Bsymbolic -shared -lOpenCL -locl_util $^ -o $@
95 102
96$(LIB_IMGUTIL_NAME): $(HOST_OBJ_IMGUTIL_FILES) 103$(LIB_IMGUTIL_NAME): $(HOST_OBJ_IMGUTIL_FILES)
97 $(AR) cr $@ $(HOST_OBJ_IMGUTIL_FILES) 104 $(AR) cr $@ $(HOST_OBJ_IMGUTIL_FILES)
98 105
106$(SHARED_LIB_IMGUTIL_NAME): $(HOST_OBJ_IMGUTIL_FILES)
107 $(CXX) $(CXXFLAGS) -Wl,-Bsymbolic -shared $(HOST_OBJ_IMGUTIL_FILES) -o $@
108
99clean:: 109clean::
100 $(RM) -f $(LIB_NAME) $(PY_LIB_NAME) 110 $(RM) -f $(LIB_NAME) $(PY_LIB_NAME)
101 $(RM) -f $(LIB_IMGUTIL_NAME) 111 $(RM) -f $(LIB_IMGUTIL_NAME)
diff --git a/tidl_api/inc/configuration.h b/tidl_api/inc/configuration.h
index 0a1c77c..c76ba7f 100644
--- a/tidl_api/inc/configuration.h
+++ b/tidl_api/inc/configuration.h
@@ -32,6 +32,7 @@
32 32
33#include <string> 33#include <string>
34#include <map> 34#include <map>
35#include <vector>
35#include <iostream> 36#include <iostream>
36 37
37namespace tidl { 38namespace tidl {
@@ -145,6 +146,32 @@ class Configuration
145 //! Margin added to the average in percentage. 146 //! Margin added to the average in percentage.
146 int quantMargin; 147 int quantMargin;
147 148
149 //! subgraph data conversion type at subgraph inputs
150 //! 0: float <-> Q, 1: float <-> float, 2: Q <-> Q
151 std::vector<int> inConvType;
152
153 //! subgraph is signed data at subgraph inputs
154 std::vector<int> inIsSigned;
155
156 //! subgraph scaleF2Q factor at subgraph inputs
157 std::vector<float> inScaleF2Q;
158
159 //! subgraph is external tensor NCHW layout at subgraph inputs
160 std::vector<int> inIsNCHW;
161
162 //! subgraph data conversion type at subgraph outputs
163 //! 0: float <-> Q, 1: float <-> float, 2: Q <-> Q
164 std::vector<int> outConvType;
165
166 //! subgraph is signed data at subgraph outputs
167 std::vector<int> outIsSigned;
168
169 //! subgraph scaleF2Q factor at subgraph outputs
170 std::vector<float> outScaleF2Q;
171
172 //! subgraph is external tensor NCHW layout at subgraph outputs
173 std::vector<int> outIsNCHW;
174
148 //! Default constructor. 175 //! Default constructor.
149 Configuration(); 176 Configuration();
150 177
diff --git a/tidl_api/inc/subgraph_data_conv.h b/tidl_api/inc/subgraph_data_conv.h
new file mode 100644
index 0000000..dee53e5
--- /dev/null
+++ b/tidl_api/inc/subgraph_data_conv.h
@@ -0,0 +1,144 @@
1/******************************************************************************
2 * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29#pragma once
30
31#include <stdint.h>
32#include <vector>
33
namespace tidl {

/* @class SubgraphDataConv
   @brief Handles data conversion at subgraph boundaries
          At calibration time, consume either external input or external
          output tensors, determine sign and scaling factor.
          At inference time, use sign and scaling factor to perform data
          conversion between TIDL tensors and external tensors

   Example use for EstScaleQuant:
     SubgraphDataConv conv({}, {}, {}, {}, {1,3,64,64,1,3,28,28});
     conv.EstScaleQuant(in);
     WriteQuantizationParams(conv.GetIsSigned(), conv.GetScaleQ());
     conv.ScaleQuant(in, out);

   Example use for EstScaleDequant:
     SubgraphDataConv conv({}, {}, {}, {}, {1,3,64,64,1,3,28,28});
     conv.EstScaleDequant(out);
     WriteDeQuantizationParams(conv.GetIsSigned(), conv.GetScaleQ());

   Example use for ScaleQuant:
     // one time setup
     ... Parse json file for conv_type, is_signed, scaleQ, is_NCHW, dims ...
     SubgraphDataConv conv(conv_type, is_signed, scaleQ, is_NCHW, dims);

     // per inference
     out = eop.GetInputBufferPtr();
     conv.ScaleQuant(in, out);
     eop.ProcessFrameStartAsync();

   Example use for ScaleDequant:
     // one time setup
     ... Parse json file for conv_type, is_signed, scaleQ, is_NCHW, dims ...
     SubgraphDataConv conv(conv_type, is_signed, scaleQ, is_NCHW, dims);

     // per inference
     eop.ProcessFrameWait();
     in = eop.GetOutputBufferPtr();
     conv.ScaleDequant(in, out);
*/
class SubgraphDataConv
{
    public:
        enum ConvType {
            FLOAT_Q     = 0,  // conversion between float <-> Q
            FLOAT_FLOAT = 1,  // conversion between float <-> float
            Q_Q         = 2   // conversion between Q <-> Q
        };

        //! @brief Creates a SubgraphDataConv with no tensors configured.
        //! @param None
        SubgraphDataConv() {}

        //! @brief Creates a SubgraphDataConv from per-tensor parameters.
        //! @param conv_type  per-tensor conversion type, see ConvType
        //! @param is_signed  per-tensor flag, quantized data is signed
        //! @param scaleQ     per-tensor scale factor (float <-> Q)
        //! @param is_NCHW    per-tensor flag, external tensor is NCHW
        //! @param dims       flattened 4d dims, four entries per tensor
        SubgraphDataConv(const std::vector<int>& conv_type,
                         const std::vector<bool>& is_signed,
                         const std::vector<float>& scaleQ,
                         const std::vector<bool>& is_NCHW,
                         const std::vector<int>& dims
                        ) : conv_type_m(conv_type),
                            is_signed_m(is_signed), scaleQ_m(scaleQ),
                            is_NCHW_m(is_NCHW), dims_m(dims)
        {}

        // Accessors are const so they can be used on const objects.
        const std::vector<bool>&  GetIsSigned() const { return is_signed_m; }
        const std::vector<float>& GetScaleQ()   const { return scaleQ_m; }
        const std::vector<bool>&  GetIsNCHW()   const { return is_NCHW_m; }

        //! @brief Estimate parameters for Quantization
        //! @param in vector of floating point external tensor data at input
        void EstScaleQuant(const std::vector<float*>& in);

        //! @brief Estimate parameters for DeQuantization
        //! @param out vector of floating point external tensor data at output
        void EstScaleDequant(const std::vector<float*>& out);

        //! @brief Quantizes floating point {in} to 8-bit Quantized {out}
        //!        and transposes buffer from NHWC to NCHW format (if needed),
        //!        results are put into out pointer consecutively, as expected
        //!        by TIDL
        //! @param in floating point vector input to quantize
        //! @param out 8-bit Quantized output (quantized from in)
        void ScaleQuant(const std::vector<float*>& in, uint8_t* out) const;

        //! @brief De-Quantizes 8-bit Quantized {in} to floating point {out}
        //!        and transposes buffer from NCHW to NHWC format (if needed),
        //!        the results are put into out vector, one vector per
        //!        tensor, as expected by external tensors
        //! @param in 8-bit Quantized input to De-Quantize
        //! @param out floating point output (De-Quantized from in)
        void ScaleDequant(const uint8_t *in, std::vector<float*>& out) const;

    private:
        //! data type conversion, 0: float <-> Q, 1: float <-> float, 2: Q <-> Q
        std::vector<int> conv_type_m;

        //! if tensor needs to be evaluated as signed char (if float <-> Q)
        std::vector<bool> is_signed_m;

        //! Q value for Quantization and Dequantization (if float <-> Q)
        std::vector<float> scaleQ_m;

        //! the format of external tensors, NCHW or NHWC
        //! if data needs to be transposed between TIDL NCHW tensors and
        //! external tensors
        std::vector<bool> is_NCHW_m;

        //! flattened 4d dims of external tensors
        std::vector<int> dims_m;
};

} // namespace tidl
diff --git a/tidl_api/inc/subgraph_runtime.h b/tidl_api/inc/subgraph_runtime.h
new file mode 100644
index 0000000..c75d6b2
--- /dev/null
+++ b/tidl_api/inc/subgraph_runtime.h
@@ -0,0 +1,119 @@
1/******************************************************************************
2 * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29//! @file subgraph_runtime.h
30
31#pragma once
32
// The API below has C linkage.  The linkage block is guarded so that this
// header can also be included from C translation units.
#ifdef __cplusplus
extern "C" {
#endif

//! @brief Top level API to get preferred batch_size for a subgraph
//!        Best performance comes with preferred batch_size processing
//!        plus multi-threaded (num_threads = 2) processing
//! @param total_subgraphs  total number of TIDL subgraphs in whole inference
//! @return preferred batch size
extern int TidlGetPreferredBatchSize(int total_subgraphs);

//! @brief Top level API to initialize a TIDL subgraph on device
//!        If not invoked ahead of time, TidlRunSubgraph() will call this
//!        function before any inference
//! @param total_subgraphs  total number of TIDL subgraphs in whole inference
//! @param subgraph_id  index of current TIDL subgraph
extern void TidlInitSubgraph(int total_subgraphs,
                             int subgraph_id
                            );

//! @brief Top level API to free a TIDL subgraph on device
//! @param total_subgraphs  total number of TIDL subgraphs in whole inference
//! @param subgraph_id  index of current TIDL subgraph
extern void TidlFreeSubgraph(int total_subgraphs,
                             int subgraph_id
                            );

//! @brief Top level inference to run a TIDL subgraph
//! @param total_subgraphs  total number of TIDL subgraphs in whole inference
//! @param subgraph_id  index of current TIDL subgraph
//! @param batch_size  number of samples/inferences in this batch
//! @param num_inputs_per_inference  number of inputs to TIDL subgraph
//!        for every sample/inference
//! @param num_outputs_per_inference  number of outputs from TIDL subgraph
//!        for every sample/inference
//! @param input_tensors  input data to TIDL subgraph, layout as
//!        batch1_input1, batch1_input2, ..., batch1_inputM,
//!        ... ... ...
//!        batchN_input1, batchN_input2, ..., batchN_inputM
//! @param output_tensors  output data from TIDL subgraph, layout as
//!        batch1_output1, batch1_output2, ..., batch1_outputK,
//!        ... ... ...
//!        batchN_output1, batchN_output2, ..., batchN_outputK
extern void TidlRunSubgraph(int total_subgraphs,
                            int subgraph_id,
                            int batch_size,
                            int num_inputs_per_inference,
                            int num_outputs_per_inference,
                            float **input_tensors,
                            float **output_tensors
                           );

#ifdef __cplusplus
}  // extern "C"
#endif
84
85
#if 0
// Auto-generated code from Relay/TVM compilation step after
// partitioning and lowering to backend implementation
//
// NOTE(review): compiled out; illustrative sketch only.  batch_size,
// num_inputs_per_inference, num_outputs_per_inference and TidlFunc are not
// declared in this sketch -- presumably provided by the generated code;
// confirm against the code generator.

void TVM_TidlFunction(int total_subgraphs, int subgraph_id,
                      int num_input_tensors, int num_output_tensors,
                      PackedArgs args)
{
  float** in_data  = new float*[num_inputs_per_inference * batch_size];
  float** out_data = new float*[num_outputs_per_inference * batch_size];

  // Flatten args.data[tensor][sample] into the per-sample layout that
  // TidlRunSubgraph() documents: all inputs of sample 1, then sample 2, ...
  for (int j = 0; j < batch_size; j++)
  {
    for (int i = 0; i < num_inputs_per_inference + num_outputs_per_inference;
         i++)
      if (i < num_inputs_per_inference)
        in_data[j * num_inputs_per_inference + i] = args.data[i][j];
      else
        out_data[j * num_outputs_per_inference + i - num_inputs_per_inference]
                                                  = args.data[i][j];
  }

  // call into this function in libtidl.so
  // dlopen("libtidl_api.so")
  // TidlFunc = dlsym("TidlRunSubgraph");
  (*TidlFunc)(total_subgraphs, subgraph_id, batch_size,
              num_inputs_per_inference, num_outputs_per_inference,
              in_data, out_data);

  delete [] in_data;
  delete [] out_data;
}
#endif
119
diff --git a/tidl_api/make.buildid b/tidl_api/make.buildid
index dbdfda4..c2efbc5 100644
--- a/tidl_api/make.buildid
+++ b/tidl_api/make.buildid
@@ -25,8 +25,8 @@
25# THE POSSIBILITY OF SUCH DAMAGE. 25# THE POSSIBILITY OF SUCH DAMAGE.
26 26
27MAJOR_VER=1 27MAJOR_VER=1
28MINOR_VER=3 28MINOR_VER=4
29PATCH_VER=3 29PATCH_VER=0
30 30
31ifeq ($(shell git rev-parse --short HEAD 2>&1 1>/dev/null; echo $$?),0) 31ifeq ($(shell git rev-parse --short HEAD 2>&1 1>/dev/null; echo $$?),0)
32BUILD_SHA?=$(shell git rev-parse --short HEAD) 32BUILD_SHA?=$(shell git rev-parse --short HEAD)
diff --git a/tidl_api/src/configuration_parser.cpp b/tidl_api/src/configuration_parser.cpp
index f457560..3ad0d2c 100644
--- a/tidl_api/src/configuration_parser.cpp
+++ b/tidl_api/src/configuration_parser.cpp
@@ -52,6 +52,7 @@ struct ConfigParser : qi::grammar<Iterator, ascii::space_type>
52 ConfigParser(Configuration &x) : ConfigParser::base_type(entry) 52 ConfigParser(Configuration &x) : ConfigParser::base_type(entry)
53 { 53 {
54 using qi::int_; 54 using qi::int_;
55 using qi::float_;
55 using qi::bool_; 56 using qi::bool_;
56 using qi::lit; 57 using qi::lit;
57 using qi::lexeme; 58 using qi::lexeme;
@@ -66,6 +67,10 @@ struct ConfigParser : qi::grammar<Iterator, ascii::space_type>
66 path %= lexeme[+(char_ - '"')]; 67 path %= lexeme[+(char_ - '"')];
67 q_path = qi::omit[*char_('"')] >> path >> qi::omit[*char_('"')]; 68 q_path = qi::omit[*char_('"')] >> path >> qi::omit[*char_('"')];
68 69
70 // Rules for parsing subgraph data conversion information
71 intvec = int_ >> *int_;
72 floatvec = float_ >> *float_;
73
69 // Grammar for parsing configuration file 74 // Grammar for parsing configuration file
70 entry %= 75 entry %=
71 lit("layerIndex2LayerGroupId") >> '=' >> 76 lit("layerIndex2LayerGroupId") >> '=' >>
@@ -85,7 +90,15 @@ struct ConfigParser : qi::grammar<Iterator, ascii::space_type>
85 int_[ph::ref(x.quantHistoryParam1)= _1] | 90 int_[ph::ref(x.quantHistoryParam1)= _1] |
86 lit("quantHistoryParam2") >> '=' >> 91 lit("quantHistoryParam2") >> '=' >>
87 int_[ph::ref(x.quantHistoryParam2)= _1] | 92 int_[ph::ref(x.quantHistoryParam2)= _1] |
88 lit("quantMargin") >> '=' >> int_[ph::ref(x.quantMargin)= _1] 93 lit("quantMargin") >> '=' >> int_[ph::ref(x.quantMargin)= _1] |
94 lit("inConvType") >> '=' >> intvec[ph::ref(x.inConvType) = _1] |
95 lit("inIsSigned") >> '=' >> intvec[ph::ref(x.inIsSigned) = _1] |
96 lit("inScaleF2Q") >> '=' >> floatvec[ph::ref(x.inScaleF2Q) = _1] |
97 lit("inIsNCHW") >> '=' >> intvec[ph::ref(x.inIsNCHW) = _1] |
98 lit("outConvType") >> '=' >> intvec[ph::ref(x.outConvType) = _1] |
99 lit("outIsSigned") >> '=' >> intvec[ph::ref(x.outIsSigned) = _1] |
100 lit("outScaleF2Q") >> '=' >> floatvec[ph::ref(x.outScaleF2Q) = _1] |
101 lit("outIsNCHW") >> '=' >> intvec[ph::ref(x.outIsNCHW) = _1]
89 ; 102 ;
90 } 103 }
91 104
@@ -95,6 +108,9 @@ struct ConfigParser : qi::grammar<Iterator, ascii::space_type>
95 108
96 qi::rule<Iterator, std::pair<int, int>(), ascii::space_type> id2group; 109 qi::rule<Iterator, std::pair<int, int>(), ascii::space_type> id2group;
97 qi::rule<Iterator, std::map<int, int>(), ascii::space_type> id2groups; 110 qi::rule<Iterator, std::map<int, int>(), ascii::space_type> id2groups;
111
112 qi::rule<Iterator, std::vector<int>(), ascii::space_type> intvec;
113 qi::rule<Iterator, std::vector<float>(), ascii::space_type> floatvec;
98}; 114};
99 115
100bool Configuration::ReadFromFile(const std::string &file_name) 116bool Configuration::ReadFromFile(const std::string &file_name)
diff --git a/tidl_api/src/subgraph_data_conv.cpp b/tidl_api/src/subgraph_data_conv.cpp
new file mode 100644
index 0000000..6366360
--- /dev/null
+++ b/tidl_api/src/subgraph_data_conv.cpp
@@ -0,0 +1,262 @@
1/******************************************************************************
2 * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29#include <cassert>
30#include <cstring>
31#include "subgraph_data_conv.h"
32
33using namespace tidl;
34
35
// Quantizes one float: scales v by Q, truncates toward zero and saturates
// to [vmin, vmax].  The result is returned as the raw 8-bit pattern (a
// negative value is stored in two's complement).
//
// Saturation is done in the float domain, before the integer conversion:
// converting a float that does not fit in int32_t (or a NaN) is undefined
// behaviour.  A NaN product saturates to vmin.
static inline uint8_t QuantizeValue(float v, float Q, int vmin, int vmax)
{
    // scale
    const float scaled = v * Q;

    // saturate; the negated comparison also catches NaN
    if (!(scaled > (float) vmin))  return (uint8_t) vmin;
    if (scaled >= (float) vmax)    return (uint8_t) vmax;

    // in range: truncation toward zero is now well defined
    return (uint8_t) (int32_t) scaled;
}
45
// De-quantizes one 8-bit value: reads v as signed when S is true and as
// unsigned otherwise, then multiplies by the inverse scale Q_inv.
static inline float DequantizeValue(uint8_t v, float Q_inv, bool S)
{
    int32_t raw;
    if (S)
        raw = (int32_t)(int8_t) v;   // reinterpret the bit pattern as signed
    else
        raw = (int32_t) v;
    return raw * Q_inv;
}
53
// Flattens the 4-d coordinate [d][c][b][a] of a buffer with extents
// [D][C][B][A] into a 1-d index in which a varies fastest.  The outermost
// extent D does not enter the computation.
static inline int GetIndex(int d, int c, int b, int a,
                           int D, int C, int B, int A)
{
    int index = d;
    index = index * C + c;
    index = index * B + b;
    index = index * A + a;
    return index;
}
60
61
// Placeholder for estimating the quantization parameters from the external
// input tensors in `in`.  Not implemented: the body is empty, so calling it
// leaves the object unchanged and `in` is never read.
62void SubgraphDataConv::EstScaleQuant(const std::vector<float*>& in)
63{
64 // TODO: not implemented yet, currently a no-op
65}
66
// Placeholder for estimating the de-quantization parameters from the
// external output tensors in `out`.  Not implemented: the body is empty, so
// calling it leaves the object unchanged and `out` is never read.
67void SubgraphDataConv::EstScaleDequant(const std::vector<float*>& out)
68{
69 // TODO: not implemented yet, currently a no-op
70}
71
72
73void
74SubgraphDataConv::ScaleQuant(const std::vector<float*>& in, uint8_t* out)
75const
76{
77 int offset = 0;
78 for (uint32_t d = 0; d < is_NCHW_m.size(); d++)
79 {
80 int N = dims_m[4 * d + 0];
81 int C = dims_m[4 * d + 1];
82 int H = dims_m[4 * d + 2];
83 int W = dims_m[4 * d + 3];
84
85 if (conv_type_m[d] == ConvType::FLOAT_Q)
86 {
87 float Q = scaleQ_m[d];
88 int vmin = is_signed_m[d] ? -128 : 0;
89 int vmax = is_signed_m[d] ? 127 : 255;
90 float *in_d = in[d];
91 if (is_NCHW_m[d] || (C == 1) || (H*W == 1))
92 {
93 // no need to transpose external tensor
94 for (int i = 0; i < N * C * H * W; i++)
95 out[offset + i] = QuantizeValue(in_d[i], Q, vmin, vmax);
96 }
97 else
98 {
99 // need to transpose external tensor
100 for (int n = 0; n < N; n++)
101 for (int c = 0; c < C; c++)
102 for (int h = 0; h < H; h++)
103 for (int w = 0; w < W; w++)
104 {
105 int nchw = GetIndex(n, c, h, w, N, C, H, W);
106 int nhwc = GetIndex(n, h, w, c, N, H, W, C);
107 out[offset + nchw] = QuantizeValue(in_d[nhwc], Q, vmin, vmax);
108 }
109 }
110 }
111 else if (conv_type_m[d] == ConvType::FLOAT_FLOAT)
112 {
113 assert((W & 0x3) == 0); // last dimension is bytes
114 int f_W = W / 4; // number of elements
115 float *in_d = in[d];
116 float *out_d = (float *) (out + offset);
117 if (is_NCHW_m[d] || (C == 1) || (H*W == 1))
118 {
119 // no need to transpose external tensor
120 memcpy(out_d, in_d, N * C * H * W); // W is bytes
121 }
122 else
123 {
124 // need to transpose external tensor
125 for (int n = 0; n < N; n++)
126 for (int c = 0; c < C; c++)
127 for (int h = 0; h < H; h++)
128 for (int w = 0; w < f_W; w++)
129 {
130 int nchw = GetIndex(n, c, h, w, N, C, H, f_W);
131 int nhwc = GetIndex(n, h, w, c, N, H, f_W, C);
132 out_d[nchw] = in_d[nhwc];
133 }
134 }
135 }
136 else if (conv_type_m[d] == ConvType::Q_Q)
137 {
138 uint8_t *in_d = (uint8_t *) &in[d];
139 uint8_t *out_d = (out + offset);
140 if (is_NCHW_m[d] || (C == 1) || (H*W == 1))
141 {
142 // no need to transpose external tensor
143 memcpy(out_d, in_d, N * C * H * W);
144 }
145 else
146 {
147 // need to transpose external tensor
148 for (int n = 0; n < N; n++)
149 for (int c = 0; c < C; c++)
150 for (int h = 0; h < H; h++)
151 for (int w = 0; w < W; w++)
152 {
153 int nchw = GetIndex(n, c, h, w, N, C, H, W);
154 int nhwc = GetIndex(n, h, w, c, N, H, W, C);
155 out_d[nchw] = in_d[nhwc];
156 }
157 }
158 }
159 else
160 {
161 assert(false);
162 }
163
164 offset += N * C * H * W; // accumulate in bytes
165 }
166}
167
168void
169SubgraphDataConv::ScaleDequant(const uint8_t *in, std::vector<float*>& out)
170const
171{
172 int offset = 0;
173 for (uint32_t d = 0; d < is_NCHW_m.size(); d++)
174 {
175 int N = dims_m[4 * d + 0];
176 int C = dims_m[4 * d + 1];
177 int H = dims_m[4 * d + 2];
178 int W = dims_m[4 * d + 3];
179
180 if (conv_type_m[d] == ConvType::FLOAT_Q)
181 {
182 float Q = scaleQ_m[d];
183 float Q_inv = 1.0f / Q;
184 bool S = is_signed_m[d];
185 float *out_d = out[d];
186 if (is_NCHW_m[d] || (C == 1) || (H*W == 1))
187 {
188 // no need to transpose external tensor
189 for (int i = 0; i < N * C * H * W; i++)
190 out_d[i] = DequantizeValue(in[offset + i], Q_inv, S);
191 }
192 else
193 {
194 // need to transpose external tensor
195 for (int n = 0; n < N; n++)
196 for (int c = 0; c < C; c++)
197 for (int h = 0; h < H; h++)
198 for (int w = 0; w < W; w++)
199 {
200 int nchw = GetIndex(n, c, h, w, N, C, H, W);
201 int nhwc = GetIndex(n, h, w, c, N, H, W, C);
202 out_d[nhwc] = DequantizeValue(in[offset + nchw], Q_inv, S);
203 }
204 }
205 }
206 else if (conv_type_m[d] == ConvType::FLOAT_FLOAT)
207 {
208 assert((W & 0x3) == 0); // last dimension is bytes
209 int f_W = W / 4; // number of elements
210 float *in_d = (float *) (in + offset);
211 float *out_d = out[d];
212 if (is_NCHW_m[d] || (C == 1) || (H*W == 1))
213 {
214 // no need to transpose external tensor
215 memcpy(out_d, in_d, N * C * H * W); // W is bytes
216 }
217 else
218 {
219 // need to transpose external tensor
220 for (int n = 0; n < N; n++)
221 for (int c = 0; c < C; c++)
222 for (int h = 0; h < H; h++)
223 for (int w = 0; w < f_W; w++)
224 {
225 int nchw = GetIndex(n, c, h, w, N, C, H, f_W);
226 int nhwc = GetIndex(n, h, w, c, N, H, f_W, C);
227 out_d[nhwc] = in_d[nchw];
228 }
229 }
230 }
231 else if (conv_type_m[d] == ConvType::Q_Q)
232 {
233 uint8_t *in_d = (uint8_t *) (in + offset);
234 uint8_t *out_d = (uint8_t * ) &out[d];
235 if (is_NCHW_m[d] || (C == 1) || (H*W == 1))
236 {
237 // no need to transpose external tensor
238 memcpy(out_d, in_d, N * C * H * W);
239 }
240 else
241 {
242 // need to transpose external tensor
243 for (int n = 0; n < N; n++)
244 for (int c = 0; c < C; c++)
245 for (int h = 0; h < H; h++)
246 for (int w = 0; w < W; w++)
247 {
248 int nchw = GetIndex(n, c, h, w, N, C, H, W);
249 int nhwc = GetIndex(n, h, w, c, N, H, W, C);
250 out_d[nhwc] = in_d[nchw];
251 }
252 }
253 }
254 else
255 {
256 assert(false);
257 }
258
259 offset += N * C * H * W;
260 }
261}
262
diff --git a/tidl_api/src/subgraph_runtime.cpp b/tidl_api/src/subgraph_runtime.cpp
new file mode 100644
index 0000000..9d068b2
--- /dev/null
+++ b/tidl_api/src/subgraph_runtime.cpp
@@ -0,0 +1,489 @@
1/******************************************************************************
2 * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29#include <pthread.h>
30#define LOKI_PTHREAD_H
31#include <loki/Singleton.h>
32
33#include "util.h"
34#include "subgraph_runtime.h"
35#include "subgraph_runtime_impl.h"
36
37
#if 0
// Auto-generated code from Relay/TVM compilation step after
// partitioning and lowering to backend implementation.
// Reference only: this block is never compiled.  It illustrates how a
// caller is expected to flatten per-frame tensors before invoking
// TidlRunSubgraph(): pointers are laid out frame-major, i.e.
//   in_data [frame * num_inputs_per_inference  + input_index ]
//   out_data[frame * num_outputs_per_inference + output_index]

void TVM_TidlFunction(int total_subgraphs, int subgraph_id,
                      int num_input_tensors, int num_output_tensors,
                      PackedArgs args)
{
  float** in_data  = new float*[num_inputs_per_inference * batch_size];
  float** out_data = new float*[num_outputs_per_inference * batch_size];

  for (int j = 0; j < batch_size; j++)
  {
    for (int i = 0; i < num_inputs_per_inference + num_outputs_per_inference;
         i++)
      if (i < num_inputs_per_inference)
        in_data[j * num_inputs_per_inference + i] = args.data[i][j];
      else
        out_data[j * num_outputs_per_inference + i - num_inputs_per_inference]
                                                            = args.data[i][j];
  }

  // call into this function in libtidl.so
  // dlopen("libtidl_api.so")
  // TidlFunc = dlsym("TidlRunSubgraph");
  (*TidlFunc)(total_subgraphs, subgraph_id, batch_size,
              num_inputs_per_inference, num_outputs_per_inference,
              in_data, out_data);

  delete [] in_data;
  delete [] out_data;
}
#endif
71
72
73// Singleton ResM .cpp
74using namespace tidl;
75
76int TidlGetPreferredBatchSize(int total_subgraphs)
77{
78 ResM& res = ResM::Instance(total_subgraphs);
79 return res.GetNumEs();
80}
81
82void TidlInitSubgraph(int total_subgraphs, int subgraph_id)
83{
84 ResM& res = ResM::Instance(total_subgraphs);
85 res.InitSubgraph(subgraph_id);
86}
87
88void TidlFreeSubgraph(int total_subgraphs, int subgraph_id)
89{
90 ResM& res = ResM::Instance(total_subgraphs);
91 res.FreeSubgraph(subgraph_id);
92}
93
94void TidlRunSubgraph(int total_subgraphs,
95 int subgraph_id,
96 int batch_size,
97 int num_inputs_per_inference,
98 int num_outputs_per_inference,
99 float **input_tensors,
100 float **output_tensors
101 )
102{
103 ResM& res = ResM::Instance(total_subgraphs);
104 res.InitSubgraph(subgraph_id);
105 int num_eops = res.GetNumEOPs(subgraph_id);
106 if (num_eops > batch_size) num_eops = batch_size;
107 std::vector<ExecutionObjectPipeline*> eops(num_eops);
108 for (int i = 0; i < num_eops; i++)
109 eops[i] = res.GetEOP(subgraph_id);
110 const SubgraphDataConv& in_conv = res.GetInConv(subgraph_id);
111 const SubgraphDataConv& out_conv = res.GetOutConv(subgraph_id);
112
113 std::vector<std::vector<float *>> in_data_v(batch_size),
114 out_data_v(batch_size);
115 for (int frame_idx = 0; frame_idx < batch_size; frame_idx++)
116 {
117 for (int i = 0; i < num_inputs_per_inference; i++)
118 in_data_v[frame_idx].emplace_back(input_tensors[
119 frame_idx * num_inputs_per_inference + i]);
120 for (int i = 0; i < num_outputs_per_inference; i++)
121 out_data_v[frame_idx].emplace_back(output_tensors[
122 frame_idx * num_inputs_per_inference + i]);
123 }
124
125 // Process batch_size frames with available eops in pipelined manner
126 // additional num_eops iterations to flush the pipeline (epilogue)
127 for (int frame_idx = 0; frame_idx < batch_size + num_eops; frame_idx++)
128 {
129 ExecutionObjectPipeline *eop = eops[frame_idx % num_eops];
130
131 if (eop->ProcessFrameWait())
132 {
133 const uint8_t *out_data = (const uint8_t*) eop->GetOutputBufferPtr();
134 out_conv.ScaleDequant(out_data, out_data_v[frame_idx - num_eops]);
135 }
136
137 if (frame_idx < batch_size)
138 {
139 uint8_t *in_data = (uint8_t *) eop->GetInputBufferPtr();
140 in_conv.ScaleQuant(in_data_v[frame_idx], in_data);
141 eop->ProcessFrameStartAsync();
142 }
143 }
144
145 for (int i = 0; i < num_eops; i++)
146 res.FreeEOP(subgraph_id, eops[i]);
147}
148
149
150typedef Loki::SingletonHolder <tidl::ResM, Loki::CreateUsingNew,
151Loki::DefaultLifetime, Loki::ClassLevelLockable> tidlSingleResM;
152
153ResM::ResM() : enable_trace_m(false), num_subgraphs_m(0),
154 num_lg2_dsps_used_m(0), eops_m(nullptr)
155{
156}
157
158ResM::~ResM()
159{
160 for (uint32_t i = 0; i < num_subgraphs_m; i++)
161 FreeSubgraph(i);
162
163 delete eops_m;
164 eops_m = nullptr;
165}
166
167void ResM::FreeSubgraph(uint32_t subgraph_id)
168{
169 assert(subgraph_id < num_subgraphs_m);
170
171 if (eops_m != nullptr)
172 {
173 ResEOP& res_eop = (*eops_m)[subgraph_id];
174 if (res_eop.eops != nullptr)
175 {
176 for (const ExecutionObjectPipeline* eop : *(res_eop.eops))
177 {
178 free(eop->GetInputBufferPtr());
179 free(eop->GetOutputBufferPtr());
180 delete eop;
181 }
182 delete res_eop.eops;
183 res_eop.eops = nullptr;
184 }
185 }
186
187 delete es_m[subgraph_id];
188 es_m[subgraph_id] = nullptr;
189
190 delete e2s_m[subgraph_id];
191 e2s_m[subgraph_id] = nullptr;
192
193 delete in_conv_m[subgraph_id];
194 in_conv_m[subgraph_id] = nullptr;
195
196 delete out_conv_m[subgraph_id];
197 out_conv_m[subgraph_id] = nullptr;
198}
199
200ResM& ResM::Instance(uint32_t total_num_subgraphs)
201{
202 ResM& res = tidlSingleResM::Instance();
203 res.Init(total_num_subgraphs);
204 return res;
205}
206
207void ResM::Init(uint32_t num_subgraphs)
208{
209 std::lock_guard<std::mutex> lock(mutex_init_m);
210
211 if (num_subgraphs_m == 0)
212 {
213 num_subgraphs_m = num_subgraphs;
214
215 if (getenv("TIDL_SUBGRAPH_TRACE") != nullptr) enable_trace_m = true;
216
217 // Allocating resources
218 num_eves_m = Executor::GetNumDevices(DeviceType::EVE);
219 num_dsps_m = Executor::GetNumDevices(DeviceType::DSP);
220
221 assert(num_eves_m > 0 || num_dsps_m > 0);
222 assert(num_subgraphs_m <= num_eves_m || num_subgraphs_m <= num_dsps_m);
223 num_es_per_subgraph_m = num_eves_m / num_subgraphs_m;
224 if (num_eves_m == 0)
225 num_es_per_subgraph_m = num_dsps_m / num_subgraphs_m;
226
227 cs_m.resize(num_subgraphs_m);
228 es_m.resize(num_subgraphs_m, nullptr);
229 e2s_m.resize(num_subgraphs_m, nullptr);
230 eops_m = new std::vector<ResEOP>(num_subgraphs_m);
231 in_conv_m.resize(num_subgraphs_m, nullptr);
232 out_conv_m.resize(num_subgraphs_m, nullptr);
233 }
234}
235
236
237void ResM::InitSubgraph(uint32_t subgraph_id)
238{
239 assert(subgraph_id < num_subgraphs_m);
240 ResEOP& res_eop = (*eops_m)[subgraph_id];
241
242 std::unique_lock<std::mutex> lock(res_eop.mutex_eops);
243
244 // Constructing EOPs if not already constructed
245 if (res_eop.eops == nullptr)
246 {
247 if (enable_trace_m)
248 printf("Subgraph %d: initialing E/EOPs with %d cores\n",
249 subgraph_id, num_es_per_subgraph_m);
250
251 // Read config file
252 std::string cfg_file = "subgraph" + std::to_string(subgraph_id) + ".cfg";
253 bool status = cs_m[subgraph_id].ReadFromFile(cfg_file);
254 assert(status);
255
256 // Read the network
257 sTIDL_Network_t *net = new sTIDL_Network_t;
258 status = ReadNetworkBinary(cs_m[subgraph_id].netBinFile,
259 reinterpret_cast<char *>(net));
260 assert(status);
261
262 // Get data conversion info from configuration
263 // Get input/output tensors dimensions from network
264 // Construct data converters at the subgraph boundaries
265 std::vector<int> inDims, outDims;
266 for (int32_t layer = 0; layer < net->numLayers; layer++)
267 {
268 if (net->TIDLLayers[layer].layerType != (int32_t) TIDL_DataLayer)
269 continue;
270 if (net->TIDLLayers[layer].numInBufs <= 0)
271 {
272 for (int d = 0; d < 4; d++)
273 inDims.push_back(net->TIDLLayers[layer].outData[0].dimValues[d]);
274 }
275 if (net->TIDLLayers[layer].numOutBufs <= 0)
276 {
277 for (int d = 0; d < 4; d++)
278 outDims.push_back(net->TIDLLayers[layer].inData[0].dimValues[d]);
279 }
280 }
281 assert(cs_m[subgraph_id].inIsNCHW.size() * 4 == inDims.size());
282 assert(cs_m[subgraph_id].outIsNCHW.size() * 4 == outDims.size());
283 std::vector<bool> inIsSigned, outIsSigned, inIsNCHW, outIsNCHW;
284 for (int v : cs_m[subgraph_id].inIsSigned) inIsSigned.push_back(v != 0);
285 for (int v : cs_m[subgraph_id].inIsNCHW) inIsNCHW.push_back(v != 0);
286 for (int v : cs_m[subgraph_id].outIsSigned) outIsSigned.push_back(v != 0);
287 for (int v : cs_m[subgraph_id].outIsNCHW) outIsNCHW.push_back(v != 0);
288 in_conv_m[subgraph_id] = new SubgraphDataConv(
289 cs_m[subgraph_id].inConvType,
290 inIsSigned,
291 cs_m[subgraph_id].inScaleF2Q,
292 inIsNCHW,
293 inDims);
294 out_conv_m[subgraph_id] = new SubgraphDataConv(
295 cs_m[subgraph_id].outConvType,
296 outIsSigned,
297 cs_m[subgraph_id].outScaleF2Q,
298 outIsNCHW,
299 outDims);
300
301 // Check if last few layers can be offloaded to DSPs
302 // and DSPs are available
303 DeviceIds e_ids, e2_ids;
304 for (uint32_t i = 0; i < num_es_per_subgraph_m; i++)
305 e_ids.insert(static_cast<DeviceId>(
306 subgraph_id * num_es_per_subgraph_m + i));
307 // uint32_t num_dsps_used = 0;
308 if (num_eves_m > 0 && num_dsps_m > 0 && ! cs_m[subgraph_id].runFullNet)
309 {
310 if (cs_m[subgraph_id].layerIndex2LayerGroupId.empty())
311 {
312 int32_t start_layer = net->numLayers -1;
313 int32_t end_layer = 0;
314 if (net->TIDLLayers[start_layer].layerType == (int32_t) TIDL_DataLayer)
315 start_layer -= 1;
316 if (net->TIDLLayers[end_layer].layerType == (int32_t) TIDL_DataLayer)
317 end_layer += 1;
318 int32_t i = start_layer;
319 for ( ; i > end_layer; i--)
320 {
321 int32_t layer_type = net->TIDLLayers[i].layerType;
322 if (layer_type != (int32_t) TIDL_SoftMaxLayer &&
323 layer_type != (int32_t) TIDL_InnerProductLayer &&
324 layer_type != (int32_t) TIDL_PoolingLayer)
325 break;
326 }
327 i += 1;
328 if (i <= start_layer)
329 {
330 if (num_lg2_dsps_used_m < num_dsps_m)
331 {
332 if (enable_trace_m)
333 printf("Subgraph %d: assign layers %d to %d to group 2 for DSP\n",
334 subgraph_id, i, start_layer);
335 while (i <= start_layer)
336 cs_m[subgraph_id].layerIndex2LayerGroupId[i++] = 2;
337 }
338 }
339 }
340 else
341 {
342 if (enable_trace_m)
343 printf("Subgraph %d: using layer2group map in config file for DSP\n",
344 subgraph_id);
345 }
346
347 if (! cs_m[subgraph_id].layerIndex2LayerGroupId.empty())
348 {
349 e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m));
350 num_lg2_dsps_used_m += 1;
351 if (num_subgraphs_m == 1) // Allocate all dsps if only one subgraph
352 {
353 while (num_lg2_dsps_used_m < num_dsps_m)
354 e2_ids.insert(static_cast<DeviceId>(num_lg2_dsps_used_m++));
355 }
356 }
357 }
358 delete net;
359
360 if (e2_ids.empty())
361 cs_m[subgraph_id].runFullNet = true;
362 cs_m[subgraph_id].enableApiTrace = enable_trace_m;
363
364 // Constructing Es and EOPs, each subgraph -> num_eves_per_subgraph_m EOPs
365 res_eop.eops = new std::vector<ExecutionObjectPipeline*>;
366 uint32_t buffer_factor = 2; // double buffering factor
367 if (num_eves_m > 0)
368 {
369 es_m[subgraph_id] = new Executor(DeviceType::EVE, e_ids,
370 cs_m[subgraph_id], 1);
371 if (! e2_ids.empty())
372 {
373 e2s_m[subgraph_id] = new Executor(DeviceType::DSP, e2_ids,
374 cs_m[subgraph_id], 2);
375 for (uint32_t j = 0; j < buffer_factor; j++)
376 for (uint32_t i = 0; i < num_es_per_subgraph_m; i++)
377 res_eop.eops->emplace_back(new ExecutionObjectPipeline(
378 {(*es_m[subgraph_id])[i],
379 (*e2s_m[subgraph_id])[i % e2_ids.size()]}));
380 }
381 else
382 {
383 for (uint32_t j = 0; j < buffer_factor; j++)
384 for (uint32_t i = 0; i < num_es_per_subgraph_m; i++)
385 res_eop.eops->emplace_back(new ExecutionObjectPipeline(
386 {(*es_m[subgraph_id])[i]}));
387 }
388 }
389 else
390 {
391 es_m[subgraph_id] = new Executor(DeviceType::DSP, e_ids,
392 cs_m[subgraph_id], 1);
393 for (uint32_t j = 0; j < buffer_factor; j++)
394 for (uint32_t i = 0; i < num_es_per_subgraph_m; i++)
395 res_eop.eops->emplace_back(new ExecutionObjectPipeline(
396 {(*es_m[subgraph_id])[i]}));
397 }
398
399 if (enable_trace_m)
400 printf("Subgraph %d: Allocating input/output buffers for %d EOPs\n",
401 subgraph_id, res_eop.eops->size());
402 // Allocate input/output buffers
403 for (auto eop : *(res_eop.eops))
404 {
405 size_t in_size = eop->GetInputBufferSizeInBytes();
406 size_t out_size = eop->GetOutputBufferSizeInBytes();
407 void* in_ptr = malloc(in_size);
408 void* out_ptr = malloc(out_size);
409 assert(in_ptr != nullptr && out_ptr != nullptr);
410
411 ArgInfo in(in_ptr, in_size);
412 ArgInfo out(out_ptr, out_size);
413 eop->SetInputOutputBuffer(in, out);
414 }
415
416 res_eop.free_eop_index = 0;
417 res_eop.is_used.resize(res_eop.eops->size(), false);
418 }
419}
420
421uint32_t ResM::GetNumEOPs(uint32_t subgraph_id)
422{
423 assert(subgraph_id < num_subgraphs_m);
424 ResEOP& res_eop = (*eops_m)[subgraph_id];
425 assert (res_eop.eops != nullptr);
426
427 return res_eop.eops->size();
428}
429
430ExecutionObjectPipeline* ResM::GetEOP(uint32_t subgraph_id)
431{
432 assert(subgraph_id < num_subgraphs_m);
433 ResEOP& res_eop = (*eops_m)[subgraph_id];
434 assert(res_eop.eops != nullptr);
435
436 std::unique_lock<std::mutex> lock(res_eop.mutex_eops);
437
438 // Return an available EOP (round robin allocation)
439 uint32_t curr_eop = res_eop.free_eop_index;
440 res_eop.cv_eops.wait(lock, [this, subgraph_id, curr_eop]{
441 return this->eops_m->at(subgraph_id).is_used[curr_eop] == false; });
442 res_eop.is_used[curr_eop] = true;
443 res_eop.free_eop_index = (curr_eop + 1) % res_eop.eops->size();
444 if (enable_trace_m)
445 printf("Subgraph %d: return EOP %d for GetEOP()\n", subgraph_id, curr_eop);
446 return res_eop.eops->at(curr_eop);
447}
448
449void ResM::FreeEOP(uint32_t subgraph_id, ExecutionObjectPipeline* eop)
450{
451 assert(subgraph_id < num_subgraphs_m);
452 ResEOP& res_eop = (*eops_m)[subgraph_id];
453 assert(res_eop.eops != nullptr);
454
455 {
456 std::unique_lock<std::mutex> lock(res_eop.mutex_eops);
457 for (uint32_t i = 0; i < res_eop.is_used.size(); i++)
458 if (res_eop.eops->at(i) == eop)
459 {
460 res_eop.is_used[i] = false;
461 if (enable_trace_m)
462 printf("Subgraph %d: FreeEOP %d\n", subgraph_id, i);
463 break;
464 }
465 }
466 res_eop.cv_eops.notify_all();
467}
468
469Configuration& ResM::GetConfiguration(uint32_t subgraph_id)
470{
471 assert(subgraph_id < num_subgraphs_m);
472 assert((*eops_m)[subgraph_id].eops != nullptr);
473 return cs_m[subgraph_id];
474}
475
476const SubgraphDataConv& ResM::GetInConv(uint32_t subgraph_id)
477{
478 assert(subgraph_id < num_subgraphs_m);
479 assert(in_conv_m[subgraph_id] != nullptr);
480 return *in_conv_m[subgraph_id];
481}
482
483const SubgraphDataConv& ResM::GetOutConv(uint32_t subgraph_id)
484{
485 assert(subgraph_id < num_subgraphs_m);
486 assert(out_conv_m[subgraph_id] != nullptr);
487 return *out_conv_m[subgraph_id];
488}
489
diff --git a/tidl_api/src/subgraph_runtime_impl.h b/tidl_api/src/subgraph_runtime_impl.h
new file mode 100644
index 0000000..54dc12d
--- /dev/null
+++ b/tidl_api/src/subgraph_runtime_impl.h
@@ -0,0 +1,96 @@
1/******************************************************************************
2 * Copyright (c) 2019 Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29//! @file subgraph_runtime_impl.h
30
31#pragma once
32#include <vector>
33#include <mutex>
34#include <condition_variable>
35#include "execution_object_pipeline.h"
36#include "subgraph_data_conv.h"
37
38
39namespace tidl {
40
41// Singleton ResM .h file
42// Resource manager for available EVE and DSP devices,
43// - Allocates EVEs and DSPs
44// - Constructs Executors (tidl_setup) and ExecutionObjects (tid_init)
45// - Creates set of ExecutionPipelines (with or without DSP)
46// - Allocating EOP on demand (acquire and free semantics)
47// - Allocates input/output buffers
48class ResM {
49 public:
50 ResM();
51 ~ResM();
52 static ResM& Instance(uint32_t total_num_subgraphs = 1);
53
54 // how to get resources for subgraph_id
55 void InitSubgraph(uint32_t subgraph_id);
56 void FreeSubgraph(uint32_t subgraph_id);
57 uint32_t GetNumEOPs(uint32_t subgraph_id);
58 ExecutionObjectPipeline* GetEOP(uint32_t subgraph_id);
59 void FreeEOP(uint32_t subgraph_id,
60 ExecutionObjectPipeline* eop);
61 Configuration& GetConfiguration(uint32_t subgraph_id);
62 const SubgraphDataConv& GetInConv(uint32_t subgraph_id);
63 const SubgraphDataConv& GetOutConv(uint32_t subgraph_id);
64 uint32_t GetNumEs() { return num_es_per_subgraph_m; }
65
66
67 private:
68 void Init(uint32_t num_subgraphs);
69
70 bool enable_trace_m;
71 uint32_t num_subgraphs_m;
72 uint32_t num_es_per_subgraph_m;
73 uint32_t num_eves_m;
74 uint32_t num_dsps_m;
75 uint32_t num_lg2_dsps_used_m; // in partitioned execution case
76 std::mutex mutex_init_m;
77
78 // indexed by subgraph_id for resources
79 struct ResEOP {
80 ResEOP() : free_eop_index(0), is_used(), eops(nullptr) {}
81
82 uint32_t free_eop_index;
83 std::mutex mutex_eops;
84 std::condition_variable cv_eops;
85 std::vector<bool> is_used;
86 std::vector<ExecutionObjectPipeline*>* eops;
87 };
88 std::vector<Configuration> cs_m;
89 std::vector<Executor*> es_m;
90 std::vector<Executor*> e2s_m;
91 std::vector<ResEOP> *eops_m;
92 std::vector<SubgraphDataConv*> in_conv_m;
93 std::vector<SubgraphDataConv*> out_conv_m;
94};
95
96} // namespace tidl