Wall cleanup, optimize ssd_multibox
author Yuan Zhao <yuanzhao@ti.com>
Wed, 5 Sep 2018 17:19:49 +0000 (12:19 -0500)
committer Yuan Zhao <yuanzhao@ti.com>
Wed, 5 Sep 2018 18:38:34 +0000 (13:38 -0500)
- Fix -Wall errors
- Optimize pipeline execution for ssd_multibox
- MCT-1015

examples/common/video_utils.cpp
examples/imagenet/main.cpp
examples/segmentation/main.cpp
examples/ssd_multibox/main.cpp

index 3afc4381f0d642eefeaa8a8553479b0a45032c7a..bfb7642497936e2058273c4d30516bacaae86644 100644 (file)
@@ -116,6 +116,8 @@ bool ProcessArgs(int argc, char *argv[], cmdline_opts_t& opts)
         opts.is_video_input = (suffix == ".mp4") || (suffix == ".avi") ||
                               (suffix == ".mov");
     }
+
+    return true;
 }
 
 // Set Video Input and Output
@@ -141,5 +143,7 @@ bool SetVideoInputOutput(VideoCapture &cap, const cmdline_opts_t& opts,
         }
         namedWindow(window_name, WINDOW_AUTOSIZE | CV_GUI_NORMAL);
     }
+
+    return true;
 }
 
index 8504f1132b0b714706309a9f28b01481fd4ce00d..b84dcf616a1b85456d9cdba779de57a42a9c3837 100644 (file)
@@ -65,9 +65,9 @@ const char *default_inputs[NUM_DEFAULT_INPUTS] =
     "../test/testvecs/input/objects/cat-pet-animal-domestic-104827.jpeg"
 };
 
-Executor* CreateExecutor(DeviceType dt, int num, const Configuration& c);
+Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c);
 bool RunConfiguration(cmdline_opts_t& opts);
-bool ReadFrame(ExecutionObject& eo, int frame_idx, const Configuration& c,
+bool ReadFrame(ExecutionObject& eo, uint32_t frame_idx, const Configuration& c,
                const cmdline_opts_t& opts, VideoCapture &cap);
 bool WriteFrameOutput(const ExecutionObject &eo);
 void DisplayHelp();
@@ -195,7 +195,7 @@ bool RunConfiguration(cmdline_opts_t& opts)
 }
 
 // Create an Executor with the specified type and number of EOs
-Executor* CreateExecutor(DeviceType dt, int num, const Configuration& c)
+Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c)
 {
     if (num == 0) return nullptr;
 
@@ -206,7 +206,7 @@ Executor* CreateExecutor(DeviceType dt, int num, const Configuration& c)
     return new Executor(dt, ids, c);
 }
 
-bool ReadFrame(ExecutionObject &eo, int frame_idx, const Configuration& c,
+bool ReadFrame(ExecutionObject &eo, uint32_t frame_idx, const Configuration& c,
                const cmdline_opts_t& opts, VideoCapture &cap)
 {
     if (frame_idx >= opts.num_frames)
index 91d68f48615abaef9156af5f1c8b0c22bd32c0b0..b0aef2f6e4558c79d663644f97d1c8d701b39ea6 100644 (file)
@@ -64,8 +64,8 @@ uint32_t orig_height;
 
 
 bool RunConfiguration(const cmdline_opts_t& opts);
-Executor* CreateExecutor(DeviceType dt, int num, const Configuration& c);
-bool ReadFrame(ExecutionObject& eo, int frame_idx, const Configuration& c,
+Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c);
+bool ReadFrame(ExecutionObject& eo, uint32_t frame_idx, const Configuration& c,
                const cmdline_opts_t& opts, VideoCapture &cap);
 bool WriteFrameOutput(const ExecutionObject &eo, const Configuration& c,
                       const cmdline_opts_t& opts);
@@ -202,7 +202,7 @@ bool RunConfiguration(const cmdline_opts_t& opts)
 }
 
 // Create an Executor with the specified type and number of EOs
-Executor* CreateExecutor(DeviceType dt, int num, const Configuration& c)
+Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c)
 {
     if (num == 0) return nullptr;
 
@@ -213,7 +213,7 @@ Executor* CreateExecutor(DeviceType dt, int num, const Configuration& c)
     return new Executor(dt, ids, c);
 }
 
-bool ReadFrame(ExecutionObject &eo, int frame_idx, const Configuration& c, 
+bool ReadFrame(ExecutionObject &eo, uint32_t frame_idx, const Configuration& c,
                const cmdline_opts_t& opts, VideoCapture &cap)
 {
     if (frame_idx >= opts.num_frames)
index 073a6efa0198596d8e03c488ed594090ae5dd7ec..482e9f72fa37d4d2f70f445334be69de3e86d53f 100644 (file)
@@ -65,9 +65,9 @@ uint32_t orig_height;
 
 
 bool RunConfiguration(const cmdline_opts_t& opts);
-Executor* CreateExecutor(DeviceType dt, int num, const Configuration& c,
+Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
                          int layers_group_id);
-bool ReadFrame(ExecutionObjectPipeline& eop, int frame_idx,
+bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx,
                const Configuration& c, const cmdline_opts_t& opts,
                VideoCapture &cap);
 bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
@@ -159,9 +159,38 @@ bool RunConfiguration(const cmdline_opts_t& opts)
         // Construct ExecutionObjectPipeline that utilizes multiple
         // ExecutionObjects to process a single frame, each ExecutionObject
         // processes one layerGroup of the network
+        //
+        // A deeper pipeline enables better-overlapped pipeline execution.
+        // Taking one EVE and one DSP as an example, different values of
+        //     pipeline_depth give different execution behavior:
+        // If pipeline_depth is set to 1,
+        //    we create one EOP: eop0 (eve0, dsp0)
+        //    pipeline execution of multiple frames over time is as follows:
+        //    --------------------- time ------------------->
+        //    eop0: [eve0...][dsp0]
+        //    eop0:                [eve0...][dsp0]
+        //    eop0:                               [eve0...][dsp0]
+        //    eop0:                                              [eve0...][dsp0]
+        // If pipeline_depth is set to 2,
+        //    we create two EOPs: eop0 (eve0, dsp0), eop1 (eve0, dsp0)
+        //    pipeline execution of multiple frames over time is as follows:
+        //    --------------------- time ------------------->
+        //    eop0: [eve0...][dsp0]
+        //    eop1:          [eve0...][dsp0]
+        //    eop0:                   [eve0...][dsp0]
+        //    eop1:                            [eve0...][dsp0]
+        // An additional benefit of setting pipeline_depth to 2 is that
+        //    it also overlaps host ReadFrame() (RF) with device processing:
+        //    --------------------- time ------------------->
+        //    eop0: [RF][eve0...][dsp0]
+        //    eop1:     [RF]     [eve0...][dsp0]
+        //    eop0:                    [RF][eve0...][dsp0]
+        //    eop1:                             [RF][eve0...][dsp0]
         vector<ExecutionObjectPipeline *> eops;
-        for (uint32_t i = 0; i < max(opts.num_eves, opts.num_dsps); i++)
-            eops.push_back(new ExecutionObjectPipeline(
+        uint32_t pipeline_depth = 2;  // 2 EOs in EOP -> depth 2
+        for (uint32_t j = 0; j < pipeline_depth; j++)
+            for (uint32_t i = 0; i < max(opts.num_eves, opts.num_dsps); i++)
+                eops.push_back(new ExecutionObjectPipeline(
                       {(*e_eve)[i%opts.num_eves], (*e_dsp)[i%opts.num_dsps]}));
         uint32_t num_eops = eops.size();
 
@@ -211,7 +240,7 @@ bool RunConfiguration(const cmdline_opts_t& opts)
 }
 
 // Create an Executor with the specified type and number of EOs
-Executor* CreateExecutor(DeviceType dt, int num, const Configuration& c,
+Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
                          int layers_group_id)
 {
     if (num == 0) return nullptr;
@@ -223,7 +252,7 @@ Executor* CreateExecutor(DeviceType dt, int num, const Configuration& c,
     return new Executor(dt, ids, c, layers_group_id);
 }
 
-bool ReadFrame(ExecutionObjectPipeline& eop, int frame_idx,
+bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx,
                const Configuration& c, const cmdline_opts_t& opts,
                VideoCapture &cap)
 {