Added Python 3 bindings for TIDL API

[tidl/tidl-api.git] / examples / ssd_multibox / main.cpp
diff --git a/examples/ssd_multibox/main.cpp b/examples/ssd_multibox/main.cpp

index c3d9c8e7036de5bc41935d3101509cc0422174b7..4bcc707984bae927bf2c521e45c2138a89061872 100644 (file)
--- a/examples/ssd_multibox/main.cpp
+++ b/examples/ssd_multibox/main.cpp
@@ -26,7 +26,6 @@
   *   THE POSSIBILITY OF SUCH DAMAGE.
   *****************************************************************************/
  #include <signal.h>
-#include <getopt.h>
  #include <iostream>
  #include <iomanip>
  #include <fstream>
@@ -40,56 +39,41 @@
  #include <queue>
  #include <vector>
  #include <cstdio>
+#include <chrono>
  
  #include "executor.h"
  #include "execution_object.h"
+#include "execution_object_pipeline.h"
  #include "configuration.h"
  #include "../segmentation/object_classes.h"
+#include "../common/utils.h"
+#include "../common/video_utils.h"
+
+using namespace std;
+using namespace tidl;
+using namespace cv;
  
-#include "opencv2/core.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/highgui.hpp"
-#include "opencv2/videoio.hpp"
  
  #define NUM_VIDEO_FRAMES  100
  #define DEFAULT_CONFIG    "jdetnet"
  #define DEFAULT_INPUT     "../test/testvecs/input/preproc_0_768x320.y"
+#define DEFAULT_INPUT_FRAMES (1)
  
-bool __TI_show_debug_ = false;
-bool is_default_input = false;
-bool is_preprocessed_input = false;
-bool is_camera_input       = false;
-int  orig_width;
-int  orig_height;
  object_class_table_t *object_class_table;
-
-using namespace tidl;
-using namespace cv;
-
-
-bool RunConfiguration(const std::string& config_file, uint32_t num_devices,
-                      DeviceType device_type, std::string& input_file);
-bool ReadFrame(ExecutionObject& eo, int frame_idx,
-               const Configuration& configuration, int num_frames,
-               std::string& image_file, VideoCapture &cap);
-bool WriteFrameOutput(const ExecutionObject &eo_in,
-                      const ExecutionObject &eo_out,
-                      const Configuration& configuration);
-
-void ReportTime(int frame_index, std::string device_name, double elapsed_host,
-                double elapsed_device);
-
-static void ProcessArgs(int argc, char *argv[],
-                        std::string& config,
-                        uint32_t& num_devices,
-                        DeviceType& device_type,
-                        std::string& input_file);
-
+uint32_t orig_width;
+uint32_t orig_height;
+
+
+bool RunConfiguration(const cmdline_opts_t& opts);
+Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
+                         int layers_group_id);
+bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx,
+               const Configuration& c, const cmdline_opts_t& opts,
+               VideoCapture &cap);
+bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
+                      const Configuration& c, const cmdline_opts_t& opts);
  static void DisplayHelp();
  
-static double ms_diff(struct timespec &t0, struct timespec &t1)
-{ return (t1.tv_sec - t0.tv_sec) * 1e3 + (t1.tv_nsec - t0.tv_nsec) / 1e6; }
-
  
  int main(int argc, char *argv[])
  {
@@ -98,245 +82,209 @@ int main(int argc, char *argv[])
      signal(SIGTERM, exit);
  
      // If there are no devices capable of offloading TIDL on the SoC, exit
-    uint32_t num_dla = Executor::GetNumDevices(DeviceType::DLA);
-    uint32_t num_dsp = Executor::GetNumDevices(DeviceType::DSP);
-    if (num_dla == 0 || num_dsp == 0)
+    uint32_t num_eves = Executor::GetNumDevices(DeviceType::EVE);
+    uint32_t num_dsps = Executor::GetNumDevices(DeviceType::DSP);
+    if (num_eves == 0 || num_dsps == 0)
      {
-        std::cout << "ssd_multibox requires both DLA and DSP for execution."
-                  << std::endl;
+        cout << "ssd_multibox requires both EVE and DSP for execution." << endl;
          return EXIT_SUCCESS;
      }
  
      // Process arguments
-    std::string config      = DEFAULT_CONFIG;
-    std::string input_file  = DEFAULT_INPUT;
-    uint32_t num_devices    = 1;
-    DeviceType  device_type = DeviceType::DLA;
-    ProcessArgs(argc, argv, config, num_devices, device_type, input_file);
-
-    // Use same number of DLAs and DSPs
-    num_devices = std::min(num_devices, std::min(num_dla, num_dsp));
-    if (num_devices == 0)
+    cmdline_opts_t opts;
+    opts.config = DEFAULT_CONFIG;
+    opts.num_eves = 1;
+    opts.num_dsps = 1;
+    if (! ProcessArgs(argc, argv, opts))
      {
-        std::cout << "Partitioned execution requires at least 1 DLA and 1 DSP."
-                  << std::endl;
-        return EXIT_FAILURE;
+        DisplayHelp();
+        exit(EXIT_SUCCESS);
      }
-    if ((object_class_table = GetObjectClassTable(config)) == nullptr)
+    assert(opts.num_dsps != 0 && opts.num_eves != 0);
+    if (opts.num_frames == 0)
+        opts.num_frames = (opts.is_camera_input || opts.is_video_input) ?
+                          NUM_VIDEO_FRAMES :
+                          (opts.input_file.empty() ? DEFAULT_INPUT_FRAMES : 1);
+    if (opts.input_file.empty())
+        cout << "Input: " << DEFAULT_INPUT << endl;
+    else
+        cout << "Input: " << opts.input_file << endl;
+
+    // Get object class table
+    if ((object_class_table = GetObjectClassTable(opts.config)) == nullptr)
      {
-        std::cout << "No object classes defined for this config." << std::endl;
+        cout << "No object classes defined for this config." << endl;
          return EXIT_FAILURE;
      }
  
-    if (input_file == DEFAULT_INPUT)  is_default_input = true;
-    if (input_file == "camera")       is_camera_input = true;
-    if (input_file.length() > 2 &&
-        input_file.compare(input_file.length() - 2, 2, ".y") == 0)
-        is_preprocessed_input = true;
-    std::cout << "Input: " << input_file << std::endl;
-    std::string config_file = "../test/testvecs/config/infer/tidl_config_"
-                              + config + ".txt";
-    bool status = RunConfiguration(config_file, num_devices, device_type,
-                                   input_file);
-
+    // Run network
+    bool status = RunConfiguration(opts);
      if (!status)
      {
-        std::cout << "ssd_multibox FAILED" << std::endl;
+        cout << "ssd_multibox FAILED" << endl;
          return EXIT_FAILURE;
      }
  
-    std::cout << "ssd_multibox PASSED" << std::endl;
+    cout << "ssd_multibox PASSED" << endl;
      return EXIT_SUCCESS;
  }
  
-bool RunConfiguration(const std::string& config_file, uint32_t num_devices,
-                      DeviceType device_type, std::string& input_file)
+bool RunConfiguration(const cmdline_opts_t& opts)
  {
-    DeviceIds ids;
-    for (int i = 0; i < num_devices; i++)
-        ids.insert(static_cast<DeviceId>(i));
-
      // Read the TI DL configuration file
-    Configuration configuration;
-    bool status = configuration.ReadFromFile(config_file);
+    Configuration c;
+    std::string config_file = "../test/testvecs/config/infer/tidl_config_"
+                              + opts.config + ".txt";
+    bool status = c.ReadFromFile(config_file);
      if (!status)
      {
-        std::cerr << "Error in configuration file: " << config_file
-                  << std::endl;
+        cerr << "Error in configuration file: " << config_file << endl;
          return false;
      }
+    c.enableApiTrace = opts.verbose;
  
-    // setup input
-    int num_frames = is_default_input ? 3 : 1;
+    // setup camera/video input
      VideoCapture cap;
-    std::string image_file;
-    if (is_camera_input)
-    {
-        cap = VideoCapture(1);  // cap = VideoCapture("test.mp4");
-        if (! cap.isOpened())
-        {
-            std::cerr << "Cannot open camera input." << std::endl;
-            return false;
-        }
-        num_frames = NUM_VIDEO_FRAMES;
-        namedWindow("SSD_Multibox", WINDOW_AUTOSIZE | CV_GUI_NORMAL);
-    }
-    else
-    {
-        image_file = input_file;
-    }
+    if (! SetVideoInputOutput(cap, opts, "SSD_Multibox"))  return false;
  
      try
      {
-        // Create a executor with the approriate core type, number of cores
+        // Create Executors with the appropriate core type, number of cores
          // and configuration specified
-        // DLA will run layersGroupId 1 in the network, while
+        // EVE will run layersGroupId 1 in the network, while
          // DSP will run layersGroupId 2 in the network
-        Executor executor_dla(DeviceType::DLA, ids, configuration, 1);
-        Executor executor_dsp(DeviceType::DSP, ids, configuration, 2);
-
-        // Query Executor for set of ExecutionObjects created
-        const ExecutionObjects& execution_objects_dla =
-                                            executor_dla.GetExecutionObjects();
-        const ExecutionObjects& execution_objects_dsp =
-                                            executor_dsp.GetExecutionObjects();
-        int num_eos = execution_objects_dla.size();
-
-        // Allocate input and output buffers for each execution object
-        // Note that "out" is both the output of eo_dla and the input of eo_dsp
-        // This is how two layersGroupIds, 1 and 2, are tied together
-        std::vector<void *> buffers;
-        for (int i = 0; i < num_eos; i++)
-        {
-            ExecutionObject *eo_dla = execution_objects_dla[i].get();
-            size_t in_size  = eo_dla->GetInputBufferSizeInBytes();
-            size_t out_size = eo_dla->GetOutputBufferSizeInBytes();
-            ArgInfo in  = { ArgInfo(malloc(in_size),  in_size)  };
-            ArgInfo out = { ArgInfo(malloc(out_size), out_size) };
-            eo_dla->SetInputOutputBuffer(in, out);
-
-            ExecutionObject *eo_dsp = execution_objects_dsp[i].get();
-            size_t out2_size = eo_dsp->GetOutputBufferSizeInBytes();
-            ArgInfo out2 = { ArgInfo(malloc(out2_size), out2_size) };
-            eo_dsp->SetInputOutputBuffer(out, out2);
-
-            buffers.push_back(in.ptr());
-            buffers.push_back(out.ptr());
-            buffers.push_back(out2.ptr());
-        }
-
-        #define MAX_NUM_EOS  4
-        struct timespec t0[MAX_NUM_EOS], t1, tloop0, tloop1;
-        clock_gettime(CLOCK_MONOTONIC, &tloop0);
-
-        // Process frames with available execution objects in a pipelined manner
-        // additional num_eos iterations to flush the pipeline (epilogue)
-        ExecutionObject *eo_dla, *eo_dsp, *eo_input;
-        for (int frame_idx = 0;
-             frame_idx < num_frames + num_eos; frame_idx++)
+        Executor* e_eve = CreateExecutor(DeviceType::EVE, opts.num_eves, c, 1);
+        Executor* e_dsp = CreateExecutor(DeviceType::DSP, opts.num_dsps, c, 2);
+
+        // Construct ExecutionObjectPipeline that utilizes multiple
+        // ExecutionObjects to process a single frame, each ExecutionObject
+        // processes one layerGroup of the network
+        //
+        // Pipeline depth can enable more optimized pipeline execution:
+        // Given one EVE and one DSP as an example, with different
+        //     pipeline_depth, we have different execution behavior:
+        // If pipeline_depth is set to 1,
+        //    we create one EOP: eop0 (eve0, dsp0)
+        //    pipeline execution of multiple frames over time is as follows:
+        //    --------------------- time ------------------->
+        //    eop0: [eve0...][dsp0]
+        //    eop0:                [eve0...][dsp0]
+        //    eop0:                               [eve0...][dsp0]
+        //    eop0:                                              [eve0...][dsp0]
+        // If pipeline_depth is set to 2,
+        //    we create two EOPs: eop0 (eve0, dsp0), eop1(eve0, dsp0)
+        //    pipeline execution of multiple frames over time is as follows:
+        //    --------------------- time ------------------->
+        //    eop0: [eve0...][dsp0]
+        //    eop1:          [eve0...][dsp0]
+        //    eop0:                   [eve0...][dsp0]
+        //    eop1:                            [eve0...][dsp0]
+        // Additional benefit of setting pipeline_depth to 2 is that
+        //    it can also overlap host ReadFrame() with device processing:
+        //    --------------------- time ------------------->
+        //    eop0: [RF][eve0...][dsp0]
+        //    eop1:     [RF]     [eve0...][dsp0]
+        //    eop0:                    [RF][eve0...][dsp0]
+        //    eop1:                             [RF][eve0...][dsp0]
+        vector<ExecutionObjectPipeline *> eops;
+        uint32_t pipeline_depth = 2;  // 2 EOs in EOP -> depth 2
+        for (uint32_t j = 0; j < pipeline_depth; j++)
+            for (uint32_t i = 0; i < max(opts.num_eves, opts.num_dsps); i++)
+                eops.push_back(new ExecutionObjectPipeline(
+                      {(*e_eve)[i%opts.num_eves], (*e_dsp)[i%opts.num_dsps]}));
+        uint32_t num_eops = eops.size();
+
+        // Allocate input/output memory for each EOP
+        AllocateMemory(eops);
+
+        chrono::time_point<chrono::steady_clock> tloop0, tloop1;
+        tloop0 = chrono::steady_clock::now();
+
+        // Process frames with available eops in a pipelined manner
+        // additional num_eops iterations to flush pipeline (epilogue)
+        for (uint32_t frame_idx = 0;
+             frame_idx < opts.num_frames + num_eops; frame_idx++)
          {
-            eo_dla = execution_objects_dla[frame_idx % num_eos].get();
-            eo_dsp = execution_objects_dsp[frame_idx % num_eos].get();
+            ExecutionObjectPipeline* eop = eops[frame_idx % num_eops];
  
-            // Wait for previous frame on the same eo to finish processing
-            if (eo_dsp->ProcessFrameWait())
+            // Wait for previous frame on the same eop to finish processing
+            if (eop->ProcessFrameWait())
              {
-                int finished_idx = eo_dsp->GetFrameIndex();
-                clock_gettime(CLOCK_MONOTONIC, &t1);
-                ReportTime(finished_idx, "DSP",
-                           ms_diff(t0[finished_idx % num_eos], t1),
-                           eo_dsp->GetProcessTimeInMilliSeconds());
-
-                eo_input = execution_objects_dla[finished_idx % num_eos].get();
-                WriteFrameOutput(*eo_input, *eo_dsp, configuration);
+                ReportTime(eop);
+                WriteFrameOutput(*eop, c, opts);
              }
  
              // Read a frame and start processing it with current eo
-            if (ReadFrame(*eo_dla, frame_idx, configuration, num_frames,
-                          image_file, cap))
-            {
-                clock_gettime(CLOCK_MONOTONIC, &t0[frame_idx % num_eos]);
-                eo_dla->ProcessFrameStartAsync();
-
-                if (eo_dla->ProcessFrameWait())
-                {
-                    clock_gettime(CLOCK_MONOTONIC, &t1);
-                    ReportTime(frame_idx, "DLA",
-                               ms_diff(t0[frame_idx % num_eos], t1),
-                               eo_dla->GetProcessTimeInMilliSeconds());
-
-                    clock_gettime(CLOCK_MONOTONIC, &t0[frame_idx % num_eos]);
-                    eo_dsp->ProcessFrameStartAsync();
-                }
-            }
+            if (ReadFrame(*eop, frame_idx, c, opts, cap))
+                eop->ProcessFrameStartAsync();
          }
  
-        clock_gettime(CLOCK_MONOTONIC, &tloop1);
-        std::cout << "Loop total time (including read/write/print/etc): "
-                  << std::setw(6) << std::setprecision(4)
-                  << ms_diff(tloop0, tloop1) << "ms" << std::endl;
+        tloop1 = chrono::steady_clock::now();
+        chrono::duration<float> elapsed = tloop1 - tloop0;
+        cout << "Loop total time (including read/write/opencv/print/etc): "
+                  << setw(6) << setprecision(4)
+                  << (elapsed.count() * 1000) << "ms" << endl;
  
-        for (auto b : buffers)
-            free(b);
+        FreeMemory(eops);
+        for (auto eop : eops)  delete eop;
+        delete e_eve;
+        delete e_dsp;
      }
      catch (tidl::Exception &e)
      {
-        std::cerr << e.what() << std::endl;
+        cerr << e.what() << endl;
          status = false;
      }
  
      return status;
  }
  
-void ReportTime(int frame_index, std::string device_name, double elapsed_host,
-                double elapsed_device)
+// Create an Executor with the specified type and number of EOs
+Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
+                         int layers_group_id)
  {
-    double overhead = 100 - (elapsed_device/elapsed_host*100);
-    std::cout << "frame[" << frame_index << "]: "
-              << "Time on " << device_name << ": "
-              << std::setw(6) << std::setprecision(4)
-              << elapsed_device << "ms, "
-              << "host: "
-              << std::setw(6) << std::setprecision(4)
-              << elapsed_host << "ms ";
-    std::cout << "API overhead: "
-              << std::setw(6) << std::setprecision(3)
-              << overhead << " %" << std::endl;
-}
+    if (num == 0) return nullptr;
  
+    DeviceIds ids;
+    for (uint32_t i = 0; i < num; i++)
+        ids.insert(static_cast<DeviceId>(i));
  
-bool ReadFrame(ExecutionObject &eo, int frame_idx,
-               const Configuration& configuration, int num_frames,
-               std::string& image_file, VideoCapture &cap)
+    return new Executor(dt, ids, c, layers_group_id);
+}
+
+bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx,
+               const Configuration& c, const cmdline_opts_t& opts,
+               VideoCapture &cap)
  {
-    if (frame_idx >= num_frames)
+    if ((uint32_t)frame_idx >= opts.num_frames)
          return false;
-    eo.SetFrameIndex(frame_idx);
  
-    char*  frame_buffer = eo.GetInputBufferPtr();
+    eop.SetFrameIndex(frame_idx);
+
+    char*  frame_buffer = eop.GetInputBufferPtr();
      assert (frame_buffer != nullptr);
-    int channel_size = configuration.inWidth * configuration.inHeight;
+    int channel_size = c.inWidth * c.inHeight;
  
      Mat image;
-    if (! image_file.empty())
+    if (!opts.is_camera_input && !opts.is_video_input)
      {
-        if (is_preprocessed_input)
+        if (opts.input_file.empty())
          {
-            std::ifstream ifs(image_file, std::ios::binary);
-            ifs.seekg(frame_idx * channel_size * 3);
+            ifstream ifs(DEFAULT_INPUT, ios::binary);
+            ifs.seekg((frame_idx % DEFAULT_INPUT_FRAMES) * channel_size * 3);
              ifs.read(frame_buffer, channel_size * 3);
              bool ifs_status = ifs.good();
              ifs.close();
-            orig_width  = configuration.inWidth;
-            orig_height = configuration.inHeight;
+            orig_width  = c.inWidth;
+            orig_height = c.inHeight;
              return ifs_status;  // already PreProc-ed
          }
          else
          {
-            image = cv::imread(image_file, CV_LOAD_IMAGE_COLOR);
+            image = cv::imread(opts.input_file, CV_LOAD_IMAGE_COLOR);
              if (image.empty())
              {
-                std::cerr << "Unable to read from: " << image_file << std::endl;
+                cerr << "Unable to read from: " << opts.input_file << endl;
                  return false;
              }
          }
@@ -357,8 +305,7 @@ bool ReadFrame(ExecutionObject &eo, int frame_idx,
      Mat s_image, bgr_frames[3];
      orig_width  = image.cols;
      orig_height = image.rows;
-    cv::resize(image, s_image,
-               Size(configuration.inWidth, configuration.inHeight),
+    cv::resize(image, s_image, Size(c.inWidth, c.inHeight),
                 0, 0, cv::INTER_AREA);
      cv::split(s_image, bgr_frames);
      memcpy(frame_buffer,                bgr_frames[0].ptr(), channel_size);
@@ -368,25 +315,24 @@ bool ReadFrame(ExecutionObject &eo, int frame_idx,
  }
  
  // Create frame with boxes drawn around classified objects
-bool WriteFrameOutput(const ExecutionObject &eo_in,
-                      const ExecutionObject &eo_out,
-                      const Configuration& configuration)
+bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
+                      const Configuration& c, const cmdline_opts_t& opts)
  {
      // Asseembly original frame
-    int width  = configuration.inWidth;
-    int height = configuration.inHeight;
+    int width  = c.inWidth;
+    int height = c.inHeight;
      int channel_size = width * height;
      Mat frame, r_frame, bgr[3];
  
-    unsigned char *in = (unsigned char *) eo_in.GetInputBufferPtr();
+    unsigned char *in = (unsigned char *) eop.GetInputBufferPtr();
      bgr[0] = Mat(height, width, CV_8UC(1), in);
      bgr[1] = Mat(height, width, CV_8UC(1), in + channel_size);
      bgr[2] = Mat(height, width, CV_8UC(1), in + channel_size*2);
      cv::merge(bgr, 3, frame);
  
-    int frame_index = eo_in.GetFrameIndex();
+    int frame_index = eop.GetFrameIndex();
      char outfile_name[64];
-    if (! is_camera_input && is_preprocessed_input)
+    if (opts.input_file.empty())
      {
          snprintf(outfile_name, 64, "frame_%d.png", frame_index);
          cv::imwrite(outfile_name, frame);
@@ -394,15 +340,14 @@ bool WriteFrameOutput(const ExecutionObject &eo_in,
      }
  
      // Draw boxes around classified objects
-    float *out = (float *) eo_out.GetOutputBufferPtr();
-    int num_floats = eo_out.GetOutputBufferSizeInBytes() / sizeof(float);
+    float *out = (float *) eop.GetOutputBufferPtr();
+    int num_floats = eop.GetOutputBufferSizeInBytes() / sizeof(float);
      for (int i = 0; i < num_floats / 7; i++)
      {
          int index = (int)    out[i * 7 + 0];
          if (index < 0)  break;
  
          int   label = (int)  out[i * 7 + 1];
-        float score =        out[i * 7 + 2];
          int   xmin  = (int) (out[i * 7 + 3] * width);
          int   ymin  = (int) (out[i * 7 + 4] * height);
          int   xmax  = (int) (out[i * 7 + 5] * width);
@@ -423,9 +368,13 @@ bool WriteFrameOutput(const ExecutionObject &eo_in,
                               object_class->color.red), 2);
      }
  
-    // output
-    cv::resize(frame, r_frame, Size(orig_width, orig_height));
-    if (is_camera_input)
+    // Resize to output width/height, keep aspect ratio
+    uint32_t output_width = opts.output_width;
+    if (output_width == 0)  output_width = orig_width;
+    uint32_t output_height = (output_width*1.0f) / orig_width * orig_height;
+    cv::resize(frame, r_frame, Size(output_width, output_height));
+
+    if (opts.is_camera_input || opts.is_video_input)
      {
          cv::imshow("SSD_Multibox", r_frame);
          waitKey(1);
@@ -441,77 +390,28 @@ bool WriteFrameOutput(const ExecutionObject &eo_in,
      return true;
  }
  
-
-void ProcessArgs(int argc, char *argv[], std::string& config,
-                 uint32_t& num_devices, DeviceType& device_type,
-                 std::string& input_file)
-{
-    const struct option long_options[] =
-    {
-        {"config",      required_argument, 0, 'c'},
-        {"num_devices", required_argument, 0, 'n'},
-        {"image_file",  required_argument, 0, 'i'},
-        {"help",        no_argument,       0, 'h'},
-        {"verbose",     no_argument,       0, 'v'},
-        {0, 0, 0, 0}
-    };
-
-    int option_index = 0;
-
-    while (true)
-    {
-        int c = getopt_long(argc, argv, "c:n:i:hv", long_options, &option_index);
-
-        if (c == -1)
-            break;
-
-        switch (c)
-        {
-            case 'c': config = optarg;
-                      break;
-
-            case 'n': num_devices = atoi(optarg);
-                      assert (num_devices > 0 && num_devices <= 4);
-                      break;
-
-            case 'i': input_file = optarg;
-                      break;
-
-            case 'v': __TI_show_debug_ = true;
-                      break;
-
-            case 'h': DisplayHelp();
-                      exit(EXIT_SUCCESS);
-                      break;
-
-            case '?': // Error in getopt_long
-                      exit(EXIT_FAILURE);
-                      break;
-
-            default:
-                      std::cerr << "Unsupported option: " << c << std::endl;
-                      break;
-        }
-    }
-}
-
  void DisplayHelp()
  {
-    std::cout << "Usage: ssd_multibox\n"
-                 "  Will run partitioned ssd_multibox network to perform "
-                 "multi-objects detection\n"
-                 "  and classification.  First part of network "
-                 "(layersGroupId 1) runs on DLA,\n"
-                 "  second part (layersGroupId 2) runs on DSP.\n"
-                 "  Use -c to run a different segmentation network. "
-                 "Default is jdetnet.\n"
-                 "Optional arguments:\n"
-                 " -c <config>          Valid configs: jdetnet \n"
-                 " -n <number of cores> Number of cores to use (1 - 4)\n"
-                 " -i <image>           Path to the image file\n"
-                 "                      Default is 1 frame in testvecs\n"
-                 " -i camera            Use camera as input\n"
-                 " -v                   Verbose output during execution\n"
-                 " -h                   Help\n";
+    std::cout <<
+    "Usage: ssd_multibox\n"
+    "  Will run partitioned ssd_multibox network to perform "
+    "multi-objects detection\n"
+    "  and classification.  First part of network "
+    "(layersGroupId 1) runs on EVE,\n"
+    "  second part (layersGroupId 2) runs on DSP.\n"
+    "  Use -c to run a different segmentation network.  Default is jdetnet.\n"
+    "Optional arguments:\n"
+    " -c <config>          Valid configs: jdetnet \n"
+    " -d <number>          Number of dsp cores to use\n"
+    " -e <number>          Number of eve cores to use\n"
+    " -i <image>           Path to the image file as input\n"
+    "                      Default are 9 frames in testvecs\n"
+    " -i camera<number>    Use camera as input\n"
+    "                      video input port: /dev/video<number>\n"
+    " -i <name>.{mp4,mov,avi}  Use video file as input\n"
+    " -f <number>          Number of frames to process\n"
+    " -w <number>          Output image/video width\n"
+    " -v                   Verbose output during execution\n"
+    " -h                   Help\n";
  }