PLSDK-2597
author Djordje Senicic <x0157990@ti.com>
Thu, 28 Feb 2019 13:47:43 +0000 (08:47 -0500)
committer Djordje Senicic <x0157990@ti.com>
Thu, 28 Feb 2019 13:47:43 +0000 (08:47 -0500)
  - SSD_Multibox: updated to include slider for run-time probability modification
  - SSD_Multibox: skip grabbing frame input multiple times, as real-time rate would vary based on multicore configuration and network complexity
  - SSD_Multibox: resize and central cropping added; instead of showing rectangles in original image, network input is presented
  - Classification: Toydogs configuration added including models

Signed-off-by: Djordje Senicic <x0157990@ti.com>
examples/classification/clips/toydogs_854x480.mp4 [new file with mode: 0644]
examples/classification/main.cpp
examples/classification/stream_config_toydogs.txt [new file with mode: 0644]
examples/classification/toydogsclasses.txt [new file with mode: 0644]
examples/classification/toydogsnet.txt [new file with mode: 0644]
examples/ssd_multibox/main.cpp
examples/test/testvecs/config/tidl_models/tidl_net_toydogs_jacintonet11v2.bin [new file with mode: 0644]
examples/test/testvecs/config/tidl_models/tidl_param_toydogs_jacintonet11v2.bin [new file with mode: 0644]

diff --git a/examples/classification/clips/toydogs_854x480.mp4 b/examples/classification/clips/toydogs_854x480.mp4
new file mode 100644 (file)
index 0000000..dee5f04
Binary files /dev/null and b/examples/classification/clips/toydogs_854x480.mp4 differ
index 22316063f8300be3475dedad51265d6581e9be30..020004bb2263322e3660f9ebf05274ec66402e03 100644 (file)
@@ -530,10 +530,10 @@ void DisplayFrame(const ExecutionObjectPipeline* eop, VideoWriter& writer,
         {
             // overlay the display window, if ball seen during last two times
             cv::putText(show_image, labels_classes[rpt_id].c_str(),
         {
             // overlay the display window, if ball seen during last two times
             cv::putText(show_image, labels_classes[rpt_id].c_str(),
-                cv::Point(rectCrop[r].x + 5,rectCrop[r].y + 20), // Coordinates
+                cv::Point(rectCrop[r].x + 5,rectCrop[r].y + 32), // Coordinates
                 cv::FONT_HERSHEY_COMPLEX_SMALL, // Font
                 cv::FONT_HERSHEY_COMPLEX_SMALL, // Font
-                1.0, // Scale. 2.0 = 2x bigger
-                cv::Scalar(0,0,255), // Color
+                1.5, // Scale. 2.0 = 2x bigger
+                cv::Scalar(0,0,0), // Color
                 1, // Thickness
                 8); // Line type
             cv::rectangle(show_image, rectCrop[r], Scalar(255,0,0), 3);
                 1, // Thickness
                 8); // Line type
             cv::rectangle(show_image, rectCrop[r], Scalar(255,0,0), 3);
diff --git a/examples/classification/stream_config_toydogs.txt b/examples/classification/stream_config_toydogs.txt
new file mode 100644 (file)
index 0000000..e64d1e8
--- /dev/null
@@ -0,0 +1,11 @@
+numFrames   = 999900
+inData   = /usr/share/ti/tidl/examples/test/testvecs/input/preproc_0_224x224.y
+outData   = "/usr/share/ti/tidl/examples/classification/stats_tool_out.bin"
+netBinFile      = "/usr/share/ti/tidl/examples/test/testvecs/config/tidl_models/tidl_net_toydogs_jacintonet11v2.bin"
+paramsBinFile   = "/usr/share/ti/tidl/examples/test/testvecs/config/tidl_models/tidl_param_toydogs_jacintonet11v2.bin"
+preProcType = 0
+inWidth = 224
+inHeight = 224
+inNumChannels = 3
+layerIndex2LayerGroupId = { {12, 2}, {13, 2}, {14, 2} }
+
diff --git a/examples/classification/toydogsclasses.txt b/examples/classification/toydogsclasses.txt
new file mode 100644 (file)
index 0000000..b4729f4
--- /dev/null
@@ -0,0 +1,9 @@
+shar_pei
+golden_retriever
+afghan_hound
+dachshund
+german_shepherd
+labrador_retriever
+pomeranian
+rottweiler
+chow
diff --git a/examples/classification/toydogsnet.txt b/examples/classification/toydogsnet.txt
new file mode 100644 (file)
index 0000000..b4729f4
--- /dev/null
@@ -0,0 +1,9 @@
+shar_pei
+golden_retriever
+afghan_hound
+dachshund
+german_shepherd
+labrador_retriever
+pomeranian
+rottweiler
+chow
index 6f7a5aff76b32499b3db35b70b8338ff0694eb85..10714c74ae5d99409a7c4e2b97c9064dc0059ee0 100644 (file)
@@ -61,6 +61,10 @@ using namespace cv;
 #define DEFAULT_OBJECT_CLASSES_LIST_FILE "./jdetnet_voc_objects.json"
 #define DEFAULT_OUTPUT_PROB_THRESHOLD  25
 
 #define DEFAULT_OBJECT_CLASSES_LIST_FILE "./jdetnet_voc_objects.json"
 #define DEFAULT_OUTPUT_PROB_THRESHOLD  25
 
+/* Enable this macro to record individual output files and */
+/* resized, cropped network input files                    */
+#define DEBUG_FILES
+
 std::unique_ptr<ObjectClasses> object_classes;
 uint32_t orig_width;
 uint32_t orig_height;
 std::unique_ptr<ObjectClasses> object_classes;
 uint32_t orig_width;
 uint32_t orig_height;
@@ -77,6 +81,19 @@ bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
                       const Configuration& c, const cmdline_opts_t& opts);
 static void DisplayHelp();
 
                       const Configuration& c, const cmdline_opts_t& opts);
 static void DisplayHelp();
 
+/***************************************************************/
+/* Slider to control detection confidence level                */
+/***************************************************************/
+int prob_slider = DEFAULT_OUTPUT_PROB_THRESHOLD;
+int prob_slider_max = 100;
+static void on_trackbar( int slider_id, void *inst )
+{
+  //This function is invoked on every slider move. 
+  //No action required, since prob_slider is automatically updated.
+  //But, for any additional operation on slider move, this is the place to insert code.
+  //std::cout << "slider moved to:" << prob_slider << " max val is:" << prob_slider_max << endl;
+}
+
 
 int main(int argc, char *argv[])
 {
 
 int main(int argc, char *argv[])
 {
@@ -148,11 +165,15 @@ bool RunConfiguration(const cmdline_opts_t& opts)
         return false;
     }
     c.enableApiTrace = opts.verbose;
         return false;
     }
     c.enableApiTrace = opts.verbose;
-
     // setup camera/video input
     VideoCapture cap;
     if (! SetVideoInputOutput(cap, opts, "SSD_Multibox"))  return false;
 
     // setup camera/video input
     VideoCapture cap;
     if (! SetVideoInputOutput(cap, opts, "SSD_Multibox"))  return false;
 
+    char TrackbarName[50];
+    prob_slider = (int)floor(opts.output_prob_threshold);
+    sprintf( TrackbarName, "Prob(%d %%)", prob_slider_max );
+    createTrackbar( TrackbarName, "SSD_Multibox", &prob_slider, prob_slider_max, on_trackbar );
+
     // setup preprocessed input
     ifstream ifs;
     if (opts.is_preprocessed_input)
     // setup preprocessed input
     ifstream ifs;
     if (opts.is_preprocessed_input)
@@ -308,22 +329,60 @@ bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx,
     }
     else
     {
     }
     else
     {
-        // 640x480 camera input, process one in every 5 frames,
-        // can adjust number of skipped frames to match real time processing
-        if (! cap.grab())  return false;
-        if (! cap.grab())  return false;
-        if (! cap.grab())  return false;
-        if (! cap.grab())  return false;
-        if (! cap.grab())  return false;
-        if (! cap.retrieve(image)) return false;
+        if(opts.is_camera_input)
+        {
+           if (! cap.grab()) return false;
+           if (! cap.retrieve(image)) return false;
+        } 
+        else
+        { // Video clip
+           if (cap.grab()) 
+           {
+             if (! cap.retrieve(image)) return false;
+           } else {
+             //Rewind!
+             std::cout << "Video clip rewinded!" << std::endl;
+             cap.set(CAP_PROP_POS_FRAMES, 0);
+             if (! cap.grab()) return false;
+             if (! cap.retrieve(image)) return false;
+           }
+        }
     }
 
     }
 
-    // scale to network input size
+    // Scale to network input size:
+    // Preserve aspect ratio, by doing central cropping
+    // Choose vertical or horizontal central cropping based on dimension reduction
     Mat s_image, bgr_frames[3];
     orig_width  = image.cols;
     orig_height = image.rows;
     Mat s_image, bgr_frames[3];
     orig_width  = image.cols;
     orig_height = image.rows;
-    cv::resize(image, s_image, Size(c.inWidth, c.inHeight),
-               0, 0, cv::INTER_AREA);
+    if(orig_width > orig_height)
+    {
+       float change_width  = (float)c.inWidth / (float)orig_width;
+       float change_height = (float)c.inHeight / (float)orig_height; 
+       if(change_width < change_height)
+       { // E.g. for 1920x1080->512x512, we first crop central part roi(420, 0, 1080, 1080), then resize to (512x512)
+         int offset_x = (int)round(0.5 * ((float)orig_width - ((float)orig_height * (float)c.inWidth / (float)c.inHeight)));
+         cv::resize(image(Rect(offset_x, 0, orig_width - 2 * offset_x, orig_height)), s_image, Size(c.inWidth, c.inHeight), 0, 0, cv::INTER_AREA);
+       } else {
+         // E.g. for 1920x1080->768x320, we first crop central part roi(0, 140, 1920, 800), then resize to (768x320)
+         int offset_y = (int)round(0.5 * ((float)orig_height - ((float)orig_width * (float)c.inHeight / (float)c.inWidth)));
+         cv::resize(image(Rect(0, offset_y, orig_width, orig_height - 2 * offset_y)), s_image, Size(c.inWidth, c.inHeight), 0, 0, cv::INTER_AREA);
+       }
+    }
+
+    #ifdef DEBUG_FILES
+    {
+      // Image files can be converted into video using, example script
+      // (on desktop Ubuntu, with ffmpeg installed):
+      // ffmpeg -i netin_%04d.png -vf "scale=(iw*sar)*max(768/(iw*sar)\,320/ih):ih*max(768/(iw*sar)\,320/ih), crop=768:320" -b:v 4000k out.mp4
+      // Update width 768, height 320, if necessary 
+      char netin_name[80];
+      sprintf(netin_name, "netin_%04d.png", frame_idx);
+      cv::imwrite(netin_name, s_image);
+      std::cout << "Video input, width:" << orig_width << " height:" << orig_height << " Network width:" << c.inWidth << " height:" << c.inHeight << std::endl;
+    }
+    #endif
+
     cv::split(s_image, bgr_frames);
     memcpy(frame_buffer,                bgr_frames[0].ptr(), channel_size);
     memcpy(frame_buffer+1*channel_size, bgr_frames[1].ptr(), channel_size);
     cv::split(s_image, bgr_frames);
     memcpy(frame_buffer,                bgr_frames[0].ptr(), channel_size);
     memcpy(frame_buffer+1*channel_size, bgr_frames[1].ptr(), channel_size);
@@ -365,7 +424,7 @@ bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
         if (index < 0)  break;
 
         float score =        out[i * 7 + 2];
         if (index < 0)  break;
 
         float score =        out[i * 7 + 2];
-        if (score * 100 < opts.output_prob_threshold)  continue;
+        if (score * 100 < (float)prob_slider)  continue;
 
         int   label = (int)  out[i * 7 + 1];
         int   xmin  = (int) (out[i * 7 + 3] * width);
 
         int   label = (int)  out[i * 7 + 1];
         int   xmin  = (int) (out[i * 7 + 3] * width);
@@ -375,10 +434,10 @@ bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
 
         const ObjectClass& object_class = object_classes->At(label);
 
 
         const ObjectClass& object_class = object_classes->At(label);
 
-#if 0
-        printf("%2d: (%d, %d) -> (%d, %d): %s, score=%f\n",
+        if(opts.verbose) {
+            printf("%2d: (%d, %d) -> (%d, %d): %s, score=%f\n",
                i, xmin, ymin, xmax, ymax, object_class.label.c_str(), score);
                i, xmin, ymin, xmax, ymax, object_class.label.c_str(), score);
-#endif
+        }
 
         if (xmin < 0)       xmin = 0;
         if (ymin < 0)       ymin = 0;
 
         if (xmin < 0)       xmin = 0;
         if (ymin < 0)       ymin = 0;
@@ -390,15 +449,18 @@ bool WriteFrameOutput(const ExecutionObjectPipeline& eop,
                              object_class.color.red), 2);
     }
 
                              object_class.color.red), 2);
     }
 
-    // Resize to output width/height, keep aspect ratio
-    uint32_t output_width = opts.output_width;
-    if (output_width == 0)  output_width = orig_width;
-    uint32_t output_height = (output_width*1.0f) / orig_width * orig_height;
-    cv::resize(frame, r_frame, Size(output_width, output_height));
-
+    r_frame = frame;
     if (opts.is_camera_input || opts.is_video_input)
     {
         cv::imshow("SSD_Multibox", r_frame);
     if (opts.is_camera_input || opts.is_video_input)
     {
         cv::imshow("SSD_Multibox", r_frame);
+#ifdef DEBUG_FILES
+        // Image files can be converted into video using, example script
+        // (on desktop Ubuntu, with ffmpeg installed):
+        // ffmpeg -i multibox_%04d.png -vf "scale=(iw*sar)*max(768/(iw*sar)\,320/ih):ih*max(768/(iw*sar)\,320/ih), crop=768:320" -b:v 4000k out.mp4
+        // Update width 768, height 320, if necessary 
+        snprintf(outfile_name, 64, "multibox_%04d.png", frame_index);
+        cv::imwrite(outfile_name, r_frame);
+#endif
         waitKey(1);
     }
     else
         waitKey(1);
     }
     else
diff --git a/examples/test/testvecs/config/tidl_models/tidl_net_toydogs_jacintonet11v2.bin b/examples/test/testvecs/config/tidl_models/tidl_net_toydogs_jacintonet11v2.bin
new file mode 100644 (file)
index 0000000..f4ca1d8
Binary files /dev/null and b/examples/test/testvecs/config/tidl_models/tidl_net_toydogs_jacintonet11v2.bin differ
diff --git a/examples/test/testvecs/config/tidl_models/tidl_param_toydogs_jacintonet11v2.bin b/examples/test/testvecs/config/tidl_models/tidl_param_toydogs_jacintonet11v2.bin
new file mode 100644 (file)
index 0000000..4ee32f2
Binary files /dev/null and b/examples/test/testvecs/config/tidl_models/tidl_param_toydogs_jacintonet11v2.bin differ