summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yuan Zhao 2018-09-12 15:01:53 -0500
committer Yuan Zhao 2018-09-12 15:01:53 -0500
commit 7e9a3942ec38efd64d45e34c10cba2f2938f5618 (patch)
tree 023b1c79f5e5327000bd33a5d38a22ae3309730a
parent a229faf1c14ddfa48e44d271ddfad44e4e7fc033 (diff)
parent 44937ff4f0e2766eee61d55bda405c0e601123e7 (diff)
download tidl-api-7e9a3942ec38efd64d45e34c10cba2f2938f5618.tar.gz
tidl-api-7e9a3942ec38efd64d45e34c10cba2f2938f5618.tar.xz
tidl-api-7e9a3942ec38efd64d45e34c10cba2f2938f5618.zip
Merge branch 'hotfix/v01.01.00.01' v01.01.00.01
-rw-r--r-- examples/classification/avg_fps_window.h 89
-rw-r--r-- examples/classification/main.cpp 39
-rw-r--r-- tidl_api/make.buildid 2
3 files changed, 116 insertions, 14 deletions
diff --git a/examples/classification/avg_fps_window.h b/examples/classification/avg_fps_window.h
new file mode 100644
index 0000000..b8b5644
--- /dev/null
+++ b/examples/classification/avg_fps_window.h
@@ -0,0 +1,89 @@
1/******************************************************************************
2 * Copyright (c) 2018, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28
29#pragma once
30
31#include <vector>
32#include <chrono>
33
#define MAX_WINDOW_SIZE 64
#define INIT_FRAME_TIME 0.001

// Compute average FPS across a sliding window of frames.
//
// Usage: call Tick() once per loop iteration to measure the time since the
// previous Tick() (or since construction), then call UpdateAvgFPS() to fold
// that measurement into the window and obtain the new average.
//
// The window is pre-filled with INIT_FRAME_TIME seconds per slot, so the
// reported average only reflects measured frames once window_size frames
// have been recorded.
class AvgFPSWindow
{
    public:
        // window_size: number of frames to average over.  A value of 0 or a
        // value larger than MAX_WINDOW_SIZE is replaced by MAX_WINDOW_SIZE.
        AvgFPSWindow(uint32_t window_size) :
            window_size_m((window_size == 0 || window_size > MAX_WINDOW_SIZE)
                          ? MAX_WINDOW_SIZE : window_size),
            circ_idx_m(0),
            total_time_m(window_size_m * INIT_FRAME_TIME),
            history_times_m(window_size_m, INIT_FRAME_TIME),
            t0_m(std::chrono::steady_clock::now()),
            frame_time_m(INIT_FRAME_TIME)
        {
        }

        // Invoked per loop iteration to capture frame time: stores the
        // seconds elapsed since the previous Tick() (or construction) and
        // restarts the measurement from now.
        void Tick()
        {
            const std::chrono::steady_clock::time_point now =
                                            std::chrono::steady_clock::now();
            const std::chrono::duration<double> elapsed = now - t0_m;
            frame_time_m = elapsed.count();  // in seconds
            t0_m = now;
        }

        // Store the most recently captured frame time into the circular
        // history, replacing the oldest entry, and keep the running sum of
        // the window in step with it.
        // Reading will only become valid after window_size_m frames.
        // Return updated average FPS.
        double UpdateAvgFPS()
        {
            // Running sum: swap the oldest sample's contribution for the new
            // one instead of re-adding the whole window.
            total_time_m += frame_time_m - history_times_m[circ_idx_m];
            history_times_m[circ_idx_m] = frame_time_m;
            circ_idx_m = (circ_idx_m + 1) % window_size_m;
            return GetAvgFPS();
        }

        // Return average FPS over the window: number of slots divided by
        // the total time held in those slots.  Does not modify the window.
        double GetAvgFPS() const
        {
            return (1.0 * window_size_m) / total_time_m;
        }

        AvgFPSWindow()                               =delete;
        AvgFPSWindow(const AvgFPSWindow&)            =delete;
        AvgFPSWindow& operator=(const AvgFPSWindow&) =delete;

    private:
        // NOTE: members are initialised in declaration order; the
        // constructor relies on window_size_m being set first.
        uint32_t            window_size_m;    // number of slots in the window
        uint32_t            circ_idx_m;       // next slot to overwrite
        double              total_time_m;     // sum of history_times_m (sec)
        std::vector<double> history_times_m;  // per-frame times (sec)
        std::chrono::time_point<std::chrono::steady_clock> t0_m;  // last Tick
        double              frame_time_m;     // latest measured frame (sec)
};
diff --git a/examples/classification/main.cpp b/examples/classification/main.cpp
index 21c05a7..749b713 100644
--- a/examples/classification/main.cpp
+++ b/examples/classification/main.cpp
@@ -43,12 +43,14 @@
43#include "execution_object.h" 43#include "execution_object.h"
44#include "execution_object_pipeline.h" 44#include "execution_object_pipeline.h"
45#include "configuration.h" 45#include "configuration.h"
46#include "avg_fps_window.h"
46 47
47#include "opencv2/core.hpp" 48#include "opencv2/core.hpp"
48#include "opencv2/imgproc.hpp" 49#include "opencv2/imgproc.hpp"
49#include "opencv2/highgui.hpp" 50#include "opencv2/highgui.hpp"
50#include "opencv2/videoio.hpp" 51#include "opencv2/videoio.hpp"
51 52
53
52//#define TWO_ROIs 54//#define TWO_ROIs
53#define LIVE_DISPLAY 55#define LIVE_DISPLAY
54#define PERF_VERBOSE 56#define PERF_VERBOSE
@@ -105,7 +107,8 @@ void imagenetCallBackFunc(int event, int x, int y, int flags, void* userdata)
105Mat in_image, image, r_image, cnn_image, show_image, bgr_frames[3]; 107Mat in_image, image, r_image, cnn_image, show_image, bgr_frames[3];
106Mat to_stream; 108Mat to_stream;
107Rect rectCrop[NUM_ROI]; 109Rect rectCrop[NUM_ROI];
108double avg_fps; 110// Report average FPS across a sliding window of 16 frames
111AvgFPSWindow fps_window(16);
109 112
110static int tf_postprocess(uchar *in, int size, int roi_idx, int frame_idx, int f_id); 113static int tf_postprocess(uchar *in, int size, int roi_idx, int frame_idx, int f_id);
111static void tf_preprocess(uchar *out, uchar *in, int size); 114static void tf_preprocess(uchar *out, uchar *in, int size);
@@ -229,7 +232,6 @@ bool RunConfiguration(const std::string& config_file, int num_layers_groups, uin
229 for (int k = 0; k < NUM_ROI; k++) 232 for (int k = 0; k < NUM_ROI; k++)
230 for(int i = 0; i < 3; i ++) 233 for(int i = 0; i < 3; i ++)
231 selclass_history[k][i] = -1; 234 selclass_history[k][i] = -1;
232 avg_fps = 0.0;
233 int num_frames = configuration.numFrames; 235 int num_frames = configuration.numFrames;
234 std::cout << "About to start ProcessFrame loop!!" << std::endl; 236 std::cout << "About to start ProcessFrame loop!!" << std::endl;
235 237
@@ -249,6 +251,7 @@ bool RunConfiguration(const std::string& config_file, int num_layers_groups, uin
249 DisplayFrame(eop, writer, frame_idx, num_eops, 251 DisplayFrame(eop, writer, frame_idx, num_eops,
250 num_eves, num_dsps); 252 num_eves, num_dsps);
251 } 253 }
254 fps_window.Tick();
252 255
253 if (ReadFrame(eop, frame_idx, num_frames, cap, writer)) 256 if (ReadFrame(eop, frame_idx, num_frames, cap, writer))
254 eop->ProcessFrameStartAsync(); 257 eop->ProcessFrameStartAsync();
@@ -289,6 +292,7 @@ bool CreateExecutionObjectPipelines(uint32_t num_eves, uint32_t num_dsps,
289 ids_eve.insert(static_cast<DeviceId>(i)); 292 ids_eve.insert(static_cast<DeviceId>(i));
290 for (uint32_t i = 0; i < num_dsps; i++) 293 for (uint32_t i = 0; i < num_dsps; i++)
291 ids_dsp.insert(static_cast<DeviceId>(i)); 294 ids_dsp.insert(static_cast<DeviceId>(i));
295 const uint32_t buffer_factor = 2;
292 296
293 switch(num_layers_groups) 297 switch(num_layers_groups)
294 { 298 {
@@ -301,10 +305,15 @@ bool CreateExecutionObjectPipelines(uint32_t num_eves, uint32_t num_dsps,
301 // Construct ExecutionObjectPipeline with single Execution Object to 305 // Construct ExecutionObjectPipeline with single Execution Object to
302 // process each frame. This is parallel processing of frames with 306 // process each frame. This is parallel processing of frames with
303 // as many DSP and EVE cores that we have on hand. 307 // as many DSP and EVE cores that we have on hand.
304 for (uint32_t i = 0; i < num_eves; i++) 308 // If buffer_factor == 2, duplicating EOPs for double buffering
305 eops.push_back(new ExecutionObjectPipeline({(*e_eve)[i]})); 309 // and overlapping host pre/post-processing with device processing
306 for (uint32_t i = 0; i < num_dsps; i++) 310 for (uint32_t j = 0; j < buffer_factor; j++)
307 eops.push_back(new ExecutionObjectPipeline({(*e_dsp)[i]})); 311 {
312 for (uint32_t i = 0; i < num_eves; i++)
313 eops.push_back(new ExecutionObjectPipeline({(*e_eve)[i]}));
314 for (uint32_t i = 0; i < num_dsps; i++)
315 eops.push_back(new ExecutionObjectPipeline({(*e_dsp)[i]}));
316 }
308 break; 317 break;
309 318
310 case 2: // Two layers group 319 case 2: // Two layers group
@@ -324,9 +333,15 @@ bool CreateExecutionObjectPipelines(uint32_t num_eves, uint32_t num_dsps,
324 // Construct ExecutionObjectPipeline that utilizes multiple 333 // Construct ExecutionObjectPipeline that utilizes multiple
325 // ExecutionObjects to process a single frame, each ExecutionObject 334 // ExecutionObjects to process a single frame, each ExecutionObject
326 // processes one layerGroup of the network 335 // processes one layerGroup of the network
327 for (uint32_t i = 0; i < std::max(num_eves, num_dsps); i++) 336 // If buffer_factor == 2, duplicating EOPs for pipelining at
328 eops.push_back(new ExecutionObjectPipeline({(*e_eve)[i%num_eves], 337 // EO level rather than at EOP level, in addition to double buffering
329 (*e_dsp)[i%num_dsps]})); 338 // and overlapping host pre/post-processing with device processing
339 for (uint32_t j = 0; j < buffer_factor; j++)
340 {
341 for (uint32_t i = 0; i < std::max(num_eves, num_dsps); i++)
342 eops.push_back(new ExecutionObjectPipeline(
343 {(*e_eve)[i%num_eves], (*e_dsp)[i%num_dsps]}));
344 }
330 break; 345 break;
331 346
332 default: 347 default:
@@ -585,10 +600,8 @@ void DisplayFrame(const ExecutionObjectPipeline* eop, VideoWriter& writer,
585 selected_items[k] == rpt_id ? cv::Scalar(0,0,255) : 600 selected_items[k] == rpt_id ? cv::Scalar(0,0,255) :
586 cv::Scalar(255,255,255), 1, 8); 601 cv::Scalar(255,255,255), 1, 8);
587 } 602 }
588 double elapsed_host = eop->GetHostProcessTimeInMilliSeconds(); 603
589 /* Exponential averaging */ 604 double avg_fps = fps_window.UpdateAvgFPS();
590 avg_fps = 0.1 * ((double)num_eops * 1000.0 /
591 ((double)NUM_ROI * elapsed_host)) + 0.9 * avg_fps;
592 sprintf(tmp_classwindow_string, "FPS:%5.2lf", avg_fps ); 605 sprintf(tmp_classwindow_string, "FPS:%5.2lf", avg_fps );
593 606
594#ifdef PERF_VERBOSE 607#ifdef PERF_VERBOSE
diff --git a/tidl_api/make.buildid b/tidl_api/make.buildid
index d8677de..c597695 100644
--- a/tidl_api/make.buildid
+++ b/tidl_api/make.buildid
@@ -27,7 +27,7 @@
27MAJOR_VER=1 27MAJOR_VER=1
28MINOR_VER=1 28MINOR_VER=1
29PATCH_VER=0 29PATCH_VER=0
30BUILD_VER=0 30BUILD_VER=1
31 31
32ifeq ($(shell git rev-parse --short HEAD 2>&1 1>/dev/null; echo $$?),0) 32ifeq ($(shell git rev-parse --short HEAD 2>&1 1>/dev/null; echo $$?),0)
33BUILD_SHA?=$(shell git rev-parse --short HEAD) 33BUILD_SHA?=$(shell git rev-parse --short HEAD)