summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (from parent 1: e49cc19)
author | Djordje Senicic <x0157990@ti.com> | Wed, 31 Oct 2018 17:18:00 +0000 (13:18 -0400)
committer | Ajay Jayaraj <ajayj@ti.com> | Thu, 1 Nov 2018 16:18:32 +0000 (11:18 -0500)
- Add image preprocessing for types 1 and 2.
- If the number of layer groups is 1, force all layers to be in the same group
(MCT-1075)
examples/mcbench/main.cpp | patch | blob | history | |
examples/mcbench/scripts/all_5749.sh | patch | blob | history |
index 3ba7d9f809ca102c1e4419c1df8ece8afe61413c..128bc57695504963548a4451992258ab63080bd0 100644 (file)
#define NUM_VIDEO_FRAMES 100
-#define DEFAULT_CONFIG "../test/testvecs/config/mcbench/tidl_config_j11_v2.txt"
+#define DEFAULT_CONFIG "../test/testvecs/config/infer/tidl_config_j11_v2.txt"
bool RunConfiguration(const cmdline_opts_t& opts);
Executor* CreateExecutor(DeviceType dt, uint32_t num, const Configuration& c,
int layers_group_id);
bool CreateExecutionObjectPipelines(uint32_t num_eves, uint32_t num_dsps,
- Configuration& configuration,
+ Configuration& configuration,
uint32_t num_layers_groups,
Executor*& e_eve, Executor*& e_dsp,
std::vector<ExecutionObjectPipeline*>& eops);
{
// Read the TI DL configuration file
Configuration c;
- std::string config_file = opts.config;
- std::string inputFile;
-
- bool status = c.ReadFromFile(config_file);
- if (!status)
- {
- cerr << "Error in configuration file: " << config_file << endl;
+ if (!c.ReadFromFile(opts.config))
return false;
- }
+
c.enableApiTrace = opts.verbose;
+ if(opts.num_layers_groups == 1)
+ c.runFullNet = true; //Force all layers to be in the same group
+ std::string inputFile;
if (opts.input_file.empty())
inputFile = c.inData;
else
inputFile = opts.input_file;
- int channel_size = c.inWidth * c.inHeight;
- int frame_size = c.inNumChannels * channel_size;
+ int frame_size = c.inNumChannels * c.inWidth * c.inHeight;
c.numFrames = GetBinaryFileSize (inputFile) / frame_size;
// Read input file into memory buffer
char *input_frame_buffer = new char[c.numFrames * frame_size]();
ifstream ifs(inputFile, ios::binary);
- ifs.read(input_frame_buffer, channel_size * 3);
+ ifs.read(input_frame_buffer, c.numFrames * frame_size);
if(!ifs.good()) {
std::cout << "Invalid File input:" << inputFile << std::endl;
- return false;
+ return false;
}
+ bool status = true;
try
{
- // Construct ExecutionObjectPipeline that utilizes multiple
- // ExecutionObjects to process a single frame, each ExecutionObject
- // processes one layerGroup of the network
- //
- // Pipeline depth can enable more optimized pipeline execution:
- // Given one EVE and one DSP as an example, with different
- // pipeline_depth, we have different execution behavior:
- // If pipeline_depth is set to 1,
- // we create one EOP: eop0 (eve0, dsp0)
- // pipeline execution of multiple frames over time is as follows:
- // --------------------- time ------------------->
- // eop0: [eve0...][dsp0]
- // eop0: [eve0...][dsp0]
- // eop0: [eve0...][dsp0]
- // eop0: [eve0...][dsp0]
- // If pipeline_depth is set to 2,
- // we create two EOPs: eop0 (eve0, dsp0), eop1(eve0, dsp0)
- // pipeline execution of multiple frames over time is as follows:
- // --------------------- time ------------------->
- // eop0: [eve0...][dsp0]
- // eop1: [eve0...][dsp0]
- // eop0: [eve0...][dsp0]
- // eop1: [eve0...][dsp0]
- // Additional benefit of setting pipeline_depth to 2 is that
- // it can also overlap host ReadFrame() with device processing:
- // --------------------- time ------------------->
- // eop0: [RF][eve0...][dsp0]
- // eop1: [RF] [eve0...][dsp0]
- // eop0: [RF][eve0...][dsp0]
- // eop1: [RF][eve0...][dsp0]
Executor *e_eve = NULL;
Executor *e_dsp = NULL;
std::vector<ExecutionObjectPipeline *> eops;
if (! CreateExecutionObjectPipelines(opts.num_eves, opts.num_dsps, c,
- opts.num_layers_groups, e_eve, e_dsp, eops))
+ opts.num_layers_groups,
+ e_eve, e_dsp, eops))
return false;
- uint32_t num_eops = eops.size();
+
// Allocate input/output memory for each EOP
AllocateMemory(eops);
// Process frames with available eops in a pipelined manner
// additional num_eops iterations to flush pipeline (epilogue)
+ uint32_t num_eops = eops.size();
for (uint32_t frame_idx = 0;
frame_idx < opts.num_frames + num_eops; frame_idx++)
{
// Wait for previous frame on the same eop to finish processing
if (eop->ProcessFrameWait())
- {
- if(opts.verbose)
- ReportTime(eop);
- }
+ ;
// Read a frame and start processing it with current eo
if (ReadFrame(*eop, frame_idx, c, opts, input_frame_buffer))
cout << "FPS:" << opts.num_frames / elapsed.count() << endl;
FreeMemory(eops);
- for (auto eop : eops) delete eop;
+
+ for (auto eop : eops)
+ delete eop;
+
delete e_eve;
delete e_dsp;
}
cerr << e.what() << endl;
status = false;
}
+
delete [] input_frame_buffer;
return status;
}
}
bool CreateExecutionObjectPipelines(uint32_t num_eves, uint32_t num_dsps,
- Configuration& configuration,
+ Configuration& configuration,
uint32_t num_layers_groups,
Executor*& e_eve, Executor*& e_dsp,
std::vector<ExecutionObjectPipeline*>& eops)
ids_eve.insert(static_cast<DeviceId>(i));
for (uint32_t i = 0; i < num_dsps; i++)
ids_dsp.insert(static_cast<DeviceId>(i));
+
+ // Construct ExecutionObjectPipeline that utilizes multiple
+ // ExecutionObjects to process a single frame, each ExecutionObject
+ // processes one layerGroup of the network
+ //
+ // Pipeline depth can enable more optimized pipeline execution:
+ // Given one EVE and one DSP as an example, with different
+ // buffer_factor, we have different execution behavior:
+ // If buffer_factor is set to 1,
+ // we create one EOP: eop0 (eve0, dsp0)
+ // pipeline execution of multiple frames over time is as follows:
+ // --------------------- time ------------------->
+ // eop0: [eve0...][dsp0]
+ // eop0: [eve0...][dsp0]
+ // eop0: [eve0...][dsp0]
+ // eop0: [eve0...][dsp0]
+ // If buffer_factor is set to 2,
+ // we create two EOPs: eop0 (eve0, dsp0), eop1(eve0, dsp0)
+ // pipeline execution of multiple frames over time is as follows:
+ // --------------------- time ------------------->
+ // eop0: [eve0...][dsp0]
+ // eop1: [eve0...][dsp0]
+ // eop0: [eve0...][dsp0]
+ // eop1: [eve0...][dsp0]
+ // Additional benefit of setting buffer_factor to 2 is that
+ // it can also overlap host ReadFrame() with device processing:
+ // --------------------- time ------------------->
+ // eop0: [RF][eve0...][dsp0]
+ // eop1: [RF] [eve0...][dsp0]
+ // eop0: [RF][eve0...][dsp0]
+ // eop1: [RF][eve0...][dsp0]
const uint32_t buffer_factor = 2;
switch(num_layers_groups)
// EO level rather than at EOP level, in addition to double buffering
// and overlapping host pre/post-processing with device processing
for (uint32_t j = 0; j < buffer_factor; j++)
- {
for (uint32_t i = 0; i < std::max(num_eves, num_dsps); i++)
eops.push_back(new ExecutionObjectPipeline(
{(*e_eve)[i%num_eves], (*e_dsp)[i%num_dsps]}));
- }
break;
default:
- std::cout << "Layers groups can be either 1 or 2!" << std::endl;
+ std::cout << "Layers groups must be either 1 or 2!" << std::endl;
return false;
break;
}
return true;
}
+static void subtractMeanValue(unsigned char *frame_buffer, int channel_size,
+ int32_t mean_value)
+{
+ int32_t one_pixel;
+
+ for (int i = 0; i < channel_size; i ++)
+ {
+ one_pixel = (int32_t)frame_buffer[i];
+ one_pixel -= mean_value;
+ if(one_pixel > 127) one_pixel = 127;
+ if(one_pixel < -128) one_pixel = -128;
+ frame_buffer[i] = (unsigned char)one_pixel;
+ }
+}
+
bool ReadFrame(ExecutionObjectPipeline& eop, uint32_t frame_idx,
- const Configuration& c, const cmdline_opts_t& opts, char *input_frames_buffer)
+ const Configuration& c, const cmdline_opts_t& opts,
+ char *input_frames_buffer)
{
- if ((uint32_t)frame_idx >= opts.num_frames)
+ if (frame_idx >= opts.num_frames)
return false;
eop.SetFrameIndex(frame_idx);
- char* frame_buffer = eop.GetInputBufferPtr();
+ unsigned char* frame_buffer = (unsigned char *)eop.GetInputBufferPtr();
assert (frame_buffer != nullptr);
+ //Current implementation of this function assumes 3 channels on input
+ assert (c.inNumChannels == 3);
+
int channel_size = c.inWidth * c.inHeight;
- char *bgr_frames_input = input_frames_buffer + (frame_idx % c.numFrames) * channel_size * 3;
+ char *bgr_frames_input = input_frames_buffer + (frame_idx % c.numFrames) *
+ channel_size * c.inNumChannels;
+
memcpy(frame_buffer, bgr_frames_input + 0, channel_size);
- memcpy(frame_buffer + 1*channel_size, bgr_frames_input + 1 * channel_size, channel_size);
- memcpy(frame_buffer + 2*channel_size, bgr_frames_input + 2 * channel_size, channel_size);
+ if(c.preProcType == 1)
+ subtractMeanValue(frame_buffer, channel_size, 104);
+ else if(c.preProcType == 2)
+ subtractMeanValue(frame_buffer, channel_size, 128);
+ frame_buffer += channel_size;
+
+ memcpy(frame_buffer, bgr_frames_input + 1 * channel_size, channel_size);
+ if(c.preProcType == 1)
+ subtractMeanValue(frame_buffer, channel_size, 117);
+ else if(c.preProcType == 2)
+ subtractMeanValue(frame_buffer, channel_size, 128);
+ frame_buffer += channel_size;
+
+ memcpy(frame_buffer, bgr_frames_input + 2 * channel_size, channel_size);
+ if(c.preProcType == 1)
+ subtractMeanValue(frame_buffer, channel_size, 123);
+ else if(c.preProcType == 2)
+ subtractMeanValue(frame_buffer, channel_size, 128);
+
return true;
}
{
std::cout <<
"Usage: mcbench\n"
- " Will run partitioned mcbench network to perform "
- "multi-objects detection\n"
- " and classification. First part of network "
- "(layersGroupId 1) runs on EVE,\n"
- " second part (layersGroupId 2) runs on DSP.\n"
+ " Runs partitioned network to perform multi-object detection\n"
+ " and classification. First part of network (layersGroupId 1) runs on\n"
+ " EVE, second part (layersGroupId 2) runs on DSP.\n"
" Use -c to run a different segmentation network. Default is jdetnet.\n"
"Optional arguments:\n"
- " -c <config> Valid configs: ../test/testvecs/config/infer/... files \n"
- " -d <number> Number of dsp cores to use\n"
- " -e <number> Number of eve cores to use\n"
- " -g <1|2> Number of layer groups (layer group <=> consecutive group of layers)\n"
+ " -c <config> Valid configs: ../test/testvecs/config/infer/... \n"
+ " -d <number> Number of DSP cores to use\n"
+ " -e <number> Number of EVE cores to use\n"
+ " -g <1|2> Number of layer groups\n"
" -f <number> Number of frames to process\n"
- " -r <number> Keep repeating specified number of frames from input file\n"
- " -i <image> Path to the image file as input\n"
- " Default are 9 frames in testvecs\n"
- " -i camera<number> Use camera as input\n"
- " video input port: /dev/video<number>\n"
- " -i <name>.{mp4,mov,avi} Use video file as input\n"
- " -w <number> Output image/video width\n"
+ " -i <image> Path to the input image file\n"
" -v Verbose output during execution\n"
" -h Help\n";
}
index 9c9f039dad4dbd8cd6786bcc038ef77412ca4147..98f1f247216fc35a58a325fe702a1e36e8100d52 100755 (executable)
./mcbench -g 1 -d 0 -e 2 -c ../test/testvecs/config/infer/tidl_config_mobileNet1.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
./mcbench -g 1 -d 2 -e 0 -c ../test/testvecs/config/infer/tidl_config_mobileNet1.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
+./mcbench -g 2 -d 1 -e 2 -c ../test/testvecs/config/infer/tidl_config_mobileNet1_lg2.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
./mcbench -g 1 -d 0 -e 2 -c ../test/testvecs/config/infer/tidl_config_squeeze1_1.txt -f 50 -i ../test/testvecs/input/preproc_1_227x227_multi.y
./mcbench -g 1 -d 2 -e 0 -c ../test/testvecs/config/infer/tidl_config_squeeze1_1.txt -f 50 -i ../test/testvecs/input/preproc_1_227x227_multi.y
./mcbench -g 1 -d 0 -e 2 -c ../test/testvecs/config/infer/tidl_config_inceptionNetv1.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
./mcbench -g 1 -d 2 -e 0 -c ../test/testvecs/config/infer/tidl_config_inceptionNetv1.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
+./mcbench -g 2 -d 1 -e 2 -c ../test/testvecs/config/infer/tidl_config_inceptionNetv1_lg2.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
./mcbench -g 1 -d 0 -e 2 -c ../test/testvecs/config/infer/tidl_config_j11_v2.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
./mcbench -g 1 -d 2 -e 2 -c ../test/testvecs/config/infer/tidl_config_j11_v2.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y
./mcbench -g 2 -d 1 -e 2 -c ../test/testvecs/config/infer/tidl_config_j11_v2_lg2.txt -f 50 -i ../test/testvecs/input/preproc_0_224x224_multi.y