summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 9463c75)
raw | patch | inline | side by side (parent: 9463c75)
author | Ajay Jayaraj <ajayj@ti.com> | |
Thu, 30 Aug 2018 19:25:11 +0000 (14:25 -0500) | ||
committer | Ajay Jayaraj <ajayj@ti.com> | |
Thu, 30 Aug 2018 21:28:59 +0000 (16:28 -0500) |
two_eo_per_frame is a simple example to illustrate using
ExecutionObjectPipeline to split processing a single frame across EVE
and DSP.
(MCT-1048)
ExecutionObjectPipeline to split processing a single frame across EVE
and DSP.
(MCT-1048)
docs/source/example.rst | patch | blob | history | |
examples/common/utils.cpp | patch | blob | history | |
examples/common/utils.h | patch | blob | history | |
examples/layer_output/Makefile | patch | blob | history | |
examples/layer_output/main.cpp | patch | blob | history | |
examples/one_eo_per_frame/main.cpp | patch | blob | history | |
examples/two_eo_per_frame/Makefile | [new file with mode: 0644] | patch | blob |
examples/two_eo_per_frame/main.cpp | [new file with mode: 0644] | patch | blob |
index 1b16d014643fc45fa24070070598c97ccfee0bd1..b72d6a0eec5b05a3097d496b9f5bef6eb493312e 100644 (file)
--- a/docs/source/example.rst
+++ b/docs/source/example.rst
Examples
********
-+---------------------+-----------------------------------------------------+
-| Example | Description |
-+---------------------+-----------------------------------------------------+
-| one_eo_per_frame | Simple example to illustrate processing a single |
-| | frame with one :term:`EO` using the j11_v2 network. |
-| | The per-frame processing time for this network is |
-| | fairly similar across EVE and C66x DSP. The example |
-| | parallelizes frame processing across all available |
-| | EVE and C66x cores. |
-+---------------------+-----------------------------------------------------+
-| imagenet | Classification |
-+---------------------+-----------------------------------------------------+
-| segmentation | Pixel level segmentation |
-+---------------------+-----------------------------------------------------+
-| ssd_multibox | Object detection |
-+---------------------+-----------------------------------------------------+
-| tidl_classification | Classification |
-+---------------------+-----------------------------------------------------+
-| test | Unit test. Tests supported networks on C66x and EVE |
-+---------------------+-----------------------------------------------------+
++---------------------+----------------------------------------------------------------+
+| Example | Description |
++---------------------+----------------------------------------------------------------+
+| one_eo_per_frame | Simple example to illustrate processing a single |
+| | frame with one :term:`EO` using the j11_v2 network. |
+| | Per-frame processing time for this network is fairly similar |
+| | across EVE and C66x DSP. This enables frame processing to be |
+| | parallelized by distributing frames across all available EVE |
+| | and C66x cores. |
++---------------------+----------------------------------------------------------------+
+| two_eo_per_frame | Simple example to illustrate processing a single |
+| | frame with two :term:`EOs<EO>` using the j11_v2 network. |
++---------------------+----------------------------------------------------------------+
+| imagenet | Classification |
++---------------------+----------------------------------------------------------------+
+| segmentation | Pixel level segmentation |
++---------------------+----------------------------------------------------------------+
+| ssd_multibox | Object detection |
++---------------------+----------------------------------------------------------------+
+| tidl_classification | Classification |
++---------------------+----------------------------------------------------------------+
+| layer_output | Illustrates using TIDL APIs to access output buffers |
+| | of intermediate :term:`Layers<Layer>` in the network. |
++---------------------+----------------------------------------------------------------+
+| test | Unit test. Tests supported networks on C66x and EVE |
++---------------------+----------------------------------------------------------------+
The examples included in the tidl-api package demonstrate three categories of
-deep learning networks: classification, segmentation and object detection. ``imagenet`` and ``segmentation`` can run on AM57x processors with either EVE or C66x cores. ``ssd_multibox`` requires AM57x processors with both EVE and C66x. The performance
+deep learning networks: classification, segmentation and object detection.
+``imagenet`` and ``segmentation`` can run on AM57x processors with either EVE or C66x cores.
+``ssd_multibox`` requires AM57x processors with both EVE and C66x. The performance
numbers that we present here were obtained on an AM5729 EVM, which
includes 2 Arm Cortex-A15 cores running at 1.5GHz, 4 EVE cores at 535MHz, and
2 DSP cores at 750MHz.
index a4c978b94f2258e2b6ca759772ea94a39ee6b05a..c45acc6b04f3701b0df0ddc92e55b3d86679f800 100644 (file)
using boost::format;
using std::string;
+using std::istream;
+using std::ostream;
-bool ReadFrame(ExecutionObject* eo, int frame_idx,
+static bool read_frame_helper(char* ptr, size_t size, istream& input_file);
+
+bool ReadFrame(ExecutionObject* eo,
+ int frame_idx,
const Configuration& configuration,
- std::istream& input_file)
+ std::istream& input_file)
{
if (frame_idx >= configuration.numFrames)
return false;
- assert (eo->GetInputBufferPtr() != nullptr);
+ // Note: Frame index is used by the EO for debug messages only
+ eo->SetFrameIndex(frame_idx);
+
+ return read_frame_helper(eo->GetInputBufferPtr(),
+ eo->GetInputBufferSizeInBytes(),
+ input_file);
+}
+
+bool ReadFrame(ExecutionObjectPipeline* eop,
+ int frame_idx,
+ const Configuration& configuration,
+ std::istream& input_file)
+{
+ if (frame_idx >= configuration.numFrames)
+ return false;
+
+ // Note: Frame index is used by the EOP for debug messages only
+ eop->SetFrameIndex(frame_idx);
+
+ return read_frame_helper(eop->GetInputBufferPtr(),
+ eop->GetInputBufferSizeInBytes(),
+ input_file);
+}
+
+bool read_frame_helper(char* ptr, size_t size, istream& input_file)
+{
+ assert (ptr != nullptr);
assert (input_file.good());
- input_file.read(eo->GetInputBufferPtr(),
- eo->GetInputBufferSizeInBytes());
+ input_file.read(ptr, size);
assert (input_file.good());
if (input_file.eof())
return false;
- // Note: Frame index is used by the EO for debug messages only
- eo->SetFrameIndex(frame_idx);
-
// Wrap-around : if EOF is reached, start reading from the beginning.
if (input_file.peek() == EOF)
input_file.seekg(0, input_file.beg);
return false;
}
-bool WriteFrame(const ExecutionObject* eo, std::ostream& output_file)
+
+bool WriteFrame(const ExecutionObject* eo, ostream& output_file)
{
output_file.write(eo->GetOutputBufferPtr(),
eo->GetOutputBufferSizeInBytes());
% elapsed_device % elapsed_host % overhead;
}
+void ReportTime(const ExecutionObjectPipeline* eop)
+{
+ double elapsed_host = eop->GetHostProcessTimeInMilliSeconds();
+ double elapsed_device = eop->GetProcessTimeInMilliSeconds();
+ double overhead = 100 - (elapsed_device/elapsed_host*100);
+
+ std::cout << format("frame[%3d]: Time on %s: %4.2f ms, host: %4.2f ms"
+ " API overhead: %2.2f %%\n")
+ % eop->GetFrameIndex() % eop->GetDeviceName()
+ % elapsed_device % elapsed_host % overhead;
+}
+
// Compare output against reference output
bool CheckFrame(const ExecutionObject *eo, const char *ref_output)
{
return false;
}
+bool CheckFrame(const ExecutionObjectPipeline *eop, const char *ref_output)
+{
+ if (std::memcmp(static_cast<const void*>(ref_output),
+ static_cast<const void*>(eop->GetOutputBufferPtr()),
+ eop->GetOutputBufferSizeInBytes()) == 0)
+ return true;
+
+ return false;
+}
+
namespace tidl {
std::size_t GetBinaryFileSize (const std::string &F);
index 9da19164eaf457d194db6a1a3e5a4cffcaa1fc64..732b8af63d243d7bc32f4b10ca90e83afeebe1f0 100644 (file)
--- a/examples/common/utils.h
+++ b/examples/common/utils.h
#include <fstream>
#include "executor.h"
#include "execution_object.h"
+#include "execution_object_pipeline.h"
#include "configuration.h"
-bool ReadFrame(tidl::ExecutionObject* eo,
- int frame_idx,
- const tidl::Configuration& configuration,
- std::istream& input_file);
+using tidl::Executor;
+using tidl::ExecutionObject;
+using tidl::ExecutionObjectPipeline;
+using tidl::Configuration;
-bool WriteFrame(const tidl::ExecutionObject* eo, std::ostream& output_file);
+bool ReadFrame(ExecutionObject* eo,
+ int frame_idx,
+ const Configuration& configuration,
+ std::istream& input_file);
-void ReportTime(const tidl::ExecutionObject* eo);
+bool ReadFrame(ExecutionObjectPipeline* eop,
+ int frame_idx,
+ const Configuration& configuration,
+ std::istream& input_file);
-bool CheckFrame(const tidl::ExecutionObject* eo, const char *ref_output);
+bool WriteFrame(const ExecutionObject* eo, std::ostream& output_file);
+
+void ReportTime(const ExecutionObject* eo);
+void ReportTime(const ExecutionObjectPipeline* eop);
+
+bool CheckFrame(const ExecutionObject* eo, const char *ref_output);
+bool CheckFrame(const ExecutionObjectPipeline *eop, const char *ref_output);
const char* ReadReferenceOutput(const std::string& name);
index 9d5cabb8f7c66be14edce7ba43a45c0d8a4a07fb..52b3eaec9e4eef3ec7342314188e5889ee4882f2 100644 (file)
include ../make.common
-SOURCES = main.cpp
+CXXFLAGS += -I../common
-$(EXE): $(TIDL_API_LIB) $(TIDL_API_LIB_IMGUTIL) $(HEADERS) $(SOURCES)
+SOURCES = main.cpp ../common/utils.cpp
+
+$(EXE): $(TIDL_API_LIB) $(HEADERS) $(SOURCES)
$(CXX) $(CXXFLAGS) $(SOURCES) $(TIDL_API_LIB) $(LDFLAGS) $(LIBS) -o $@
index 4758d237222808da1dd8399dd3cf019f5d31a04f..9b33f2336b4495e0eff6c154e8d7101d3033a0a3 100644 (file)
#include <signal.h>
#include <getopt.h>
#include <iostream>
-#include <iomanip>
#include <fstream>
#include <cassert>
#include <string>
-#include <functional>
-#include <algorithm>
-#include <time.h>
#include "executor.h"
#include "execution_object.h"
#include "configuration.h"
-
-bool __TI_show_debug_ = false;
+#include "utils.h"
using namespace tidl;
bool RunConfiguration(const std::string& config_file, int num_devices,
DeviceType device_type);
-bool ReadFrame(ExecutionObject& eo,
- int frame_idx,
- const Configuration& configuration,
- std::istream& input_file);
-
-bool WriteFrame(const ExecutionObject &eo,
- std::ostream& output_file);
-
static void ProcessTrace(const ExecutionObject* eo, const Configuration& c);
int main(int argc, char *argv[])
ids.insert(static_cast<DeviceId>(i));
// Read the TI DL configuration file
- Configuration configuration;
- bool status = configuration.ReadFromFile(config_file);
- if (!status)
- {
- std::cerr << "Error in configuration file: " << config_file
- << std::endl;
+ Configuration c;
+ if (!c.ReadFromFile(config_file))
return false;
- }
// Open input files
- std::ifstream input_data_file(configuration.inData, std::ios::binary);
- assert (input_data_file.good());
+ std::ifstream input(c.inData, std::ios::binary);
+ assert (input.good());
+
+ bool status = true;
try
{
- // Create a executor with the approriate core type, number of cores
- // and configuration specified
- Executor executor(device_type, ids, configuration);
+ // Create an executor with the specified core type, number of cores
+ // and configuration
+ Executor E(device_type, ids, c);
- // Query Executor for set of ExecutionObjects created
- const ExecutionObjects& execution_objects =
- executor.GetExecutionObjects();
- int num_eos = execution_objects.size();
+ std::vector<ExecutionObject *> EOs;
+ for (unsigned int i = 0; i < E.GetNumExecutionObjects(); i++)
+ EOs.push_back(E[i]);
+
+ int num_eos = EOs.size();
// Allocate input and output buffers for each execution object
- std::vector<void *> buffers;
- for (auto &eo : execution_objects)
+ for (auto eo : EOs)
{
size_t in_size = eo->GetInputBufferSizeInBytes();
size_t out_size = eo->GetOutputBufferSizeInBytes();
ArgInfo in = { ArgInfo(malloc(in_size), in_size)};
ArgInfo out = { ArgInfo(malloc(out_size), out_size)};
eo->SetInputOutputBuffer(in, out);
-
- buffers.push_back(in.ptr());
- buffers.push_back(out.ptr());
}
- // Process frames across execution objects in a pipelined manner
+ // Process frames with available EOs in a pipelined manner
// additional num_eos iterations to flush the pipeline (epilogue)
- for (int frame_idx = 0;
- frame_idx < configuration.numFrames + num_eos; frame_idx++)
+ for (int frame_idx = 0; frame_idx < c.numFrames + num_eos; frame_idx++)
{
- ExecutionObject* eo = execution_objects[frame_idx % num_eos].get();
+ ExecutionObject* eo = EOs[frame_idx % num_eos];
// Wait for previous frame on the same eo to finish processing
if (eo->ProcessFrameWait())
- ProcessTrace(eo, configuration);
+ ProcessTrace(eo, c);
// Read a frame and start processing it with current eo
- if (ReadFrame(*eo, frame_idx, configuration, input_data_file))
+ if (ReadFrame(eo, frame_idx, c, input))
eo->ProcessFrameStartAsync();
}
- for (auto b : buffers)
- free(b);
-
+ for (auto eo : EOs)
+ {
+ free(eo->GetInputBufferPtr());
+ free(eo->GetOutputBufferPtr());
+ }
}
catch (tidl::Exception &e)
{
status = false;
}
- input_data_file.close();
+ input.close();
return status;
}
void ProcessTrace(const ExecutionObject* eo, const Configuration& c)
{
if (!c.enableOutputTrace)
+ {
+ std::cout << "Trace is not enabled. Set"
+ " Configuration::enableOutputTrace to true"
+ << std::endl;
return;
+ }
// 1. Write the outputs from each layer to files
// filename: trace_data_<layer_index>_<channels>_<width>_<height>.bin
// 2. Get all outputs from all layers and iterate through them
const LayerOutputs* los = eo->GetOutputsFromAllLayers();
+ if (!los) return;
for (const std::unique_ptr<const LayerOutput> &lo : *los)
{
// 3. Get the output from a single layer
const LayerOutput* lo = eo->GetOutputFromLayer(1);
+ if (!lo) return;
- if (lo)
- {
- std::cout << "Layer index: " << lo->LayerIndex()
- << " Shape: " << lo->NumberOfChannels() << " x "
- << lo->Width() << " x " << lo->Height()
- << " Data ptr: " << static_cast<const void*>(lo->Data())
- << " Size in bytes: " << lo->Size()
- << std::endl;
-
- delete lo;
- }
-}
-
-
-bool ReadFrame(ExecutionObject &eo, int frame_idx,
- const Configuration& configuration,
- std::istream& input_file)
-{
- if (frame_idx >= configuration.numFrames)
- return false;
-
- char* frame_buffer = eo.GetInputBufferPtr();
- assert (frame_buffer != nullptr);
-
- input_file.read(eo.GetInputBufferPtr(),
- eo.GetInputBufferSizeInBytes());
-
- if (input_file.eof())
- return false;
-
- assert (input_file.good());
-
- eo.SetFrameIndex(frame_idx);
-
- if (input_file.good())
- return true;
+ std::cout << "Layer index: " << lo->LayerIndex()
+ << " Shape: " << lo->NumberOfChannels() << " x "
+ << lo->Width() << " x " << lo->Height()
+ << " Data ptr: " << static_cast<const void*>(lo->Data())
+ << " Size in bytes: " << lo->Size()
+ << std::endl;
- return false;
+ delete lo;
}
index a7f10a8b106bddc79b58ed8efc199d4c78bad980..009efdd8f0e4231d37780a849fb21030603efbf4 100644 (file)
if (!c.ReadFromFile(config_file))
return false;
- // heap sizes determined using Configuration::showHeapStats
+ // Heap sizes for this network determined using Configuration::showHeapStats
c.PARAM_HEAP_SIZE = (3 << 20); // 3MB
c.NETWORK_HEAP_SIZE = (20 << 20); // 20MB
diff --git a/examples/two_eo_per_frame/Makefile b/examples/two_eo_per_frame/Makefile
--- /dev/null
@@ -0,0 +1,37 @@
+# Copyright (c) 2018 Texas Instruments Incorporated - http://www.ti.com/
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Texas Instruments Incorporated nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+
+EXE = two_eo_per_frame
+
+include ../make.common
+
+CXXFLAGS += -I../common
+
+SOURCES = main.cpp ../common/utils.cpp
+
+$(EXE): $(TIDL_API_LIB) $(HEADERS) $(SOURCES)
+ $(CXX) $(CXXFLAGS) $(SOURCES) $(TIDL_API_LIB) $(LDFLAGS) $(LIBS) -o $@
+
diff --git a/examples/two_eo_per_frame/main.cpp b/examples/two_eo_per_frame/main.cpp
--- /dev/null
@@ -0,0 +1,205 @@
+/******************************************************************************
+ * Copyright (c) 2017-2018 Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+//
+// This example illustrates using multiple EOs to process a single frame
+// For details, refer to http://downloads.ti.com/mctools/esd/docs/tidl-api/
+//
+#include <signal.h>
+#include <iostream>
+#include <fstream>
+#include <cassert>
+#include <string>
+
+#include "executor.h"
+#include "execution_object.h"
+#include "execution_object_pipeline.h"
+#include "configuration.h"
+#include "utils.h"
+
+using namespace tidl;
+using std::string;
+using std::unique_ptr;
+using std::vector;
+
+using EOP = tidl::ExecutionObjectPipeline;
+
+bool Run(int num_eve,int num_dsp, const char* ref_output);
+
+Executor* CreateExecutor(DeviceType dt, int num, const Configuration& c,
+ int layer_group_id);
+
+void AllocateMemory(const vector<EOP *>& EOPs);
+void FreeMemory (const vector<EOP *>& EOPs);
+
+
+int main(int argc, char *argv[])
+{
+    // Call exit() on SIGABRT/SIGTERM to ensure a clean exit
+ signal(SIGABRT, exit);
+ signal(SIGTERM, exit);
+
+ // This example requires both EVE and C66x
+ uint32_t num_eve = Executor::GetNumDevices(DeviceType::EVE);
+ uint32_t num_dsp = Executor::GetNumDevices(DeviceType::DSP);
+ if (num_eve == 0 || num_dsp == 0)
+ {
+ std::cout << "TI DL not supported on this SoC." << std::endl;
+ return EXIT_SUCCESS;
+ }
+
+ string ref_file ="../test/testvecs/reference/j11_v2_ref.bin";
+ unique_ptr<const char> reference_output(ReadReferenceOutput(ref_file));
+
+ bool status = Run(num_eve, num_dsp, reference_output.get());
+
+ if (!status)
+ {
+ std::cout << "FAILED" << std::endl;
+ return EXIT_FAILURE;
+ }
+
+ std::cout << "PASSED" << std::endl;
+ return EXIT_SUCCESS;
+}
+
+bool Run(int num_eve, int num_dsp, const char* ref_output)
+{
+ string config_file ="../test/testvecs/config/infer/tidl_config_j11_v2.txt";
+
+ Configuration c;
+ if (!c.ReadFromFile(config_file))
+ return false;
+
+ // Heap sizes for this network determined using Configuration::showHeapStats
+ c.PARAM_HEAP_SIZE = (3 << 20); // 3MB
+ c.NETWORK_HEAP_SIZE = (20 << 20); // 20MB
+
+ c.numFrames = 16;
+
+ // Assign layers 12, 13 and 14 to layer group 2
+ c.layerIndex2LayerGroupId = { {12, 2}, {13, 2}, {14, 2} };
+
+ // Open input file for reading
+ std::ifstream input(c.inData, std::ios::binary);
+
+ bool status = true;
+ try
+ {
+ // Create Executors - use all the DSP and EVE cores available
+ // Layer group 1 will be executed on EVE, 2 on DSP
+ unique_ptr<Executor> eve(CreateExecutor(DeviceType::EVE,num_eve,c,1));
+ unique_ptr<Executor> dsp(CreateExecutor(DeviceType::DSP,num_dsp,c,2));
+
+ // Create pipelines. Each pipeline has 1 EVE and 1 DSP. If there are
+ // more EVEs than DSPs, the DSPs are shared across multiple
+ // pipelines. E.g.
+ // 2 EVE, 2 DSP: EVE1 -> DSP1, EVE2 -> DSP2
+ // 4 EVE, 2 DSP: EVE1 -> DSP1, EVE2 -> DSP2, EVE3 -> DSP1, EVE4 ->DSP2
+ std::vector<EOP *> EOPs;
+ uint32_t num_pipe = std::max(num_eve, num_dsp);
+ for (uint32_t i = 0; i < num_pipe; i++)
+ EOPs.push_back(new EOP( { (*eve)[i % num_eve],
+ (*dsp)[i % num_dsp] } ));
+
+ AllocateMemory(EOPs);
+
+        // Process frames with EOPs in a pipelined manner
+        // additional num_eops iterations to flush the pipeline (epilogue)
+ int num_eops = EOPs.size();
+ for (int frame_idx = 0; frame_idx < c.numFrames + num_eops; frame_idx++)
+ {
+ EOP* eop = EOPs[frame_idx % num_eops];
+
+            // Wait for previous frame on the same eop to finish processing
+ if (eop->ProcessFrameWait())
+ {
+ ReportTime(eop);
+
+ // The reference output is valid only for the first frame
+ // processed on each EOP
+ if (frame_idx < num_eops && !CheckFrame(eop, ref_output))
+ status = false;
+ }
+
+            // Read a frame and start processing it with current eop
+ if (ReadFrame(eop, frame_idx, c, input))
+ eop->ProcessFrameStartAsync();
+ }
+
+ FreeMemory(EOPs);
+
+ }
+ catch (tidl::Exception &e)
+ {
+ std::cerr << e.what() << std::endl;
+ status = false;
+ }
+
+ input.close();
+
+ return status;
+}
+
+// Create an Executor with the specified type and number of EOs
+Executor* CreateExecutor(DeviceType dt, int num, const Configuration& c,
+ int layer_group_id)
+{
+ if (num == 0) return nullptr;
+
+ DeviceIds ids;
+ for (int i = 0; i < num; i++)
+ ids.insert(static_cast<DeviceId>(i));
+
+ return new Executor(dt, ids, c, layer_group_id);
+}
+
+// Allocate input and output memory for each EO
+void AllocateMemory(const vector<EOP *>& EOPs)
+{
+ // Allocate input and output buffers for each execution object
+ for (auto eop : EOPs)
+ {
+ size_t in_size = eop->GetInputBufferSizeInBytes();
+ size_t out_size = eop->GetOutputBufferSizeInBytes();
+ ArgInfo in = { ArgInfo(malloc(in_size), in_size)};
+ ArgInfo out = { ArgInfo(malloc(out_size), out_size)};
+ eop->SetInputOutputBuffer(in, out);
+ }
+}
+
+// Free the input and output memory associated with each EO
+void FreeMemory(const vector<EOP *>& EOPs)
+{
+ for (auto eop : EOPs)
+ {
+ free(eop->GetInputBufferPtr());
+ free(eop->GetOutputBufferPtr());
+ }
+
+}