Add support for accessing layer output data
author Ajay Jayaraj <ajayj@ti.com>
Fri, 3 Aug 2018 19:26:14 +0000 (14:26 -0500)
committer Ajay Jayaraj <ajayj@ti.com>
Wed, 8 Aug 2018 18:11:13 +0000 (13:11 -0500)
* Added API methods to enable tracing and dump layer output buffers to
  file
  1. ExecutionObject::WriteLayerOutputsToFile
  2. ExecutionObject::GetOutputsFromAllLayers
  3. ExecutionObject::GetOutputFromLayer
  See examples/layer_output/main.cpp, ProcessTrace() for examples of
  using these tracing APIs.
* Added an 'enableTrace' field to the configuration file. Set to true
  to enable tracing.
* Update configuration file parser to allow comments, blank lines

Note:
Tracing requires ti-opencl, SHA-ID 188cc38627315d3041b80ee86e6ca33087348062
or newer.

(MCT-1023)

12 files changed:
examples/layer_output/Makefile [new file with mode: 0644]
examples/layer_output/j11_v2_trace.txt [new file with mode: 0644]
examples/layer_output/main.cpp [new file with mode: 0644]
examples/test/main.cpp
tidl_api/dsp/ocl_wrapper.cl
tidl_api/inc/configuration.h
tidl_api/inc/execution_object.h
tidl_api/src/configuration.cpp
tidl_api/src/configuration_parser.cpp
tidl_api/src/execution_object.cpp
tidl_api/src/executor.cpp
tidl_api/src/executor_impl.h

diff --git a/examples/layer_output/Makefile b/examples/layer_output/Makefile
new file mode 100644 (file)
index 0000000..9d5cabb
--- /dev/null
@@ -0,0 +1,34 @@
+# Copyright (c) 2018 Texas Instruments Incorporated - http://www.ti.com/
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of Texas Instruments Incorporated nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+# THE POSSIBILITY OF SUCH DAMAGE.
+
+EXE = layer_output
+
+include ../make.common
+
+SOURCES = main.cpp
+
+$(EXE): $(TIDL_API_LIB) $(TIDL_API_LIB_IMGUTIL) $(HEADERS) $(SOURCES)
+       $(CXX) $(CXXFLAGS) $(SOURCES) $(TIDL_API_LIB) $(LDFLAGS) $(LIBS) -o $@
diff --git a/examples/layer_output/j11_v2_trace.txt b/examples/layer_output/j11_v2_trace.txt
new file mode 100644 (file)
index 0000000..79480dd
--- /dev/null
@@ -0,0 +1,12 @@
+numFrames     = 1
+preProcType   = 0
+inData        = ../test/testvecs/input/preproc_0_224x224.y
+outData       = stats_tool_out.bin
+netBinFile    = ../test/testvecs/config/tidl_models/tidl_net_imagenet_jacintonet11v2.bin
+paramsBinFile = ../test/testvecs/config/tidl_models/tidl_param_imagenet_jacintonet11v2.bin
+inWidth       = 224
+inHeight      = 224
+inNumChannels = 3
+
+# Enable tracing of output buffers
+enableTrace = true
diff --git a/examples/layer_output/main.cpp b/examples/layer_output/main.cpp
new file mode 100644 (file)
index 0000000..4758d23
--- /dev/null
@@ -0,0 +1,243 @@
+/******************************************************************************
+ * Copyright (c) 2017-2018  Texas Instruments Incorporated - http://www.ti.com/
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions are met:
+ *       * Redistributions of source code must retain the above copyright
+ *         notice, this list of conditions and the following disclaimer.
+ *       * Redistributions in binary form must reproduce the above copyright
+ *         notice, this list of conditions and the following disclaimer in the
+ *         documentation and/or other materials provided with the distribution.
+ *       * Neither the name of Texas Instruments Incorporated nor the
+ *         names of its contributors may be used to endorse or promote products
+ *         derived from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ *   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *   THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include <signal.h>
+#include <getopt.h>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <cassert>
+#include <string>
+#include <functional>
+#include <algorithm>
+#include <time.h>
+
+#include "executor.h"
+#include "execution_object.h"
+#include "configuration.h"
+
+bool __TI_show_debug_ = false;
+
+using namespace tidl;
+
+bool RunConfiguration(const std::string& config_file, int num_devices,
+                      DeviceType device_type);
+
+bool ReadFrame(ExecutionObject&     eo,
+               int                  frame_idx,
+               const Configuration& configuration,
+               std::istream&        input_file);
+
+bool WriteFrame(const ExecutionObject &eo,
+                std::ostream& output_file);
+
+static void ProcessTrace(const ExecutionObject* eo, const Configuration& c);
+
+int main(int argc, char *argv[])
+{
+    // Catch ctrl-c to ensure a clean exit
+    signal(SIGABRT, exit);
+    signal(SIGTERM, exit);
+
+    std::cout << "API Version: " << Executor::GetAPIVersion() << std::endl;
+
+    // If there are no devices capable of offloading TIDL on the SoC, exit
+    uint32_t num_eve = Executor::GetNumDevices(DeviceType::EVE);
+    uint32_t num_dsp = Executor::GetNumDevices(DeviceType::DSP);
+    if (num_eve == 0 && num_dsp == 0)
+    {
+        std::cout << "TI DL not supported on this SoC." << std::endl;
+        return EXIT_SUCCESS;
+    }
+
+    // Configuration file with tracing enabled
+    std::string config_file = "j11_v2_trace.txt";
+    int         num_devices = 1;
+    DeviceType  device_type = num_eve > 0 ? DeviceType::EVE :
+                                            DeviceType::DSP;
+
+    bool status = RunConfiguration(config_file, num_devices, device_type);
+
+    if (!status)
+    {
+        std::cout << "tidl FAILED" << std::endl;
+        return EXIT_FAILURE;
+    }
+
+    std::cout << "tidl PASSED" << std::endl;
+    return EXIT_SUCCESS;
+}
+
+bool RunConfiguration(const std::string& config_file, int num_devices,
+                      DeviceType device_type)
+{
+    DeviceIds ids;
+    for (int i = 0; i < num_devices; i++)
+        ids.insert(static_cast<DeviceId>(i));
+
+    // Read the TI DL configuration file
+    Configuration configuration;
+    bool status = configuration.ReadFromFile(config_file);
+    if (!status)
+    {
+        std::cerr << "Error in configuration file: " << config_file
+                  << std::endl;
+        return false;
+    }
+
+    // Open input files
+    std::ifstream input_data_file(configuration.inData, std::ios::binary);
+    assert (input_data_file.good());
+
+    try
+    {
+        // Create a executor with the approriate core type, number of cores
+        // and configuration specified
+        Executor executor(device_type, ids, configuration);
+
+        // Query Executor for set of ExecutionObjects created
+        const ExecutionObjects& execution_objects =
+                                                executor.GetExecutionObjects();
+        int num_eos = execution_objects.size();
+
+        // Allocate input and output buffers for each execution object
+        std::vector<void *> buffers;
+        for (auto &eo : execution_objects)
+        {
+            size_t in_size  = eo->GetInputBufferSizeInBytes();
+            size_t out_size = eo->GetOutputBufferSizeInBytes();
+            ArgInfo in  = { ArgInfo(malloc(in_size),  in_size)};
+            ArgInfo out = { ArgInfo(malloc(out_size), out_size)};
+            eo->SetInputOutputBuffer(in, out);
+
+            buffers.push_back(in.ptr());
+            buffers.push_back(out.ptr());
+        }
+
+        // Process frames across execution objects in a pipelined manner
+        // additional num_eos iterations to flush the pipeline (epilogue)
+        for (int frame_idx = 0;
+             frame_idx < configuration.numFrames + num_eos; frame_idx++)
+        {
+            ExecutionObject* eo = execution_objects[frame_idx % num_eos].get();
+
+            // Wait for previous frame on the same eo to finish processing
+            if (eo->ProcessFrameWait())
+                ProcessTrace(eo, configuration);
+
+            // Read a frame and start processing it with current eo
+            if (ReadFrame(*eo, frame_idx, configuration, input_data_file))
+                eo->ProcessFrameStartAsync();
+        }
+
+        for (auto b : buffers)
+            free(b);
+
+    }
+    catch (tidl::Exception &e)
+    {
+        std::cerr << e.what() << std::endl;
+        status = false;
+    }
+
+    input_data_file.close();
+
+    return status;
+}
+
+
+// APIs for accessing output buffers from individual layers
+// 1. ExecutionObject::WriteLayerOutputsToFile
+// 2. ExecutionObject::GetOutputsFromAllLayers
+// 3. ExecutionObject::GetOutputFromLayer
+void ProcessTrace(const ExecutionObject* eo, const Configuration& c)
+{
+    if (!c.enableOutputTrace)
+        return;
+
+    // 1. Write the outputs from each layer to files
+    // filename: trace_data_<layer_index>_<channels>_<width>_<height>.bin
+    eo->WriteLayerOutputsToFile();
+
+    // 2. Get all outputs from all layers and iterate through them
+    const LayerOutputs* los = eo->GetOutputsFromAllLayers();
+
+    for (const std::unique_ptr<const LayerOutput> &lo : *los)
+    {
+        std::cout << "Layer index: " << lo->LayerIndex()
+              << " Shape: " << lo->NumberOfChannels() << " x "
+              << lo->Width() << " x " << lo->Height()
+              << " Data ptr: " << static_cast<const void*>(lo->Data())
+              << " Size in bytes: " << lo->Size()
+              << std::endl;
+    }
+
+    // Call delete to free the memory used to store layer outputs
+    delete los;
+
+    // 3. Get the output from a single layer
+    const LayerOutput* lo = eo->GetOutputFromLayer(1);
+
+    if (lo)
+    {
+        std::cout << "Layer index: " << lo->LayerIndex()
+              << " Shape: " << lo->NumberOfChannels() << " x "
+              << lo->Width() << " x " << lo->Height()
+              << " Data ptr: " << static_cast<const void*>(lo->Data())
+              << " Size in bytes: " << lo->Size()
+              << std::endl;
+
+        delete lo;
+    }
+}
+
+
+bool ReadFrame(ExecutionObject &eo, int frame_idx,
+               const Configuration& configuration,
+               std::istream& input_file)
+{
+    if (frame_idx >= configuration.numFrames)
+        return false;
+
+    char*  frame_buffer = eo.GetInputBufferPtr();
+    assert (frame_buffer != nullptr);
+
+    input_file.read(eo.GetInputBufferPtr(),
+                    eo.GetInputBufferSizeInBytes());
+
+    if (input_file.eof())
+        return false;
+
+    assert (input_file.good());
+
+    eo.SetFrameIndex(frame_idx);
+
+    if (input_file.good())
+        return true;
+
+    return false;
+}
diff --git a/examples/test/main.cpp b/examples/test/main.cpp
index bc87855ab6fbcb381103bf2d439cd88fb74e440e..4c82ca0ee7494c459206f4eefce5b93a21d625e4 100644 (file)
@@ -215,6 +215,8 @@ bool RunConfiguration(const std::string& config_file, int num_devices,
                           << overhead << " %" << std::endl;
 
                 WriteFrame(*eo, output_data_file);
+                if (configuration.enableOutputTrace)
+                    eo->WriteLayerOutputsToFile();
             }
 
             // Read a frame and start processing it with current eo
diff --git a/tidl_api/dsp/ocl_wrapper.cl b/tidl_api/dsp/ocl_wrapper.cl
index f94c004d97a71253b0388a87847c5cf3428771ce..2ee6bbf88f1d854d5748352882fb2486b36a2ccb 100644 (file)
@@ -55,9 +55,11 @@ void ocl_tidl_initialize(global unsigned char*            createParams,
 kernel void ocl_tidl_process(global OCL_TIDL_ProcessParams* processParams,
                              global unsigned char*          inputFrame,
                              global unsigned char*          outputData,
-                             global unsigned char*          externalMemory)
+                             global unsigned char*          externalMemory,
+                             global unsigned char*          traceBufferParams)
 {
-    ocl_dsp_tidl_process(processParams, inputFrame, outputData);
+    ocl_dsp_tidl_process(processParams, inputFrame, outputData,
+                         traceBufferParams);
 }
 
 
diff --git a/tidl_api/inc/configuration.h b/tidl_api/inc/configuration.h
index d530749adcd94128ca2f64d6a3342b4d71a71731..4bd375470fa60abd67728cbc938247c24f5c3784 100644 (file)
@@ -60,11 +60,11 @@ class Configuration
     int     preProcType;
 
     //! Force to run all layers, regardless of layersGroupId partitioning
-    int     runFullNet;
+    bool    runFullNet;
 
     //! When set, inputs are taken from TIDL internal buffers that contain
     //! outputs of previous layersGroupId, instead of from user application
-    int     enableInternalInput;
+    bool     enableInternalInput;
 
     //! Size of the TI DL per Execution Object heap
     size_t EXTMEM_HEAP_SIZE;
@@ -86,6 +86,9 @@ class Configuration
     //! Path to the TIDL parameter binary file
     std::string paramsBinFile;
 
+    //! Enable tracing of output buffers associated with each layer
+    bool enableOutputTrace;
+
     //! Default constructor.
     Configuration();
 
diff --git a/tidl_api/inc/execution_object.h b/tidl_api/inc/execution_object.h
index 3b07c86c90663be5c4473a4d718037c021e1132f..bd4a0717f9a1d155360a8a91810f2d4e57c3e28d 100644 (file)
@@ -36,6 +36,9 @@ namespace tidl {
 
 class Kernel;
 class Device;
+class LayerOutput;
+
+typedef std::vector<std::unique_ptr<const LayerOutput>> LayerOutputs;
 
 /*! @class ExecutionObject
     @brief Runs the TIDL network on an OpenCL device
@@ -100,6 +103,23 @@ class ExecutionObject
         //! @return Number of milliseconds to process a frame on the device.
         float    GetProcessTimeInMilliSeconds() const;
 
+        //! Write the output buffer for each layer to a file
+        //! <filename_prefix>_<ID>_HxW.bin
+        void WriteLayerOutputsToFile(const std::string& filename_prefix=
+                                     "trace_dump_") const;
+
+        //! Returns a LayerOutput object corresponding to a layer.
+        //! Caller is responsible for deleting the LayerOutput object.
+        //! @see LayerOutput
+        //! @param layer_index The layer index of the layer
+        //! @param output_index The output index of the buffer for a given
+        //!                     layer. Defaults to 0.
+        const LayerOutput* GetOutputFromLayer(uint32_t layer_index,
+                                              uint32_t output_index=0) const;
+
+        //! Get output buffers from all layers
+        const LayerOutputs* GetOutputsFromAllLayers() const;
+
         //! @private
         // Used by the Executor
         enum class CallType { INIT, PROCESS, CLEANUP };
@@ -110,9 +130,44 @@ class ExecutionObject
         ExecutionObject(const ExecutionObject&)            = delete;
         ExecutionObject& operator=(const ExecutionObject&) = delete;
 
+        void EnableOutputBufferTrace();
+
     private:
         class Impl;
         std::unique_ptr<Impl> pimpl_m;
 };
 
+
+/*! @class LayerOutput
+    @brief Describes the output of a layer in terms of its shape. Also
+    includes a pointer to the data.
+*/
+class LayerOutput
+{
+    public:
+        LayerOutput(int layer_index, int output_index, int buffer_id,
+                    int num_roi_m, int num_channels, size_t height,
+                    size_t width, const char* data);
+        ~LayerOutput();
+
+        int    LayerIndex()       const { return layer_index_m; }
+        int    NumberOfChannels() const { return num_channels_m; }
+        size_t Height()           const { return height_m; }
+        size_t Width()            const { return width_m; }
+        size_t Size()             const { return height_m * width_m *
+                                                 num_channels_m; }
+        const char* Data()        const { return data_m; }
+
+    private:
+        int layer_index_m;
+        int output_index_m;
+        int buffer_id_m;
+        int num_roi_m;
+        int num_channels_m;
+        size_t height_m;
+        size_t width_m;
+        const char* data_m;
+};
+
+
 } // namespace tidl
diff --git a/tidl_api/src/configuration.cpp b/tidl_api/src/configuration.cpp
index eca20b5a7c32d100d90a977501f3d1cab819991b..f9ec0bffa97e3fec8903848aff8f6a848a3b2e03 100644 (file)
@@ -38,10 +38,11 @@ Configuration::Configuration(): numFrames(0), inHeight(0), inWidth(0),
                      inNumChannels(0),
                      noZeroCoeffsPercentage(100),
                      preProcType(0),
-                     runFullNet(0),
+                     runFullNet(false),
                      enableInternalInput(0),
                      EXTMEM_HEAP_SIZE(64 << 20),  // 64MB for inceptionNetv1
-                     PARAM_HEAP_SIZE(9 << 20)     // 9MB for mobileNet1
+                     PARAM_HEAP_SIZE(9 << 20),    // 9MB for mobileNet1
+                     enableOutputTrace(false)
 {
 }
 
diff --git a/tidl_api/src/configuration_parser.cpp b/tidl_api/src/configuration_parser.cpp
index 2deb3d7b475eae2a867f21236ea37dc241ecc021..e9df4c891a7599d48093cd70b3e4b077043b750b 100644 (file)
@@ -32,6 +32,8 @@
 #include <string>
 #include <fstream>
 #include <iostream>
+#include <algorithm>
+#include <cctype>
 
 #include "configuration.h"
 
@@ -47,30 +49,30 @@ struct ConfigParser : qi::grammar<Iterator, ascii::space_type>
     ConfigParser(Configuration &x) : ConfigParser::base_type(entry)
     {
         using qi::int_;
+        using qi::bool_;
         using qi::lit;
         using qi::lexeme;
         using ascii::char_;
         using qi::_1;
 
-        //TODO: Ignore blank lines and comments
         path %= lexeme[+(char_ - '"')];
 
         // Discard '"'
         q_path = qi::omit[*char_('"')] >> path >> qi::omit[*char_('"')];
 
         entry %=
-          lit("numFrames")   >> '=' >> int_[ph::ref(x.numFrames) = _1]    |
-          lit("preProcType") >> '=' >> int_[ph::ref(x.preProcType) = _1]    |
-          lit("inWidth")     >> '=' >> int_[ph::ref(x.inWidth) = _1]   |
-          lit("inHeight")    >> '=' >> int_[ph::ref(x.inHeight) = _1]  |
-          lit("inNumChannels") >> '=' >> int_[ph::ref(x.inNumChannels) = _1]  |
-
-          lit("inData")     >> "=" >>  q_path[ph::ref(x.inData) = _1]     |
-          lit("outData")    >> "=" >> q_path[ph::ref(x.outData) = _1]     |
-          lit("netBinFile") >> "=" >> q_path[ph::ref(x.netBinFile) = _1]  |
-
-          lit("paramsBinFile") >> "=" >> q_path[ph::ref(x.paramsBinFile) = _1]
-          ;
+         lit("#") >> *(char_) /* discard comments */                         |
+         lit("numFrames")   >> '=' >> int_[ph::ref(x.numFrames) = _1]        |
+         lit("preProcType") >> '=' >> int_[ph::ref(x.preProcType) = _1]      |
+         lit("inWidth")     >> '=' >> int_[ph::ref(x.inWidth) = _1]          |
+         lit("inHeight")    >> '=' >> int_[ph::ref(x.inHeight) = _1]         |
+         lit("inNumChannels") >> '=' >> int_[ph::ref(x.inNumChannels) = _1]  |
+         lit("inData")     >> "=" >>  q_path[ph::ref(x.inData) = _1]         |
+         lit("outData")    >> "=" >> q_path[ph::ref(x.outData) = _1]         |
+         lit("netBinFile") >> "=" >> q_path[ph::ref(x.netBinFile) = _1]      |
+         lit("paramsBinFile") >> "=" >> q_path[ph::ref(x.paramsBinFile) = _1] |
+         lit("enableTrace") >> "=" >> bool_[ph::ref(x.enableOutputTrace) = _1]
+         ;
     }
 
     qi::rule<Iterator, std::string(), ascii::space_type> path;
@@ -92,13 +94,22 @@ bool Configuration::ReadFromFile(const std::string &file_name)
 
     bool result = true;
 
+    int line_num = 0;
     while (getline(IFS, str))
     {
+        line_num++;
+
+        // Skip lines with whitespace
+        auto f = [](unsigned char const c) { return std::isspace(c); };
+        if (std::all_of(str.begin(),str.end(), f))
+            continue;
+
         result = phrase_parse(str.cbegin(), str.cend(), G, ascii::space);
 
         if (!result)
         {
-            std::cout << "Parsing failed at: " << str << std::endl;
+            std::cout << "Parsing failed on line " << line_num
+                      << ": " << str << std::endl;
             break;
         }
     }
diff --git a/tidl_api/src/execution_object.cpp b/tidl_api/src/execution_object.cpp
index 2d49bf14347998a00229ce088f39a265ad4c2a58..eb72cbd6dcbf146812d5dd7bc190f5dc21d7d29e 100644 (file)
@@ -36,6 +36,9 @@
 #include "configuration.h"
 #include "common_defines.h"
 #include <string.h>
+#include "tidl_create_params.h"
+#include <fstream>
+#include <climits>
 
 using namespace tidl;
 
@@ -76,6 +79,16 @@ class ExecutionObject::Impl
 
         // Frame being processed by the EO
         int                             current_frame_idx_m;
+
+        // Trace related
+        uint32_t                          num_network_layers_m;
+        up_malloc_ddr<OCL_TIDL_BufParams> trace_buf_params_m;
+        size_t                            trace_buf_params_sz_m;
+        void WriteLayerOutputsToFile (const std::string& filename_prefix) const;
+
+        const LayerOutput* GetOutputFromLayer (uint32_t layer_index,
+                                               uint32_t output_index) const;
+        const LayerOutputs* GetOutputsFromAllLayers() const;
 };
 
 
@@ -113,7 +126,10 @@ ExecutionObject::Impl::Impl(Device* d,
     in_m(nullptr, 0),
     out_m(nullptr, 0),
     device_index_m(device_index),
-    current_frame_idx_m(0)
+    current_frame_idx_m(0),
+    num_network_layers_m(0),
+    trace_buf_params_m(nullptr, &__free_ddr),
+    trace_buf_params_sz_m(0)
 {
     // Allocate a heap for TI DL to use on the device
     tidl_extmem_heap_m.reset(malloc_ddr<char>(extmem_heap_size));
@@ -148,6 +164,11 @@ ExecutionObject::Impl::Impl(Device* d,
 
     k_initialize_m.reset(new Kernel(device_m,
                                     STRING(INIT_KERNEL), args, device_index_m));
+
+    // Save number of layers in the network
+    const TIDL_CreateParams* cp =
+                static_cast<const TIDL_CreateParams *>(create_arg.ptr());
+    num_network_layers_m = cp->net.numLayers;
 }
 
 // Pointer to implementation idiom: https://herbsutter.com/gotw/_100/:
@@ -229,6 +250,48 @@ float ExecutionObject::GetProcessTimeInMilliSeconds() const
     return ((float)GetProcessCycles())/frequency * 1000;
 }
 
+const LayerOutput* ExecutionObject::GetOutputFromLayer(
+                         uint32_t layer_index, uint32_t output_index) const
+{
+    return pimpl_m->GetOutputFromLayer(layer_index, output_index);
+}
+
+const LayerOutputs* ExecutionObject::GetOutputsFromAllLayers() const
+{
+    return pimpl_m->GetOutputsFromAllLayers();
+}
+
+//
+// Allocate an OpenCL buffer for TIDL layer output buffer metadata.
+// The device will populate metadata for every buffer that is used as an
+// output buffer by a layer.
+//
+void ExecutionObject::EnableOutputBufferTrace()
+{
+    pimpl_m->trace_buf_params_sz_m = (sizeof(OCL_TIDL_BufParams)*
+                                       pimpl_m->num_network_layers_m*
+                                       TIDL_NUM_OUT_BUFS);
+
+    pimpl_m->trace_buf_params_m.reset(malloc_ddr<OCL_TIDL_BufParams>
+                                      (pimpl_m->trace_buf_params_sz_m));
+
+    // Device will update bufferId if there is valid data for the entry
+    OCL_TIDL_BufParams* bufferParams = pimpl_m->trace_buf_params_m.get();
+    for (uint32_t i = 0; i < pimpl_m->num_network_layers_m; i++)
+        for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
+        {
+            OCL_TIDL_BufParams *bufP =
+                                &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
+            bufP->bufferId = UINT_MAX;
+        }
+}
+
+void
+ExecutionObject::WriteLayerOutputsToFile(const std::string& filename_prefix) const
+{
+    pimpl_m->WriteLayerOutputsToFile(filename_prefix);
+}
+
 //
 // Create a kernel to call the "process" function
 //
@@ -252,7 +315,10 @@ ExecutionObject::Impl::SetupProcessKernel(const ArgInfo& in, const ArgInfo& out)
                         in,
                         out,
                         ArgInfo(tidl_extmem_heap_m.get(),
-                                shared_initialize_params_m->tidlHeapSize)
+                                shared_initialize_params_m->tidlHeapSize),
+                        ArgInfo(trace_buf_params_m.get(),
+                                trace_buf_params_sz_m)
+
                       };
 
     k_process_m.reset(new Kernel(device_m,
@@ -292,10 +358,13 @@ static size_t writeDataS8(char *writePtr, const char *ptr, int n, int width,
     return width*height*n;
 }
 
+//
+// Copy from host buffer to TIDL device buffer
+//
 void ExecutionObject::Impl::HostWriteNetInput()
 {
-    char* readPtr  = (char *) in_m.ptr();
-    PipeInfo *pipe = in_m.GetPipe();
+    const char*     readPtr  = (const char *) in_m.ptr();
+    const PipeInfo* pipe     = in_m.GetPipe();
 
     for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
     {
@@ -325,10 +394,13 @@ void ExecutionObject::Impl::HostWriteNetInput()
     }
 }
 
+//
+// Copy from TIDL device buffer into host buffer
+//
 void ExecutionObject::Impl::HostReadNetOutput()
 {
     char* writePtr = (char *) out_m.ptr();
-    PipeInfo *pipe = out_m.GetPipe();
+    PipeInfo* pipe = out_m.GetPipe();
 
     for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
     {
@@ -456,3 +528,127 @@ bool ExecutionObject::Impl::Wait(CallType ct)
 
     return false;
 }
+
+//
+// Write the trace data to output files
+//
+void
+ExecutionObject::Impl::WriteLayerOutputsToFile(const std::string& filename_prefix) const
+{
+    if (trace_buf_params_sz_m == 0)
+        return;
+
+    OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
+
+    for (uint32_t i = 0; i < num_network_layers_m; i++)
+        for (int j = 0; j < TIDL_NUM_OUT_BUFS; j++)
+        {
+            OCL_TIDL_BufParams* buf = &bufferParams[i*TIDL_NUM_OUT_BUFS+j];
+
+            if (buf->bufferId == UINT_MAX)
+                continue;
+
+            size_t buffer_size = buf->numChannels * buf->ROIHeight *
+                                 buf->ROIWidth;
+
+            char *tmp = new char[buffer_size];
+
+            if (tmp == nullptr)
+                throw Exception("Out of memory, new failed",
+                        __FILE__, __FUNCTION__, __LINE__);
+
+            writeDataS8(
+                tmp,
+                (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
+                + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
+                + OCL_TIDL_MAX_PAD_SIZE,
+                buf->numChannels,
+                buf->ROIWidth,
+                buf->ROIHeight,
+                buf->bufPlaneWidth,
+                ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
+                 buf->numChannels));
+
+            std::string filename(filename_prefix);
+            filename += std::to_string(buf->bufferId) + "_";
+            filename += std::to_string(buf->ROIWidth) + "x";
+            filename += std::to_string(buf->ROIHeight) + ".bin";
+
+            std::ofstream ofs;
+            ofs.open(filename, std::ofstream::out);
+            ofs.write(tmp, buffer_size);
+            ofs.close();
+
+            delete[] tmp;
+        }
+}
+
+
+const LayerOutput* ExecutionObject::Impl::GetOutputFromLayer(
+                            uint32_t layer_index, uint32_t output_index) const
+{
+    if (trace_buf_params_sz_m == 0)
+        return nullptr;
+
+    if (layer_index > num_network_layers_m || output_index > TIDL_NUM_OUT_BUFS)
+        return nullptr;
+
+    OCL_TIDL_BufParams* bufferParams = trace_buf_params_m.get();
+    OCL_TIDL_BufParams* buf = &bufferParams[layer_index*TIDL_NUM_OUT_BUFS+
+                                            output_index];
+
+    if (buf->bufferId == UINT_MAX)
+        return nullptr;
+
+    size_t buffer_size = buf->numChannels * buf->ROIHeight *
+                         buf->ROIWidth;
+
+    char *data = new char[buffer_size];
+
+    if (data == nullptr)
+        throw Exception("Out of memory, new failed",
+                __FILE__, __FUNCTION__, __LINE__);
+
+    writeDataS8(data,
+                (char *) tidl_extmem_heap_m.get() + buf->bufPlaneBufOffset
+                + buf->bufPlaneWidth * OCL_TIDL_MAX_PAD_SIZE
+                + OCL_TIDL_MAX_PAD_SIZE,
+                buf->numChannels,
+                buf->ROIWidth,
+                buf->ROIHeight,
+                buf->bufPlaneWidth,
+                ((buf->bufPlaneWidth * buf->bufPlaneHeight)/
+                 buf->numChannels));
+
+    return new LayerOutput(layer_index, output_index, buf->bufferId,
+                           buf->numROIs, buf->numChannels, buf->ROIHeight,
+                           buf->ROIWidth, data);
+}
+
+const LayerOutputs* ExecutionObject::Impl::GetOutputsFromAllLayers() const
+{
+    LayerOutputs* result = new LayerOutputs;
+
+    for (uint32_t i=0; i < num_network_layers_m; i++)
+        for (int j=0; j < TIDL_NUM_OUT_BUFS; j++)
+        {
+            const LayerOutput* lo = GetOutputFromLayer(i, j);
+            if (lo)
+                result->push_back(std::unique_ptr<const LayerOutput>{ lo });
+        }
+
+    return result;
+}
+
+LayerOutput::LayerOutput(int layer_index, int output_index, int buffer_id,
+                         int num_roi, int num_channels, size_t height,
+                         size_t width, const char* data):
+                        layer_index_m(layer_index), buffer_id_m(buffer_id),
+                        num_roi_m(num_roi), num_channels_m(num_channels),
+                        height_m(height), width_m(width), data_m(data)
+{ }
+
+LayerOutput::~LayerOutput()
+{
+    delete[] data_m;
+}
diff --git a/tidl_api/src/executor.cpp b/tidl_api/src/executor.cpp
index 6283a98406e27d19c1f99a93781ecc7b5b7d06f2..d67f683a8e86cfbeefbc65f4c23ef69f4caa9a2a 100644 (file)
@@ -104,7 +104,7 @@ bool ExecutorImpl::Initialize(const Configuration& configuration)
     up_malloc_ddr<TIDL_CreateParams> shared_createparam(
                                             malloc_ddr<TIDL_CreateParams>(),
                                             &__free_ddr);
-    InitializeNetworkCreateParam(shared_createparam.get(), configuration);
+    InitializeNetworkCreateParam(shared_createparam.get());
 
     // Read network from file into network struct in TIDL_CreateParams
     sTIDL_Network_t *net = &(shared_createparam.get())->net;
@@ -114,9 +114,6 @@ bool ExecutorImpl::Initialize(const Configuration& configuration)
                              sizeof(sTIDL_Network_t));
     assert(status != false);
 
-    //TODO: Why is this set here?
-    net->interElementSize = 4;
-
     // Force to run full network if runFullNet is set
     if (configuration.runFullNet)
     {
@@ -143,6 +140,10 @@ bool ExecutorImpl::Initialize(const Configuration& configuration)
                                   configuration_m.enableInternalInput)} );
     }
 
+    if (configuration_m.enableOutputTrace)
+        for (auto &eo : execution_objects_m)
+            eo->EnableOutputBufferTrace();
+
     for (auto &eo : execution_objects_m)
         eo->RunAsync(ExecutionObject::CallType::INIT);
 
@@ -215,8 +216,7 @@ void ExecutorImpl::Cleanup()
 }
 
 
-void ExecutorImpl::InitializeNetworkCreateParam(TIDL_CreateParams *CP,
-                                          const Configuration& configuration)
+void ExecutorImpl::InitializeNetworkCreateParam(TIDL_CreateParams *CP)
 {
     CP->currCoreId           = layers_group_id_m;
     CP->currLayersGroupId    = layers_group_id_m;
@@ -227,7 +227,14 @@ void ExecutorImpl::InitializeNetworkCreateParam(TIDL_CreateParams *CP,
     CP->quantHistoryParam1   = tidl::internal::QUANT_HISTORY_PARAM1;
     CP->quantHistoryParam2   = tidl::internal::QUANT_HISTORY_PARAM2;
     CP->quantMargin          = tidl::internal::QUANT_MARGIN;
-    CP->optimiseExtMem       = TIDL_optimiseExtMemL1;
+
+    // If trace is enabled, setup the device TIDL library to allocate separate
+    // output buffers for each layer. This makes it possible for the host
+    // to access the output of each layer after a frame is processed.
+    if (configuration_m.enableOutputTrace)
+        CP->optimiseExtMem       = TIDL_optimiseExtMemL0;
+    else
+        CP->optimiseExtMem       = TIDL_optimiseExtMemL1;
 }
 
 Exception::Exception(const std::string& error, const std::string& file,
diff --git a/tidl_api/src/executor_impl.h b/tidl_api/src/executor_impl.h
index 3852631f6a465d37ef0e1a2b463a091d4633c318..81805dff279a5231ef55b30edf41c7a943b2afcf 100644 (file)
@@ -65,8 +65,7 @@ class ExecutorImpl
         ExecutionObjects execution_objects_m;
 
     private:
-        void InitializeNetworkCreateParam(TIDL_CreateParams *cp,
-                                          const Configuration& configuration);
+        void InitializeNetworkCreateParam(TIDL_CreateParams *cp);
         bool InitializeNetworkParams(TIDL_CreateParams *cp);
         void Cleanup();