Removed unused API functionality, added changelog
authorAjay Jayaraj <ajayj@ti.com>
Wed, 3 Oct 2018 21:57:49 +0000 (16:57 -0500)
committerAjay Jayaraj <ajayj@ti.com>
Mon, 8 Oct 2018 15:28:59 +0000 (10:28 -0500)
(MCT-1062)

15 files changed:
docs/source/changelog.rst [new file with mode: 0644]
docs/source/example.rst
docs/source/index.rst
docs/source/readme/index.rst [deleted file]
docs/source/readme/v01.00.x.rst [deleted file]
docs/source/readme/v01.01.x.rst [deleted file]
docs/source/using_api.rst
examples/classification/main.cpp
examples/test/main.cpp
tidl_api/inc/configuration.h
tidl_api/inc/executor.h
tidl_api/src/configuration.cpp
tidl_api/src/execution_object.cpp
tidl_api/src/executor.cpp
tidl_api/src/executor_impl.h

diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst
new file mode 100644 (file)
index 0000000..93ddfed
--- /dev/null
@@ -0,0 +1,48 @@
+#########
+Changelog
+#########
+
+1.2.0 [Processor Linux SDK 5.2]
+===============================
+**Added**
+
+* Python 3 bindings for TIDL API
+
+**Removed**
+
+* Configuration::enableInternalInput, not used.
+* Executor::GetExecutionObjects. Use Executor::operator[] and Executor::GetNumExecutionObjects() instead. See :ref:`examples` for usage.
+
+1.1.0 [Processor Linux SDK 5.1]
+===============================
+**Added**
+
+* :term:`ExecutionObjectPipeline` class to hide complexity of executing network across C66x/EVE
+* API methods for tracing outputs from intermediate network layers - see :ref:`network_layer_output`.
+* Support for updating layer group id assignment before execution - see :ref:`layer-group-override`.
+* Provide feedback to the user on parameter and network heap size requirements - see :ref:`sizing_device_heaps`.
+
+
+1.0.0 [Processor Linux SDK 5.0]
+===============================
+First release of the TI Deep Learning API. TIDL API brings deep learning to the edge by enabling applications to leverage TI's proprietary, highly optimized CNN/DNN implementation on the EVE and C66x DSP compute engines. TIDL will initially target Vision/2D use cases.
+
+**Supported AM57x Sitara Processors**
+
+ * `AM571x`_ (offload to C66x DSPs)
+ * `AM5728`_ (offload to C66x DSPs)
+ * `AM574x`_ (offload to EVEs and C66x DSPs)
+
+**Supported Evaluation Modules (EVMs)**
+
+ * `AM572x EVM`_
+ * `AM571x IDK EVM`_
+ * `AM574x IDK EVM`_
+
+
+.. _AM572x EVM:  http://www.ti.com/tool/tmdsevm572x
+.. _AM571x IDK EVM:  http://www.ti.com/tool/tmdxidk5718
+.. _AM574x IDK EVM:  http://www.ti.com/tool/tmdsidk574
+.. _AM571x:     http://www.ti.com/processors/sitara/arm-cortex-a15/am57x/products.html#p2098=1%20C66x&p809=2;2
+.. _AM5728:     http://www.ti.com/product/AM5728
+.. _AM574x:     http://www.ti.com/processors/sitara/arm-cortex-a15/am57x/products.html#p2098=2%20C66x&p815=ECC
index 21721c18d7bbec6699cb5c6325c1a09992e3bd12..28486ccb4d4d33690cdfcc744313504d2f7408e6 100644 (file)
@@ -1,3 +1,5 @@
+.. _examples:
+
 ********
 Examples
 ********
index a32f4c15fba8650c47e55f1974915a45d523bfcf..a020cc4ad3ce8171123b39912c9b13f26bf25d7c 100644 (file)
@@ -14,7 +14,7 @@ TI Deep Learning API User's Guide
    api
    viewer
    faq/index
-   readme/index
+   changelog
    notice
    disclaimer
 
diff --git a/docs/source/readme/index.rst b/docs/source/readme/index.rst
deleted file mode 100644 (file)
index 5247678..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-#############
-Release Notes
-#############
-
-.. toctree::
-   :maxdepth: 1
-
-   v01.01.x
-   v01.00.x
-
-
-Processor SDK to TIDL API version map
-=====================================
-
-+---------------+------------+--------------------------------------------+
-| Processor SDK | TIDL API   | Key features                               |
-+---------------+------------+--------------------------------------------+
-| 05.01.00.x    | 01.01.x    | Added ExecutionObjectPipeline and          |
-|               |            | output trace capability.                   |
-|               |            | Refer :doc:`v01.01.x` for details.         |
-+---------------+------------+--------------------------------------------+
-| 05.00.00.x    | 01.00.x    | Initial release. Refer                     |
-|               |            | Refer :doc:`v01.00.x` for details.         |
-+---------------+------------+--------------------------------------------+
-
diff --git a/docs/source/readme/v01.00.x.rst b/docs/source/readme/v01.00.x.rst
deleted file mode 100644 (file)
index 278deb9..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-******************
-TIDL API v01.00.x
-******************
-
-New Features
-=============
-First release of the TI Deep Learning API. TIDL API brings deep learning to the edge by enabling applications to leverage TI's proprietary, highly optimized CNN/DNN implementation on the EVE and C66x DSP compute engines. TIDL will initially target Vision/2D use cases and is available on the following AM57x Sitara Processors:
-
- * `AM571x`_ (offload to C66x DSPs)
- * `AM5728`_ (offload to C66x DSPs)
- * `AM574x`_ (offload to EVEs and C66x DSPs)
-
-Supported Evaluation Modules (EVMs)
-===================================
-
-* `AM572x EVM`_
-* `AM571x IDK EVM`_
-* `AM574x IDK EVM`_
-
-.. _AM572x EVM:  http://www.ti.com/tool/tmdsevm572x
-.. _AM571x IDK EVM:  http://www.ti.com/tool/tmdxidk5718
-.. _AM574x IDK EVM:  http://www.ti.com/tool/tmdsidk574
-.. _AM571x:     http://www.ti.com/processors/sitara/arm-cortex-a15/am57x/products.html#p2098=1%20C66x&p809=2;2
-.. _AM5728:     http://www.ti.com/product/AM5728
-.. _AM574x:     http://www.ti.com/processors/sitara/arm-cortex-a15/am57x/products.html#p2098=2%20C66x&p815=ECC
diff --git a/docs/source/readme/v01.01.x.rst b/docs/source/readme/v01.01.x.rst
deleted file mode 100644 (file)
index 8a231cc..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-******************
-TIDL API v01.01.x
-******************
-
-New Features
-=============
-
-* Enhance API to hide complexity of executing network across DSP/EVE
-* Add support for tracing outputs from intermediate network layers
-* Update layer group id assignment before execution
-* Provide feedback to the user on parameter and network heap size requirements
-
-Defect Fixes
-============
-N/A
-
-Supported Evaluation Modules (EVMs)
-===================================
-* `AM572x EVM`_
-* `AM571x IDK EVM`_
-* `AM574x IDK EVM`_
-
-.. _AM572x EVM:  http://www.ti.com/tool/tmdsevm572x
-.. _AM571x IDK EVM:  http://www.ti.com/tool/tmdxidk5718
-.. _AM574x IDK EVM:  http://www.ti.com/tool/tmdsidk574
-.. _AM571x:     http://www.ti.com/processors/sitara/arm-cortex-a15/am57x/products.html#p2098=1%20C66x&p809=2;2
-.. _AM5728:     http://www.ti.com/product/AM5728
-.. _AM574x:     http://www.ti.com/processors/sitara/arm-cortex-a15/am57x/products.html#p2098=2%20C66x&p815=ECC
index 9d216d00e9b5cda2e2ee2eae07684976bac73220..724824b5fc5305d813d26e8d978a0e7f28b2d2a7 100644 (file)
@@ -189,6 +189,8 @@ The only change in the code compared to :ref:`use-case-2` is to create an additi
 
 The complete example is available at ``/usr/share/ti/tidl/examples/two_eo_per_frame_opt/main.cpp``.
 
+.. _sizing_device_heaps:
+
 Sizing device side heaps
 ++++++++++++++++++++++++
 
@@ -255,6 +257,9 @@ and the ``configuration.showHeapStats = true`` line can be removed.
     The memory for parameter and network heaps is itself allocated from OpenCL global memory (CMEM). Refer :ref:`opencl-global-memory` for details.
 
 
+
+.. _network_layer_output:
+
 Accessing outputs of network layers
 +++++++++++++++++++++++++++++++++++
 
index 749b713f8dc43010322bba9e964e0ac2ca8bbfac..9f6da3aba600d9804f20a1bd468f11ecad0606ef 100644 (file)
@@ -111,26 +111,22 @@ Rect rectCrop[NUM_ROI];
 AvgFPSWindow fps_window(16);
 
 static int tf_postprocess(uchar *in, int size, int roi_idx, int frame_idx, int f_id);
-static void tf_preprocess(uchar *out, uchar *in, int size);
 static int ShowRegion(int roi_history[]);
 // from most recent to oldest at top indices
 static int selclass_history[MAX_NUM_ROI][3];
 
-bool __TI_show_debug_ = false;
-
 bool RunConfiguration(const std::string& config_file, int num_layers_groups,
                       uint32_t num_dsps, uint32_t num_eves);
 bool CreateExecutionObjectPipelines(uint32_t num_eves, uint32_t num_dsps,
-                                    Configuration& configuration, 
+                                    Configuration& configuration,
                                     uint32_t num_layers_groups,
                                     Executor*& e_eve, Executor*& e_dsp,
                                   std::vector<ExecutionObjectPipeline*>& eops);
 void AllocateMemory(const std::vector<ExecutionObjectPipeline*>& eops);
 void SetupLiveDisplay(uint32_t num_eves, uint32_t num_dsps);
 bool SetupInput(VideoCapture& cap, VideoWriter& writer);
-bool ReadFrame(ExecutionObjectPipeline* eop,
-               uint32_t frame_idx, uint32_t num_frames,
-               VideoCapture &cap, VideoWriter& writer);
+bool ReadFrame(ExecutionObjectPipeline* eop, const Configuration& c,
+               int frame_idx, VideoCapture &cap, VideoWriter& writer);
 void DisplayFrame(const ExecutionObjectPipeline* eop, VideoWriter& writer,
                   uint32_t frame_idx, uint32_t num_eops,
                   uint32_t num_eves, uint32_t num_dsps);
@@ -148,6 +144,7 @@ extern int selected_items[];
 extern int populate_selected_items (char *filename);
 extern void populate_labels (char *filename);
 
+bool verbose = false;
 
 int main(int argc, char *argv[])
 {
@@ -191,30 +188,21 @@ bool RunConfiguration(const std::string& config_file, int num_layers_groups, uin
 
     // Read the TI DL configuration file
     Configuration configuration;
-    bool status = configuration.ReadFromFile(config_file);
-    if (!status)
-    {
-        std::cerr << "Error in configuration file: " << config_file
-                  << std::endl;
+    if (!configuration.ReadFromFile(config_file))
         return false;
-    }
-
-    std::ifstream input_data_file(configuration.inData, std::ios::binary);
-    std::ofstream output_data_file(configuration.outData, std::ios::binary);
-    assert (input_data_file.good());
-    assert (output_data_file.good());
 
+    if (verbose)
+        configuration.enableApiTrace = true;
 
     try
     {
         // Create ExecutionObjectPipelines
-        Executor *e_eve = NULL;
-        Executor *e_dsp = NULL;
+        Executor *e_eve = nullptr;
+        Executor *e_dsp = nullptr;
         std::vector<ExecutionObjectPipeline *> eops;
         if (! CreateExecutionObjectPipelines(num_eves, num_dsps, configuration,
                                         num_layers_groups, e_eve, e_dsp, eops))
             return false;
-        uint32_t num_eops = eops.size();
 
         // Allocate input/output memory for each EOP
         AllocateMemory(eops);
@@ -227,17 +215,16 @@ bool RunConfiguration(const std::string& config_file, int num_layers_groups, uin
         VideoWriter writer;  // gstreamer
         if (! SetupInput(cap, writer))  return false;
 
-
         // More initialization
         for (int k = 0; k < NUM_ROI; k++)
             for(int i = 0; i < 3; i ++)
                 selclass_history[k][i] = -1;
-        int num_frames = configuration.numFrames;
         std::cout << "About to start ProcessFrame loop!!" << std::endl;
+
         // Process frames with available EOPs in a pipelined manner
         // additional num_eops iterations to flush the pipeline (epilogue)
-        for (uint32_t frame_idx = 0;
+        int num_eops = eops.size();
+        for (int frame_idx = 0;
              frame_idx < configuration.numFrames + num_eops; frame_idx++)
         {
             ExecutionObjectPipeline* eop = eops[frame_idx % num_eops];
@@ -253,7 +240,7 @@ bool RunConfiguration(const std::string& config_file, int num_layers_groups, uin
             }
             fps_window.Tick();
 
-            if (ReadFrame(eop, frame_idx, num_frames, cap, writer))
+            if (ReadFrame(eop, configuration, frame_idx, cap, writer))
                 eop->ProcessFrameStartAsync();
         }
 
@@ -264,25 +251,21 @@ bool RunConfiguration(const std::string& config_file, int num_layers_groups, uin
             free(eop->GetOutputBufferPtr());
             delete eop;
         }
-        if(num_dsps) delete e_dsp;
-        if(num_eves) delete e_eve;
+        if (e_dsp) delete e_dsp;
+        if (e_eve) delete e_eve;
     }
     catch (tidl::Exception &e)
     {
         std::cerr << e.what() << std::endl;
-        status = false;
+        return false;
     }
 
-
-    input_data_file.close();
-    output_data_file.close();
-
-    return status;
+    return true;
 }
 
 
 bool CreateExecutionObjectPipelines(uint32_t num_eves, uint32_t num_dsps,
-                                    Configuration& configuration, 
+                                    Configuration& configuration,
                                     uint32_t num_layers_groups,
                                     Executor*& e_eve, Executor*& e_dsp,
                                     std::vector<ExecutionObjectPipeline*>& eops)
@@ -474,11 +457,11 @@ bool SetupInput(VideoCapture& cap, VideoWriter& writer)
    return true;
 }
 
-bool ReadFrame(ExecutionObjectPipeline* eop,
-               uint32_t frame_idx, uint32_t num_frames,
-               VideoCapture &cap, VideoWriter& writer)
+bool ReadFrame(ExecutionObjectPipeline* eop, const Configuration& c,
+               int frame_idx, VideoCapture &cap, VideoWriter& writer)
 {
-    if (cap.grab() && frame_idx < num_frames)
+
+    if (cap.grab() && frame_idx < c.numFrames)
     {
         if (cap.retrieve(in_image))
         {
@@ -491,8 +474,8 @@ bool ReadFrame(ExecutionObjectPipeline* eop,
 
               cv::resize(in_image(Rect(loc_xmin, loc_ymin, loc_w, loc_h)), image, Size(RES_X, RES_Y));
             } else {
-              if((in_image.size().width != RES_X) || (in_image.size().height != RES_Y)) 
-              {  
+              if((in_image.size().width != RES_X) || (in_image.size().height != RES_Y))
+              {
                 cv::resize(in_image, image, Size(RES_X,RES_Y));
               }
             }
@@ -508,14 +491,15 @@ bool ReadFrame(ExecutionObjectPipeline* eop,
             }
 #endif
                 //Convert from BGR pixel interleaved to BGR plane interleaved!
-            cv::resize(r_image, cnn_image, Size(224,224));
+            cv::resize(r_image, cnn_image, Size(c.inWidth,c.inHeight));
             cv::split(cnn_image, bgr_frames);
-            tf_preprocess((uchar*) eop->GetInputBufferPtr(),
-                          bgr_frames[0].ptr(), 224*224);
-            tf_preprocess((uchar*) eop->GetInputBufferPtr()+224*224,
-                          bgr_frames[1].ptr(), 224*224);
-            tf_preprocess((uchar*) eop->GetInputBufferPtr()+2*224*224,
-                          bgr_frames[2].ptr(), 224*224);
+            int channel_size = c.inWidth * c.inHeight;
+
+            char* ptr = eop->GetInputBufferPtr();
+            memcpy(ptr,                bgr_frames[0].ptr(), channel_size);
+            memcpy(ptr+1*channel_size, bgr_frames[1].ptr(), channel_size);
+            memcpy(ptr+2*channel_size, bgr_frames[2].ptr(), channel_size);
+
             eop->SetFrameIndex(frame_idx);
 
 #ifdef RMT_GST_STREAMER
@@ -690,7 +674,7 @@ void ProcessArgs(int argc, char *argv[], std::string& config_file,
                       assert (num_eves >= 0 && num_eves <= 2);
                       break;
 
-            case 'v': __TI_show_debug_ = true;
+            case 'v': verbose = true;
                       break;
 
             case 'h': DisplayHelp();
@@ -786,14 +770,6 @@ int tf_postprocess(uchar *in, int size, int roi_idx, int frame_idx, int f_id)
   return rpt_id;
 }
 
-void tf_preprocess(uchar *out, uchar *in, int size)
-{
-  for (int i = 0; i < size; i++)
-  {
-    out[i] = (uchar) (in[i] /*- 128*/);
-  }
-}
-
 int ShowRegion(int roi_history[])
 {
   if((roi_history[0] >= 0) && (roi_history[0] == roi_history[1])) return roi_history[0];
index 3fdb9065438f8bd3ebe47e70af0c1183b3357985..be7bb851fae425532be3885cf59acc94d7a5ae4c 100644 (file)
@@ -237,7 +237,7 @@ bool RunAllConfigurations(int32_t num_devices, DeviceType device_type)
                           "j11_controlLayers", "j11_prelu", "j11_v2",
                           "jseg21", "jseg21_tiscapes", "smallRoi", "squeeze1_1"};
     else
-        configurations = {"j11_bn",
+        configurations = {"dense_1x1",  "j11_bn", "j11_cifar",
                           "j11_controlLayers", "j11_v2",
                           "jseg21", "jseg21_tiscapes", "smallRoi", "squeeze1_1"};
 
index 1fc033ceb32e7faa40d4ea8d5073c3b4b688aed5..a26fbb9c309eb1b6276661da34b13a6a915ac3e3 100644 (file)
@@ -72,11 +72,6 @@ class Configuration
     //! Force to run all layers, regardless of layersGroupId partitioning
     bool    runFullNet;
 
-    //! Deprecated, do not use.
-    //! When set, inputs are taken from TIDL internal buffers that contain
-    //! outputs of previous layersGroupId, instead of from user application
-    bool     enableInternalInput;
-
     //! @brief Size of the device side network heap
     //! This heap is used for allocating memory required to
     //! run the network on the device. One per Execution Object.
index fb730729f8bd94d4a7c45bd6768933816efef48b..faa637a767927ffa7a36b6a3e8792a468f913d13 100644 (file)
@@ -60,9 +60,6 @@ typedef std::set<DeviceId> DeviceIds;
 class ExecutorImpl;
 class ExecutionObject;
 
-//! Defines the return type for Executor::GetExecutionObjects
-typedef std::vector<std::unique_ptr<ExecutionObject>> ExecutionObjects;
-
 /*! @class Executor
     @brief Manages the overall execution of a layersGroup in a network using the
     specified configuration and the set of devices available to the
@@ -94,10 +91,6 @@ class Executor
         //! Executor object
         ~Executor();
 
-        //! Returns a vector of unique_ptr's to execution objects
-        //! available on this instance of the Executor
-        const ExecutionObjects& GetExecutionObjects() const;
-
         //! Returns a single execution object at index
         ExecutionObject* operator[](uint32_t index) const;
 
index daa15f41dcd5003dc7a559645706b0ae4d2c893a..789638730f21791b4f303c405eeb68abb57da0fe 100644 (file)
@@ -39,7 +39,6 @@ Configuration::Configuration(): numFrames(0), inHeight(0), inWidth(0),
                      noZeroCoeffsPercentage(100),
                      preProcType(0),
                      runFullNet(false),
-                     enableInternalInput(false),
                      NETWORK_HEAP_SIZE(64 << 20),  // 64MB for inceptionNetv1
                      PARAM_HEAP_SIZE(9 << 20),    // 9MB for mobileNet1
                      enableOutputTrace(false),
@@ -55,7 +54,6 @@ void Configuration::Print(std::ostream &os) const
                            << inHeight << "x" << inNumChannels
        << "\nPreProcType              " << preProcType
        << "\nRunFullNet               " << runFullNet
-       << "\nEnableInternalInput      " << enableInternalInput
        << "\nInputFile                " << inData
        << "\nOutputFile               " << outData
        << "\nNetwork                  " << netBinFile
index e89a96e075a04b2ab2f0fb9843bdc24fb53a6679..00d6804d03ad2f481e034e78d1ba589c09eaf1e1 100644 (file)
@@ -334,8 +334,7 @@ ExecutionObject::Impl::SetupInitializeKernel(const DeviceArgInfo& create_arg,
     shared_initialize_params_m->tidlHeapSize =configuration_m.NETWORK_HEAP_SIZE;
     shared_initialize_params_m->l2HeapSize   = tidl::internal::DMEM1_SIZE;
     shared_initialize_params_m->l1HeapSize   = tidl::internal::DMEM0_SIZE;
-    shared_initialize_params_m->enableInternalInput =
-                   configuration_m.enableInternalInput ? 1 : 0;
+    shared_initialize_params_m->enableInternalInput = 0;
 
     // Set up execution trace specified in the configuration
     EnableExecutionTrace(configuration_m,
index cf867c54ec707f6d1d7726007b81550a8552a1b7..9bd3a6e72678664e48a7728a93dc12fe2cdb07f0 100644 (file)
@@ -96,12 +96,6 @@ ExecutorImpl::ExecutorImpl(DeviceType core_type, const DeviceIds& ids,
     device_m = Device::Create(core_type_m, ids, name);
 }
 
-
-const ExecutionObjects& Executor::GetExecutionObjects() const
-{
-    return pimpl_m->execution_objects_m;
-}
-
 ExecutionObject* Executor::operator[](uint32_t index) const
 {
     assert(index < pimpl_m->execution_objects_m.size());
index 81805dff279a5231ef55b30edf41c7a943b2afcf..a6a84215858b6f192bcb602ff879bc6bd5f5db75 100644 (file)
@@ -44,6 +44,7 @@
 
 namespace tidl {
 
+typedef std::vector<std::unique_ptr<ExecutionObject>> ExecutionObjects;
 
 // One instance across all devices available in the context
 // Also need this to work in host emulation mode
@@ -56,9 +57,6 @@ class ExecutorImpl
 
         bool Initialize(const Configuration& configuration);
 
-        ExecutionObjects& GetExecutionObjects()
-        { return execution_objects_m; }
-
         ExecutorImpl(const ExecutorImpl&)            = delete;
         ExecutorImpl& operator=(const ExecutorImpl&) = delete;