Compute input/output size based on network
author Yuan Zhao <yuanzhao@ti.com>
Tue, 8 May 2018 17:00:12 +0000 (12:00 -0500)
committer Yuan Zhao <yuanzhao@ti.com>
Thu, 10 May 2018 20:55:52 +0000 (15:55 -0500)
- MCT-974

examples/imagenet/main.cpp
examples/segmentation/main.cpp
examples/ssd_multibox/main.cpp
examples/test/main.cpp
tinn_api/Makefile
tinn_api/src/execution_object.cpp

index bcd61931662cc89339ecf54493cdf5d534222ad0..d7decc6b79a085a34b60d55efc147a9e92a4fb08 100644 (file)
@@ -157,10 +157,6 @@ bool RunConfiguration(const std::string& config_file, int num_devices,
         image_file = input_file;
     }
 
-    // Determine input frame size from configuration
-    size_t frame_sz = configuration.inWidth * configuration.inHeight *
-                      configuration.inNumChannels;
-
     try
     {
         // Create a executor with the approriate core type, number of cores
@@ -176,8 +172,10 @@ bool RunConfiguration(const std::string& config_file, int num_devices,
         std::vector<void *> buffers;
         for (auto &eo : execution_objects)
         {
-            ArgInfo in  = { ArgInfo(malloc(frame_sz), frame_sz)};
-            ArgInfo out = { ArgInfo(malloc(frame_sz), frame_sz)};
+            size_t in_size  = eo->GetInputBufferSizeInBytes();
+            size_t out_size = eo->GetOutputBufferSizeInBytes();
+            ArgInfo in  = { ArgInfo(malloc(in_size),  in_size)};
+            ArgInfo out = { ArgInfo(malloc(out_size), out_size)};
             eo->SetInputOutputBuffer(in, out);
 
             buffers.push_back(in.ptr());
index ed7b53375f8f880e98620acc3f987c841339a8ce..67b712c075662798a50fbbfa74d86f9b64b37b2b 100644 (file)
@@ -176,10 +176,6 @@ bool RunConfiguration(const std::string& config_file, int num_devices,
         image_file = input_file;
     }
 
-    // Determine input frame size from configuration
-    size_t frame_sz = configuration.inWidth * configuration.inHeight *
-                      configuration.inNumChannels;
-
     try
     {
         // Create a executor with the approriate core type, number of cores
@@ -195,8 +191,10 @@ bool RunConfiguration(const std::string& config_file, int num_devices,
         std::vector<void *> buffers;
         for (auto &eo : execution_objects)
         {
-            ArgInfo in  = { ArgInfo(malloc(frame_sz), frame_sz)};
-            ArgInfo out = { ArgInfo(malloc(frame_sz), frame_sz)};
+            size_t in_size  = eo->GetInputBufferSizeInBytes();
+            size_t out_size = eo->GetOutputBufferSizeInBytes();
+            ArgInfo in  = { ArgInfo(malloc(in_size),  in_size)};
+            ArgInfo out = { ArgInfo(malloc(out_size), out_size)};
             eo->SetInputOutputBuffer(in, out);
 
             buffers.push_back(in.ptr());
index 50545a7b81eb970c9ececc8f62da2a5c1847ad1f..5780fe8d693ccb38dbe9c50f95942be0bf05e112 100644 (file)
@@ -177,10 +177,6 @@ bool RunConfiguration(const std::string& config_file, int num_devices,
         image_file = input_file;
     }
 
-    // Determine input frame size from configuration
-    size_t frame_sz = configuration.inWidth * configuration.inHeight *
-                      configuration.inNumChannels;
-
     try
     {
         // Create a executor with the approriate core type, number of cores
@@ -196,8 +192,10 @@ bool RunConfiguration(const std::string& config_file, int num_devices,
         std::vector<void *> buffers;
         for (auto &eo : execution_objects)
         {
-            ArgInfo in  = { ArgInfo(malloc(frame_sz), frame_sz)};
-            ArgInfo out = { ArgInfo(malloc(frame_sz), frame_sz)};
+            size_t in_size  = eo->GetInputBufferSizeInBytes();
+            size_t out_size = eo->GetOutputBufferSizeInBytes();
+            ArgInfo in  = { ArgInfo(malloc(in_size),  in_size)};
+            ArgInfo out = { ArgInfo(malloc(out_size), out_size)};
             eo->SetInputOutputBuffer(in, out);
 
             buffers.push_back(in.ptr());
index 4d6f2b7ea00ca48f62055c5c1c611973eab0357e..2233dad413228c6954ee3323e686a529f354e43a 100644 (file)
@@ -150,10 +150,6 @@ bool RunConfiguration(const std::string& config_file, int num_devices,
     assert (input_data_file.good());
     assert (output_data_file.good());
 
-    // Determine input frame size from configuration
-    size_t frame_sz = configuration.inWidth * configuration.inHeight *
-                      configuration.inNumChannels;
-
     try
     {
         // Create a executor with the approriate core type, number of cores
@@ -169,8 +165,10 @@ bool RunConfiguration(const std::string& config_file, int num_devices,
         std::vector<void *> buffers;
         for (auto &eo : execution_objects)
         {
-            ArgInfo in  = { ArgInfo(malloc(frame_sz), frame_sz)};
-            ArgInfo out = { ArgInfo(malloc(frame_sz), frame_sz)};
+            size_t in_size  = eo->GetInputBufferSizeInBytes();
+            size_t out_size = eo->GetOutputBufferSizeInBytes();
+            ArgInfo in  = { ArgInfo(malloc(in_size),  in_size)};
+            ArgInfo out = { ArgInfo(malloc(out_size), out_size)};
             eo->SetInputOutputBuffer(in, out);
 
             buffers.push_back(in.ptr());
index f403fa1d5534747519a8b4104ff066fb03e4d253..6c7db05852ea4f0c7a1e9e21426114f8ab04e312 100644 (file)
@@ -84,6 +84,7 @@ $(DSP_OUTFILE): $(DSP_SRCFILE)
        $(MAKE) -C dsp
 
 src/ocl_device.cpp: $(DSP_OUTFILE)
+       touch $@
 
 obj/%.o: src/%.cpp $(HEADERS)
        @mkdir -p obj
index 46606833e140e9b39482ba5446f5838fe389eed9..6a71d87b47550c9708709974f161bec95593c163 100644 (file)
@@ -54,6 +54,7 @@ class ExecutionObject::Impl
         bool SetupProcessKernel(const ArgInfo& in, const ArgInfo& out);
         void HostWriteNetInput();
         void HostReadNetOutput();
+        void ComputeInputOutputSizes();
 
         Device*                         device_m;
         std::unique_ptr<Kernel>         k_initialize_m;
@@ -64,6 +65,8 @@ class ExecutionObject::Impl
         up_malloc_ddr<OCL_TIDL_InitializeParams> shared_initialize_params_m;
         up_malloc_ddr<OCL_TIDL_ProcessParams>    shared_process_params_m;
 
+        size_t                          in_size;
+        size_t                          out_size;
         ArgInfo                         in_m;
         ArgInfo                         out_m;
 
@@ -101,6 +104,8 @@ ExecutionObject::Impl::Impl(Device* d,
     tidl_extmem_heap_m (nullptr, &__free_ddr),
     shared_initialize_params_m(nullptr, &__free_ddr),
     shared_process_params_m(nullptr, &__free_ddr),
+    in_size(0),
+    out_size(0),
     in_m(nullptr, 0),
     out_m(nullptr, 0),
     device_index_m(device_index),
@@ -152,7 +157,8 @@ char* ExecutionObject::GetInputBufferPtr() const
 
 size_t ExecutionObject::GetInputBufferSizeInBytes() const
 {
-    return pimpl_m->in_m.size();
+    if (pimpl_m->in_m.ptr() == nullptr)  return pimpl_m->in_size;
+    else                                 return pimpl_m->in_m.size();
 }
 
 char* ExecutionObject::GetOutputBufferPtr() const
@@ -162,7 +168,8 @@ char* ExecutionObject::GetOutputBufferPtr() const
 
 size_t ExecutionObject::GetOutputBufferSizeInBytes() const
 {
-    return pimpl_m->shared_process_params_m.get()->bytesWritten;
+    if (pimpl_m->out_m.ptr() == nullptr)  return pimpl_m->out_size;
+    else           return pimpl_m->shared_process_params_m.get()->bytesWritten;
 }
 
 void  ExecutionObject::SetFrameIndex(int idx)
@@ -320,6 +327,23 @@ void ExecutionObject::Impl::HostReadNetOutput()
     shared_process_params_m->bytesWritten = writePtr - (char *) out_m.ptr();
 }
 
+void ExecutionObject::Impl::ComputeInputOutputSizes()
+{
+    in_size  = 0;
+    out_size = 0;
+    for (unsigned int i = 0; i < shared_initialize_params_m->numInBufs; i++)
+    {
+        OCL_TIDL_BufParams *inBuf = &shared_initialize_params_m->inBufs[i];
+        in_size += inBuf->numROIs * inBuf->numChannels * inBuf->ROIWidth *
+                   inBuf->ROIHeight;
+    }
+    for (unsigned int i = 0; i < shared_initialize_params_m->numOutBufs; i++)
+    {
+        OCL_TIDL_BufParams *outBuf = &shared_initialize_params_m->outBufs[i];
+        out_size += outBuf->numChannels * outBuf->ROIWidth * outBuf->ROIHeight;
+    }
+}
+
 
 bool ExecutionObject::Impl::RunAsync(CallType ct)
 {
@@ -363,6 +387,7 @@ bool ExecutionObject::Impl::Wait(CallType ct)
                 if (shared_initialize_params_m->errorCode != OCL_TIDL_SUCCESS)
                     throw Exception(shared_initialize_params_m->errorCode,
                                     __FILE__, __FUNCTION__, __LINE__);
+                ComputeInputOutputSizes();
             }
             return has_work;
         }
@@ -371,11 +396,10 @@ bool ExecutionObject::Impl::Wait(CallType ct)
             bool has_work = k_process_m->Wait();
             if (has_work)
             {
-                HostReadNetOutput();
-
                 if (shared_process_params_m->errorCode != OCL_TIDL_SUCCESS)
                     throw Exception(shared_process_params_m->errorCode,
                                     __FILE__, __FUNCTION__, __LINE__);
+                HostReadNetOutput();
             }
 
             return has_work;