summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: f4aea3a)
raw | patch | inline | side by side (parent: f4aea3a)
author | Ajay Jayaraj <ajayj@ti.com> | |
Mon, 15 Oct 2018 16:00:46 +0000 (11:00 -0500) | ||
committer | Ajay Jayaraj <ajayj@ti.com> | |
Fri, 19 Oct 2018 21:46:52 +0000 (16:46 -0500) |
1. Simplify context API in ExecutionObject. Replace context_id variants to
multiple existing APIs with these two APIs:
bool AcquireAndRunContext(uint32_t& context_idx,
int frame_idx,
const IODeviceArgInfo& in,
const IODeviceArgInfo& out);
bool WaitAndReleaseContext(uint32_t context_idx);
2. The timing methods for host execution in EOPs and EOs:
* GetProcessTimeInMilliSeconds()
* GetHostProcessTimeInMilliSeconds()
are no longer accurate with multiple contexts and pipelining.
Replace these methods and replace with a generic timestamp
based approach. There is a single API call to enable time stamps in an
application:
//! Enable time stamp generation for TIDL API events
bool EnableTimeStamps(const std::string& file = "timestamp.log", size_t
num_frames=32);
If this method is called before TIDL API frame processing, the API will
generate timestamps for events corresponding to each frame (e.g.
EOP::ProcessFrameStartAsync, EOP::ProcessFrameWait, etc.). These
timestamps are then written to file when the user's application
completes.
A separate script is used for post-processing the time stamps and
generating data for the user.
(MCT-1073, MCT-1074)
multiple existing APIs with these two APIs:
bool AcquireAndRunContext(uint32_t& context_idx,
int frame_idx,
const IODeviceArgInfo& in,
const IODeviceArgInfo& out);
bool WaitAndReleaseContext(uint32_t context_idx);
2. The timing methods for host execution in EOPs and EOs:
* GetProcessTimeInMilliSeconds()
* GetHostProcessTimeInMilliSeconds()
are no longer accurate with multiple contexts and pipelining.
Replace these methods and replace with a generic timestamp
based approach. There is a single API call to enable time stamps in an
application:
//! Enable time stamp generation for TIDL API events
bool EnableTimeStamps(const std::string& file = "timestamp.log", size_t
num_frames=32);
If this method is called before TIDL API frame processing, the API will
generate timestamps for events corresponding to each frame (e.g.
EOP::ProcessFrameStartAsync, EOP::ProcessFrameWait, etc.). These
timestamps are then written to file when the user's application
completes.
A separate script is used for post-processing the time stamps and
generating data for the user.
(MCT-1073, MCT-1074)
17 files changed:
index 9f6da3aba600d9804f20a1bd468f11ecad0606ef..f0473f4f6bb6c89508f653f4e90205404f55106d 100644 (file)
std::string& config_file,
uint32_t & num_dsps, uint32_t &num_eves,
int & num_layers_groups);
-void ReportTime(const ExecutionObjectPipeline* eop);
static void DisplayHelp();
extern std::string labels_classes[];
@@ -232,9 +231,6 @@ bool RunConfiguration(const std::string& config_file, int num_layers_groups, uin
// Wait for previous frame on the same eo to finish processing
if (eop->ProcessFrameWait())
{
- #ifdef PERF_VERBOSE
- ReportTime(eop);
- #endif
DisplayFrame(eop, writer, frame_idx, num_eops,
num_eves, num_dsps);
}
return false;
}
-void ReportTime(const ExecutionObjectPipeline* eop)
-{
- uint32_t frame_index = eop->GetFrameIndex();
- std::string device_name = eop->GetDeviceName();
- float elapsed_host = eop->GetHostProcessTimeInMilliSeconds();
- float elapsed_device = eop->GetProcessTimeInMilliSeconds();
- double overhead = 100 - (elapsed_device/elapsed_host*100);
- std::cout << "frame[" << frame_index << "]: "
- << "Time on " << device_name << ": "
- << std::setw(6) << std::setprecision(4)
- << elapsed_device << "ms, "
- << "host: "
- << std::setw(6) << std::setprecision(4)
- << elapsed_host << "ms ";
- std::cout << "API overhead: "
- << std::setw(6) << std::setprecision(3)
- << overhead << " %" << std::endl;
-}
void DisplayFrame(const ExecutionObjectPipeline* eop, VideoWriter& writer,
uint32_t frame_idx, uint32_t num_eops,
index 43810a9773809e475188ee93ed2d1fcc6c2c7c72..4476c17a706022328d702ff565f5995a9d94589b 100644 (file)
void ReportTime(const ExecutionObject* eo)
{
- double elapsed_host = eo->GetHostProcessTimeInMilliSeconds();
double elapsed_device = eo->GetProcessTimeInMilliSeconds();
- double overhead = 100 - (elapsed_device/elapsed_host*100);
- std::cout << format("frame[%3d]: Time on %s: %4.2f ms, host: %4.2f ms"
- " API overhead: %2.2f %%\n")
+ std::cout << format("frame[%3d]: Time on %s: %4.2f ms\n")
% eo->GetFrameIndex() % eo->GetDeviceName()
- % elapsed_device % elapsed_host % overhead;
-}
-
-void ReportTime(const ExecutionObjectPipeline* eop)
-{
- double elapsed_host = eop->GetHostProcessTimeInMilliSeconds();
- double elapsed_device = eop->GetProcessTimeInMilliSeconds();
- double overhead = 100 - (elapsed_device/elapsed_host*100);
-
- std::cout << format("frame[%3d]: Time on %s: %4.2f ms, host: %4.2f ms"
- " API overhead: %2.2f %%\n")
- % eop->GetFrameIndex() % eop->GetDeviceName()
- % elapsed_device % elapsed_host % overhead;
+ % elapsed_device;
}
// Compare output against reference output
index 57c570d31aede1d20d0e5e62081753d552304d13..3774d1eb2337e91f368db43661cf47260edbbf14 100644 (file)
--- a/examples/common/utils.h
+++ b/examples/common/utils.h
bool WriteFrame(const ExecutionObject* eo, std::ostream& output_file);
void ReportTime(const ExecutionObject* eo);
-void ReportTime(const ExecutionObjectPipeline* eop);
bool CheckFrame(const ExecutionObject* eo, const char *ref_output);
bool CheckFrame(const ExecutionObjectPipeline *eop, const char *ref_output);
index 4cce811f52017279c0a7bed0898091acf2aa0e45..149f759ad50befebb136bf501490cb1cc0c9fb51 100644 (file)
// Wait for previous frame on the same eop to finish processing
if (eop->ProcessFrameWait())
{
- ReportTime(eop);
WriteFrameOutput(*eop);
}
index 35294c4d444a302dbf203369317d7adde1313a1b..4e538b74cab714964810ef98bc8afabb6c292585 100644 (file)
// Wait for previous frame on the same eop to finish processing
if (eop->ProcessFrameWait())
{
- ReportTime(eop);
WriteFrameOutput(*eop, c, opts);
}
index 4bcc707984bae927bf2c521e45c2138a89061872..b9c57dbbdb4308929de827c596c808f10f3ccd6d 100644 (file)
// Wait for previous frame on the same eop to finish processing
if (eop->ProcessFrameWait())
{
- ReportTime(eop);
WriteFrameOutput(*eop, c, opts);
}
index 5cc618feec4515aeb02783e06ff6c3e1afa88e88..d1aa6dc7a5d403538e5cdbbedb9f5ae135847d48 100644 (file)
// Wait for previous frame on the same EOP to finish processing
if (eop->ProcessFrameWait())
{
- ReportTime(eop);
-
// The reference output is valid only for the first frame
// processed on each EOP
if (frame_idx < num_eops && !CheckFrame(eop, ref_output))
index 8c67e556b5ee2b218ae861354fb1c4e72de64ae5..659485e1dbf7e1c053593a2bc4a2bad6d5cb6dd0 100644 (file)
// Wait for previous frame on the same EOP to finish processing
if (eop->ProcessFrameWait())
{
- ReportTime(eop);
-
// The reference output is valid only for the first frame
// processed on each EOP
if (frame_idx < num_eops && !CheckFrame(eop, ref_output))
index d875d7b5e9244748c66db8e382ec31334d4f6a49..5abe33d15370315ebc388dbb49ef8626218d70a0 100644 (file)
void SetInputOutputBuffer(const ArgInfo& in,
const ArgInfo& out) override;
- //! Specify the input and output buffers used by the EO in a context
- //! @param in buffer used for input.
- //! @param out buffer used for output.
- //! @param context_idx the index of the context
- void SetInputOutputBuffer(const ArgInfo& in,
- const ArgInfo& out, uint32_t context_idx);
-
//! Returns a pointer to the input buffer set via SetInputOutputBuffer
char* GetInputBufferPtr() const override;
//! returns immediately. Use ExecutionObject::ProcessFrameWait to wait
bool ProcessFrameStartAsync() override;
- //! @brief Start processing with a context. The call is asynchronous and
- //! returns immediately. Use ExecutionObject::ProcessFrameWait to wait
- //! @param context_idx the index of the context
- bool ProcessFrameStartAsync(uint32_t context_idx);
-
//! Wait for the execution object to complete processing a frame
//! @return false if ExecutionObject::ProcessFrameWait was called
//! without a corresponding call to
//! ExecutionObject::ProcessFrameStartAsync.
bool ProcessFrameWait() override;
- //! Wait for the execution object to complete processing with a context
- //! @param context_idx the index of the context
- //! @return false if ExecutionObject::ProcessFrameWait was called
- //! without a corresponding call to
- //! ExecutionObject::ProcessFrameStartAsync.
- bool ProcessFrameWait(uint32_t context_idx);
-
//! @brief return the number of milliseconds taken *on the device* to
//! execute the process call
//! @return Number of milliseconds to process a frame on the device.
- float GetProcessTimeInMilliSeconds() const override;
-
- //! @brief return the number of milliseconds taken *on the device* to
- //! execute the process call with a contex
- //! @param context_idx the index of the context
- //! @return Number of milliseconds to process a frame on the device.
- float GetProcessTimeInMilliSeconds(uint32_t context_idx) const;
-
- //! @brief return the number of milliseconds taken *on the host* to
- //! execute the process call
- //! @return Number of milliseconds to process a frame on the host.
- float GetHostProcessTimeInMilliSeconds() const override;
-
- //! @brief return the number of milliseconds taken *on the host* to
- //! execute the process call with a contex
- //! @param context_idx the index of the context
- //! @return Number of milliseconds to process a frame on the host.
- float GetHostProcessTimeInMilliSeconds(uint32_t context_idx) const;
+ float GetProcessTimeInMilliSeconds() const;
//! Returns the device name that the ExecutionObject runs on
const std::string& GetDeviceName() const override;
//! @private
// Used by the ExecutionObjectPipeline
bool AddCallback(CallType ct, void *user_data, uint32_t context_idx);
- void AcquireContext(uint32_t& context_idx);
- void ReleaseContext(uint32_t context_idx);
+ bool AcquireAndRunContext(uint32_t& context_idx,
+ int frame_idx,
+ const IODeviceArgInfo& in,
+ const IODeviceArgInfo& out);
+
+ bool WaitAndReleaseContext(uint32_t context_idx);
ExecutionObject() = delete;
ExecutionObject(const ExecutionObject&) = delete;
ExecutionObject& operator=(const ExecutionObject&) = delete;
- //! @private
- void SetInputOutputBuffer(const IODeviceArgInfo* in,
- const IODeviceArgInfo* out, uint32_t context_idx);
-
private:
class Impl;
std::unique_ptr<Impl> pimpl_m;
index 816da94adeffa9b45810646a8a683af75708a500..e9da88b7249c0c4ce3ad31f5b6754ada73370aee 100644 (file)
//! ExecutionObject::ProcessFrameStartAsync.
virtual bool ProcessFrameWait() =0;
- //! @brief return the number of milliseconds taken *on the device* to
- //! execute the process call
- //! @return Number of milliseconds to process a frame on the device.
- virtual float GetProcessTimeInMilliSeconds() const =0;
-
- //! @brief return the number of milliseconds taken *on the host* to
- //! execute the process call
- //! @return Number of milliseconds to process a frame on the host.
- virtual float GetHostProcessTimeInMilliSeconds() const =0;
-
//! Returns the device name that the ExecutionObject runs on
virtual const std::string& GetDeviceName() const =0;
index b31fec6ab94882222e0cc660912394b715960b00..0787d0e9dbc1a4eb165b09f84d737f4efc778f54 100644 (file)
//! ExecutionObjectPipeline::ProcessFrameStartAsync().
bool ProcessFrameWait() override;
- //! @brief return the number of milliseconds taken *on the device* to
- //! execute the process call
- //! @return Number of milliseconds to process a frame on the device.
- float GetProcessTimeInMilliSeconds() const override;
-
- //! @brief return the number of milliseconds taken *on the device*
- //! to process a layersGroup by a componenet ExecutionObject
- //! @return Number of milliseconds to process a layersGroup on the
- //! device by a component ExecutionObject.
- float GetProcessTimeInMilliSeconds(uint32_t eo_index) const;
-
- //! @brief return the number of milliseconds taken *on the host* to
- //! execute the process call
- //! @return Number of milliseconds to process a frame on the host.
- float GetHostProcessTimeInMilliSeconds() const override;
-
- //! @brief return the number of milliseconds taken *on the host*
- //! to process a layersGroup by a componenet ExecutionObject
- //! @return Number of milliseconds to process a layersGroup on the
- //! host by a component ExecutionObject.
- float GetHostProcessTimeInMilliSeconds(uint32_t eo_index) const;
-
//! Return the combined device names that this pipeline runs on
const std::string& GetDeviceName() const override;
index faa637a767927ffa7a36b6a3e8792a468f913d13..11e6aa3fa32c75ea199e5a522571e521548d485a 100644 (file)
--- a/tidl_api/inc/executor.h
+++ b/tidl_api/inc/executor.h
return val;
}
+//! Enable time stamp generation for TIDL API events
+bool EnableTimeStamps(const std::string& file = "timestamp.log",
+ size_t num_frames=32);
+
/*! @class Exception
* @brief Used to error reporting
*/
index 9bc9f05d61f1cd06574f986fc175ef95c1347b7c..b6292da6761a35f962707839f01f7065a39d09cc 100644 (file)
#include "common_defines.h"
#include "tidl_create_params.h"
#include "device_arginfo.h"
+#include "util.h"
using namespace tidl;
IODeviceArgInfo out_m[tidl::internal::NUM_CONTEXTS];
// Frame being processed by the EO
- int current_frame_idx_m;
+ int current_frame_idx_m[tidl::internal::NUM_CONTEXTS];
// LayersGroupId being processed by the EO
- int layers_group_id_m;
+ int layers_group_id_m;
// Trace related
void WriteLayerOutputsToFile (const std::string& filename_prefix) const;
up_malloc_ddr<OCL_TIDL_BufParams> trace_buf_params_m;
size_t trace_buf_params_sz_m;
- // host time tracking: eo start to finish
- float host_time_m[tidl::internal::NUM_CONTEXTS];
-
private:
void SetupInitializeKernel(const DeviceArgInfo& create_arg,
const DeviceArgInfo& param_heap_arg);
shared_process_params_m(nullptr, &__free_ddr),
in_size_m(0),
out_size_m(0),
- current_frame_idx_m(0),
layers_group_id_m(layers_group_id),
num_network_layers_m(0),
trace_buf_params_m(nullptr, &__free_ddr),
void ExecutionObject::SetFrameIndex(int idx)
{
- pimpl_m->current_frame_idx_m = idx;
+ pimpl_m->current_frame_idx_m[0] = idx;
}
int ExecutionObject::GetFrameIndex() const
{
- return pimpl_m->current_frame_idx_m;
+ return pimpl_m->current_frame_idx_m[0];
}
void ExecutionObject::SetInputOutputBuffer(const ArgInfo& in,
const ArgInfo& out)
{
- SetInputOutputBuffer(in, out, 0);
-}
-
-void ExecutionObject::SetInputOutputBuffer(const ArgInfo& in,
- const ArgInfo& out, uint32_t context_idx)
-{
- assert(in.ptr() != nullptr && in.size() >= pimpl_m->in_size_m);
- assert(out.ptr() != nullptr && out.size() >= pimpl_m->out_size_m);
-
- pimpl_m->in_m[context_idx] = IODeviceArgInfo(in);
- pimpl_m->out_m[context_idx] = IODeviceArgInfo(out);
-}
-
-void ExecutionObject::SetInputOutputBuffer(const IODeviceArgInfo* in,
- const IODeviceArgInfo* out,
- uint32_t context_idx)
-{
- pimpl_m->in_m[context_idx] = *in;
- pimpl_m->out_m[context_idx] = *out;
+ pimpl_m->in_m[0] = IODeviceArgInfo(in);
+ pimpl_m->out_m[0] = IODeviceArgInfo(out);
}
bool ExecutionObject::ProcessFrameStartAsync()
{
- return ProcessFrameStartAsync(0);
-}
-
-bool ExecutionObject::ProcessFrameStartAsync(uint32_t context_idx)
-{
- TRACE::print("-> ExecutionObject::ProcessFrameStartAsync(%d)\n",
- context_idx);
- assert(GetInputBufferPtr() != nullptr && GetOutputBufferPtr() != nullptr);
- return pimpl_m->RunAsync(ExecutionObject::CallType::PROCESS, context_idx);
+ return pimpl_m->RunAsync(ExecutionObject::CallType::PROCESS, 0);
}
bool ExecutionObject::ProcessFrameWait()
{
- return ProcessFrameWait(0);
-}
-
-bool ExecutionObject::ProcessFrameWait(uint32_t context_idx)
-{
- TRACE::print("-> ExecutionObject::ProcessFrameWait(%d)\n", context_idx);
- return pimpl_m->Wait(ExecutionObject::CallType::PROCESS, context_idx);
+ return pimpl_m->Wait(ExecutionObject::CallType::PROCESS, 0);
}
bool ExecutionObject::RunAsync (CallType ct)
return pimpl_m->Wait(ct, 0);
}
-bool ExecutionObject::AddCallback(CallType ct, void *user_data,
- uint32_t context_idx)
-{
- return pimpl_m->AddCallback(ct, user_data, context_idx);
-}
-float ExecutionObject::GetProcessTimeInMilliSeconds() const
+bool ExecutionObject::AcquireAndRunContext(uint32_t& context_idx,
+ int frame_idx,
+ const IODeviceArgInfo& in,
+ const IODeviceArgInfo& out)
{
- return GetProcessTimeInMilliSeconds(0);
+ pimpl_m->AcquireContext(context_idx);
+
+ pimpl_m->current_frame_idx_m[context_idx] = frame_idx;
+ pimpl_m->in_m[context_idx] = in;
+ pimpl_m->out_m[context_idx] = out;
+
+ return pimpl_m->RunAsync(ExecutionObject::CallType::PROCESS,
+ context_idx);
}
-float ExecutionObject::GetProcessTimeInMilliSeconds(uint32_t context_idx) const
+bool ExecutionObject::WaitAndReleaseContext(uint32_t context_idx)
{
- float frequency = pimpl_m->device_m->GetFrequencyInMhz() * 1000000;
- return ((float)pimpl_m->GetProcessCycles(context_idx)) / frequency * 1000;
+ TRACE::print("-> ExecutionObject::WaitAndReleaseContext(%d)\n",
+ context_idx);
+
+ bool status = pimpl_m->Wait(ExecutionObject::CallType::PROCESS,
+ context_idx);
+ pimpl_m->ReleaseContext(context_idx);
+
+ return status;
}
-float ExecutionObject::GetHostProcessTimeInMilliSeconds() const
+bool ExecutionObject::AddCallback(CallType ct, void *user_data,
+ uint32_t context_idx)
{
- return GetHostProcessTimeInMilliSeconds(0);
+ return pimpl_m->AddCallback(ct, user_data, context_idx);
}
-float ExecutionObject::GetHostProcessTimeInMilliSeconds(uint32_t context_idx) const
+float ExecutionObject::GetProcessTimeInMilliSeconds() const
{
- return pimpl_m->host_time_m[context_idx];
+ float frequency = pimpl_m->device_m->GetFrequencyInMhz() * 1000000;
+ return ((float)pimpl_m->GetProcessCycles(0)) / frequency * 1000;
}
void
return pimpl_m->device_name_m;
}
-void ExecutionObject::AcquireContext(uint32_t& context_idx)
-{
- pimpl_m->AcquireContext(context_idx);
-}
-
-void ExecutionObject::ReleaseContext(uint32_t context_idx)
-{
- pimpl_m->ReleaseContext(context_idx);
-}
//
// Create a kernel to call the "initialize" function
}
}
-
bool ExecutionObject::Impl::RunAsync(CallType ct, uint32_t context_idx)
{
switch (ct)
}
case CallType::PROCESS:
{
- std::chrono::time_point<std::chrono::steady_clock> t1, t2;
- t1 = std::chrono::steady_clock::now();
+ RecordEvent(current_frame_idx_m[context_idx],
+ (layers_group_id_m == 1) ? TimeStamp::EO1_PFSA_START:
+ TimeStamp::EO2_PFSA_START);
OCL_TIDL_ProcessParams *p_params = shared_process_params_m.get()
+ context_idx;
- p_params->frameIdx = current_frame_idx_m;
+ p_params->frameIdx = current_frame_idx_m[context_idx];
HostWriteNetInput(context_idx);
{
std::unique_lock<std::mutex> lock(mutex_access_m);
k_process_m->RunAsync(context_idx);
}
- t2 = std::chrono::steady_clock::now();
- std::chrono::duration<float> elapsed = t2 - t1;
- host_time_m[context_idx] = elapsed.count() * 1000;
+ RecordEvent(current_frame_idx_m[context_idx],
+ (layers_group_id_m == 1) ? TimeStamp::EO1_PFSA_END:
+ TimeStamp::EO2_PFSA_END);
break;
}
case CallType::CLEANUP:
case CallType::PROCESS:
{
float host_elapsed_ms = 0.0f;
- bool has_work = k_process_m->Wait(&host_elapsed_ms);
+ float *p_host_elapsed_ms = tidl::internal::NUM_CONTEXTS == 1 ?
+ &host_elapsed_ms : nullptr;
+
+ RecordEvent(current_frame_idx_m[context_idx],
+ (layers_group_id_m == 1) ? TimeStamp::EO1_PFW_START:
+ TimeStamp::EO2_PFW_START);
+
+ bool has_work = k_process_m->Wait(p_host_elapsed_ms, context_idx);
if (has_work)
{
OCL_TIDL_ProcessParams *p_params = shared_process_params_m.get()
throw Exception(p_params->errorCode,
__FILE__, __FUNCTION__, __LINE__);
- std::chrono::time_point<std::chrono::steady_clock> t1, t2;
- t1 = std::chrono::steady_clock::now();
HostReadNetOutput(context_idx);
- t2 = std::chrono::steady_clock::now();
- std::chrono::duration<float> elapsed = t2 - t1;
- host_time_m += elapsed.count() * 1000 + host_elapsed_ms;
}
+ RecordEvent(current_frame_idx_m[context_idx],
+ (layers_group_id_m == 1) ? TimeStamp::EO1_PFW_END:
+ TimeStamp::EO2_PFW_END);
+
return has_work;
}
case CallType::CLEANUP:
diff --git a/tidl_api/src/execution_object_pipeline.cpp b/tidl_api/src/execution_object_pipeline.cpp
index 279d1ed7e9c0262807110aa81533ce5283b26208..30856e290078d7417b6cc0d743c61ea696c9e16a 100644 (file)
#include <assert.h>
#include <mutex>
#include <condition_variable>
-#include <chrono>
#include "device_arginfo.h"
#include "execution_object_pipeline.h"
#include "parameters.h"
+#include "util.h"
using namespace tidl;
//! for pipelined execution
std::vector<ExecutionObject*> eos_m;
std::vector<IODeviceArgInfo*> iobufs_m;
- std::vector<float> eo_device_time_m;
- std::vector<float> eo_host_time_m;
std::string device_name_m;
uint32_t curr_eo_idx_m;
uint32_t curr_eo_context_idx_m;
- // device and host time tracking: pipeline start to finish
- float device_time_m;
- float host_time_m;
-
private:
//! @brief Initialize ExecutionObjectPipeline with given
//! ExecutionObjects: check consecutive layersGroup, allocate memory
bool has_work_m, is_processed_m;
std::mutex mutex_m;
std::condition_variable cv_m;
-
- // host time tracking: pipeline start to finish
- std::chrono::time_point<std::chrono::steady_clock> start_m;
};
ExecutionObjectPipeline::ExecutionObjectPipeline(
bool ExecutionObjectPipeline::ProcessFrameStartAsync()
{
+ RecordEvent(pimpl_m->frame_idx_m, TimeStamp::EOP_PFSA_START);
+
assert(GetInputBufferPtr() != nullptr && GetOutputBufferPtr() != nullptr);
bool st = pimpl_m->RunAsyncStart();
if (st)
st = pimpl_m->eos_m[0]->AddCallback(ExecutionObject::CallType::PROCESS,
this, pimpl_m->curr_eo_context_idx_m);
+
+ RecordEvent(pimpl_m->frame_idx_m, TimeStamp::EOP_PFSA_END);
return st;
}
void CallbackWrapper(void *user_data)
{
+ int frame_index = ((ExecutionObjectPipeline *) user_data)->GetFrameIndex();
+ RecordEvent(frame_index, TimeStamp::EOP_RAN_START);
+
((ExecutionObjectPipeline *) user_data)->RunAsyncNext();
+
+ RecordEvent(frame_index, TimeStamp::EOP_RAN_END);
}
void ExecutionObjectPipeline::RunAsyncNext()
pimpl_m->curr_eo_context_idx_m);
}
-float ExecutionObjectPipeline::GetProcessTimeInMilliSeconds() const
-{
- return pimpl_m->device_time_m;
-}
-
-float ExecutionObjectPipeline::GetHostProcessTimeInMilliSeconds() const
-{
- return pimpl_m->host_time_m;
-}
-
-float ExecutionObjectPipeline::GetProcessTimeInMilliSeconds(
- uint32_t eo_index) const
-{
- assert(eo_index < pimpl_m->eos_m.size());
- return pimpl_m->eo_device_time_m[eo_index];
-}
-
-float ExecutionObjectPipeline::GetHostProcessTimeInMilliSeconds(
- uint32_t eo_index) const
-{
- assert(eo_index < pimpl_m->eos_m.size());
- return pimpl_m->eo_host_time_m[eo_index];
-}
-
const std::string& ExecutionObjectPipeline::GetDeviceName() const
{
return pimpl_m->device_name_m;
ArgInfo out(ptr, size);
iobufs_m.push_back(new IODeviceArgInfo(out));
}
-
- // Record keeping for each EO's device time and host time
- // because EO could be shared by another EOP
- eo_device_time_m.resize(eos_m.size());
- eo_host_time_m.resize(eos_m.size());
}
ExecutionObjectPipeline::Impl::~Impl()
iobufs_m.back() = new IODeviceArgInfo(out);
}
+// Start execution on the first EO in the pipeline. Callbacks are used
+// to trigger execution on subsequent EOs
bool ExecutionObjectPipeline::Impl::RunAsyncStart()
{
has_work_m = true;
is_processed_m = false;
- device_time_m = 0.0f;
- host_time_m = 0.0f;
curr_eo_idx_m = 0;
- eos_m[0]->AcquireContext(curr_eo_context_idx_m);
- if (tidl::internal::NUM_CONTEXTS == 1)
- start_m = std::chrono::steady_clock::now();
- eos_m[0]->SetFrameIndex(frame_idx_m);
- eos_m[0]->SetInputOutputBuffer(iobufs_m[0], iobufs_m[1],
- curr_eo_context_idx_m);
- return eos_m[0]->ProcessFrameStartAsync(curr_eo_context_idx_m);
+ return eos_m[0]->AcquireAndRunContext(curr_eo_context_idx_m,
+ frame_idx_m,
+ *iobufs_m[0], *iobufs_m[1]);
}
+// Invoked via the callback function, CallbackWrapper. Used to advance the
+// pipeline.
// returns true if we have more EOs to execute
bool ExecutionObjectPipeline::Impl::RunAsyncNext()
{
- eos_m[curr_eo_idx_m]->ProcessFrameWait(curr_eo_context_idx_m);
- // need to capture EO's device/host time before we release its lock
- eo_device_time_m[curr_eo_idx_m] = eos_m[curr_eo_idx_m]->
- GetProcessTimeInMilliSeconds(curr_eo_context_idx_m);
- eo_host_time_m[curr_eo_idx_m] = eos_m[curr_eo_idx_m]->
- GetHostProcessTimeInMilliSeconds(curr_eo_context_idx_m);
- device_time_m += eo_device_time_m[curr_eo_idx_m];
- if (tidl::internal::NUM_CONTEXTS > 1)
- host_time_m += eo_host_time_m[curr_eo_idx_m];
- eos_m[curr_eo_idx_m]->ReleaseContext(curr_eo_context_idx_m);
+ eos_m[curr_eo_idx_m]->WaitAndReleaseContext(curr_eo_context_idx_m);
curr_eo_idx_m += 1;
if (curr_eo_idx_m < eos_m.size())
{
- eos_m[curr_eo_idx_m]->AcquireContext(curr_eo_context_idx_m);
- eos_m[curr_eo_idx_m]->SetFrameIndex(frame_idx_m);
- eos_m[curr_eo_idx_m]->SetInputOutputBuffer(iobufs_m[curr_eo_idx_m],
- iobufs_m[curr_eo_idx_m+1], curr_eo_context_idx_m);
- eos_m[curr_eo_idx_m]->ProcessFrameStartAsync(curr_eo_context_idx_m);
+ eos_m[curr_eo_idx_m]->AcquireAndRunContext(curr_eo_context_idx_m,
+ frame_idx_m,
+ *iobufs_m[curr_eo_idx_m],
+ *iobufs_m[curr_eo_idx_m+1]);
return true;
}
else
{
- if (tidl::internal::NUM_CONTEXTS == 1)
{
- std::chrono::duration<float> elapsed =
- std::chrono::steady_clock::now() - start_m;
- host_time_m = elapsed.count() * 1000; // seconds to milliseconds
+ std::lock_guard<std::mutex> lock(mutex_m);
+ is_processed_m = true;
}
- is_processed_m = true;
cv_m.notify_all();
return false;
}
{
if (! has_work_m) return false;
+ RecordEvent(frame_idx_m, TimeStamp::EOP_PFW_START);
+
std::unique_lock<std::mutex> lock(mutex_m);
cv_m.wait(lock, [this]{ return this->is_processed_m; });
has_work_m = false;
+
+ RecordEvent(frame_idx_m, TimeStamp::EOP_PFW_END);
+
return true;
}
index 508c5498a352d43ab99c24390ffbd9010d3d1d88..74e1150263b6d4a0ca1dcd3225e6de16e26aae5d 100644 (file)
int index = static_cast<int>(id);
queue_m[index] = clCreateCommandQueue(context_m,
all_device_ids[index],
- CL_QUEUE_PROFILING_ENABLE,
+ CL_QUEUE_PROFILING_ENABLE|
+ CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE,
&errcode);
errorCheck(errcode, __LINE__);
}
diff --git a/tidl_api/src/util.cpp b/tidl_api/src/util.cpp
index 239b94399cfcc7719dc5182d207a4ffffd58a619..711b289ec4977a952f8b9979151fe0a8dc4387b9 100644 (file)
--- a/tidl_api/src/util.cpp
+++ b/tidl_api/src/util.cpp
#include <iostream>
#include <fstream>
#include <assert.h>
+#include <chrono>
+#include <mutex>
+#include <cstring>
+#include <memory>
+#include "executor.h"
using namespace tidl;
+using namespace std::chrono;
+
+std::unique_ptr<TimeStamp> tidl_api_timestamps(nullptr);
+
+bool tidl::EnableTimeStamps(const std::string& file, size_t num_frames)
+{
+ std::mutex m;
+ std::lock_guard<std::mutex> guard(m);
+
+ if (tidl_api_timestamps.get() != nullptr)
+ return true;
+
+ tidl_api_timestamps.reset(new TimeStamp(file, num_frames));
+ if (tidl_api_timestamps.get() == nullptr)
+ return false;
+
+ return true;
+}
+
+TimeStamp::TimeStamp(const std::string& file, int num_entries):
+ num_entries_m(num_entries), file_m(file)
+{
+ entries_m = new Entry[num_entries_m];
+ std::memset(entries_m, 0, sizeof(Entry)*num_entries_m);
+}
+
+void TimeStamp::Update(int frame_idx, EventKind k)
+{
+ int idx = frame_idx % num_entries_m;
+ entries_m[idx].frame_idx = frame_idx;
+ entries_m[idx].timestamp[k] = duration_cast<microseconds>
+ (high_resolution_clock::now().time_since_epoch()).count();
+}
+
+TimeStamp::~TimeStamp()
+{
+ std::ofstream ofs;
+ ofs.open(file_m, std::ofstream::out);
+
+ for (int i=0; i < num_entries_m; i++)
+ for (int j=0; j < EventKind::NUM_EVENTS; j++)
+ if (entries_m[i].timestamp[j] != 0)
+ {
+ ofs << entries_m[i].frame_idx << ",";
+ switch (j)
+ {
+ case EOP_PFSA_START: ofs << "EOP:PFSA:Start"; break;
+ case EOP_PFSA_END: ofs << "EOP:PFSA:End"; break;
+ case EOP_PFW_START: ofs << "EOP:PFW:Start"; break;
+ case EOP_PFW_END: ofs << "EOP:PFW:End"; break;
+ case EOP_RAN_START: ofs << "EOP:RAN:Start"; break;
+ case EOP_RAN_END: ofs << "EOP:RAN:End"; break;
+
+ case EO1_PFSA_START: ofs << "EO1:PFSA:Start"; break;
+ case EO1_PFSA_END: ofs << "EO1:PFSA:End"; break;
+ case EO1_PFW_START: ofs << "EO1:PFW:Start"; break;
+ case EO1_PFW_END: ofs << "EO1:PFW:End"; break;
+
+ case EO2_PFSA_START: ofs << "EO2:PFSA:Start"; break;
+ case EO2_PFSA_END: ofs << "EO2:PFSA:End"; break;
+ case EO2_PFW_START: ofs << "EO2:PFW:Start"; break;
+ case EO2_PFW_END: ofs << "EO2:PFW:End"; break;
+
+ default: ofs << "UNKNOWN"; break;
+ }
+ ofs << "," << entries_m[i].timestamp[j] << std::endl;
+ }
+
+ ofs.close();
+
+ delete [] entries_m;
+}
+
+
+void tidl::RecordEvent(int frame_idx, TimeStamp::EventKind k)
+{
+ TimeStamp* t = tidl_api_timestamps.get();
+ if (t)
+ t->Update(frame_idx, k);
+}
+
std::size_t tidl::GetBinaryFileSize(const std::string &F)
{
std::ifstream is;
diff --git a/tidl_api/src/util.h b/tidl_api/src/util.h
index 10b9ef82cd7fa1911331a5e6001267fdf88ba93d..d40d4c9be780b676b14287e3934f9c7181c22f1f 100644 (file)
--- a/tidl_api/src/util.h
+++ b/tidl_api/src/util.h
bool CompareFrames(const std::string &F1, const std::string &F2,
int numFrames, int width, int height);
+class TimeStamp
+{
+ public:
+ enum EventKind { EOP_PFSA_START=0, EOP_PFSA_END,
+ EOP_PFW_START, EOP_PFW_END,
+ EOP_RAN_START, EOP_RAN_END,
+ EO1_PFSA_START, EO1_PFSA_END,
+ EO1_PFW_START, EO1_PFW_END,
+ EO2_PFSA_START, EO2_PFSA_END,
+ EO2_PFW_START, EO2_PFW_END,
+ NUM_EVENTS };
+ struct Entry
+ {
+ unsigned long long frame_idx;
+ unsigned long long timestamp[EventKind::NUM_EVENTS];
+ };
+
+
+ TimeStamp(const std::string& file, int num_entries);
+ ~TimeStamp();
+ void Update(int frame_idx, EventKind k);
+
+ private:
+ Entry* entries_m;
+ const int num_entries_m;
+ const std::string file_m;
+};
+
+
+void RecordEvent(int frame_idx, TimeStamp::EventKind k);
+
} // namespace tidl