index 6c4c5c68165f7617ffec1ecbab625f0b6f4a909a..bc9bd319789c620421d3a59dfa2e5f3c0ad238eb 100644 (file)
@@ -44,20 +44,19 @@ void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
if (!ti_cblas_init_done) ti_cblas_init();
TI_CBLAS_DEBUG_PRINT("Intercepted call to %s\n", "cblas_ztrsm");
- TI_CBLAS_PROFILE_START();
-
+ TI_CBLAS_PROFILE_START();
/* Dynamic conditional offload to ARM */
- if ((TI_CBLAS_L3_OFFLOAD == TI_CBLAS_OFFLOAD_NONE) || ((TI_CBLAS_L3_OFFLOAD == TI_CBLAS_OFFLOAD_SIZE) && (!ztrsm_offload_dsp(Order,Side,M,N)))) {
- TI_CBLAS_DEBUG_PRINT("Executing ARM %s\n", "cblas_ztrsm");
- __real_cblas_ztrsm(Order,Side,Uplo,TransA,Diag,M,N,alpha,A,lda,B,ldb);
- TI_CBLAS_PROFILE_REPORT(" Entire %s call (ARM) took %8.2f us\n","cblas_ztrsm", (float) clock_diff);
- return ;
- }
- /* End ARM offload */
+ if ((TI_CBLAS_L3_OFFLOAD == TI_CBLAS_OFFLOAD_NONE) || ((TI_CBLAS_L3_OFFLOAD == TI_CBLAS_OFFLOAD_SIZE) && (!ztrsm_offload_dsp(Order,Side,M,N)))) {
+ TI_CBLAS_DEBUG_PRINT("Executing ARM %s\n", "cblas_ztrsm");
+ __real_cblas_ztrsm(Order,Side,Uplo,TransA,Diag,M,N,alpha,A,lda,B,ldb);
+ TI_CBLAS_PROFILE_REPORT(" Entire %s call (ARM) took %8.2f us\n","cblas_ztrsm", (float) clock_diff);
+ return ;
+ }
+ /* End ARM offload */
/******************************************************************/
/* DSP offload WILL be done if control reaches here */
- TI_CBLAS_DEBUG_PRINT("Offloading to DSP %s\n", "cblas_ztrsm");
+ TI_CBLAS_DEBUG_PRINT("Offloading to DSP %s\n", "cblas_ztrsm");
/* Lookup kernel pointer from global table */
#ifdef __cplusplus
#endif
{
-
#ifdef __cplusplus
__K->setArg(0, Order);
#else
@@ -172,8 +170,8 @@ void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
void *msmc_ptr;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
- //Buffer buf_MSMC(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_MSMC_TI, MSMC_BUF_SIZE);
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
+ //Buffer buf_MSMC(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_MSMC_TI, MSMC_BUF_SIZE);
__K->setArg(12, buf_MSMC);
#else
@@ -203,7 +201,9 @@ void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
#endif
+
ti_cblas_mem_free(msmc_ptr);
+
ti_cblas_delete_kernel(__K);
TI_CBLAS_DEBUG_PRINT("Finished executing %s\n", "cblas_ztrsm");