Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - dense-linear-algebra-libraries/linalg.git/blobdiff - blasblisacc/src/ti_cblas_cblas_strmm.c
Added bli_mem_init() to level 3 facade functions. Use Perl script to generate wrapper...
[dense-linear-algebra-libraries/linalg.git] / blasblisacc / src / ti_cblas_cblas_strmm.c
index fb28e11717b2c9c1e9d6d5827434e9bd849b73b9..3f1244127fa2497ca782188213ad147d022dbaca 100644 (file)
@@ -44,20 +44,19 @@ void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
        if (!ti_cblas_init_done) ti_cblas_init();
        TI_CBLAS_DEBUG_PRINT("Intercepted call to %s\n", "cblas_strmm");
 
-        TI_CBLAS_PROFILE_START();
-
+    TI_CBLAS_PROFILE_START();
        /* Dynamic condtional offload to ARM */
-        if ((TI_CBLAS_L3_OFFLOAD == TI_CBLAS_OFFLOAD_NONE) || ((TI_CBLAS_L3_OFFLOAD == TI_CBLAS_OFFLOAD_SIZE) && (!strmm_offload_dsp(Order,Side,M,N)))) { 
-               TI_CBLAS_DEBUG_PRINT("Executing ARM %s\n", "cblas_strmm"); 
-               __real_cblas_strmm(Order,Side,Uplo,TransA,Diag,M,N,alpha,A,lda,B,ldb); 
-               TI_CBLAS_PROFILE_REPORT("  Entire %s call (ARM) took %8.2f us\n","cblas_strmm", (float) clock_diff);
-               return ;
-        }
-        /* End ARM offload */
+    if ((TI_CBLAS_L3_OFFLOAD == TI_CBLAS_OFFLOAD_NONE) || ((TI_CBLAS_L3_OFFLOAD == TI_CBLAS_OFFLOAD_SIZE) && (!strmm_offload_dsp(Order,Side,M,N)))) { 
+       TI_CBLAS_DEBUG_PRINT("Executing ARM %s\n", "cblas_strmm"); 
+       __real_cblas_strmm(Order,Side,Uplo,TransA,Diag,M,N,alpha,A,lda,B,ldb); 
+    TI_CBLAS_PROFILE_REPORT("  Entire %s call (ARM) took %8.2f us\n","cblas_strmm", (float) clock_diff);
+       return ;
+    }
+    /* End ARM offload */
 
        /******************************************************************/
        /* DSP offload WILL be done if control reaches here */
-               TI_CBLAS_DEBUG_PRINT("Offloading to DSP %s\n", "cblas_strmm"); 
+       TI_CBLAS_DEBUG_PRINT("Offloading to DSP %s\n", "cblas_strmm"); 
 
        /* Lookup kernel pointer from global table */
 #ifdef __cplusplus
@@ -74,7 +73,6 @@ void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
 #endif
        {
 
-
 #ifdef __cplusplus
                __K->setArg(0, Order);
 #else
@@ -167,8 +165,8 @@ void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
                void *msmc_ptr;
                msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
 #ifdef __cplusplus
-               //Buffer buf_MSMC(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_MSMC_TI, MSMC_BUF_SIZE);
                Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
+               //Buffer buf_MSMC(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_MSMC_TI, MSMC_BUF_SIZE);
                __K->setArg(12, buf_MSMC);
 
 #else
@@ -198,9 +196,10 @@ void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
                TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
 
 #endif
+
                ti_cblas_mem_free(msmc_ptr);
-               ti_cblas_delete_kernel(__K);
 
+               ti_cblas_delete_kernel(__K);
 
                TI_CBLAS_DEBUG_PRINT("Finished executing %s\n", "cblas_strmm");
                TI_CBLAS_PROFILE_REPORT("  Entire %s call (DSP) took %8.2f us\n","cblas_strmm", (float) clock_diff);