1. Replaced lib_clock64() with lib_clock_read(). 1.2.0.0.iter2
author: Jianzhong Xu <xuj@ti.com>
Thu, 24 Mar 2016 20:07:22 +0000 (16:07 -0400)
committer: Jianzhong Xu <xuj@ti.com>
Thu, 24 Mar 2016 20:07:22 +0000 (16:07 -0400)
2. Replaced TSCL=0 with lib_clock_enable().
3. Minor change to medium memory model.

18 files changed:
examples/dsponly/common/ticblas_config.c
src/ti/linalg/blis/config/c66x/bli_kernel.h
src/ti/linalg/blis/frame/3/gemm/bli_gemm_int.c
src/ti/linalg/blis/frame/3/gemm/bli_gemm_ker_var2.c
src/ti/linalg/blis/frame/3/herk/bli_herk_int.c
src/ti/linalg/blis/frame/3/herk/bli_herk_l_ker_var2.c
src/ti/linalg/blis/frame/3/herk/bli_herk_u_ker_var2.c
src/ti/linalg/blis/frame/3/trmm/bli_trmm_int.c
src/ti/linalg/blis/frame/3/trmm/bli_trmm_ll_ker_var2.c
src/ti/linalg/blis/frame/3/trmm/bli_trmm_lu_ker_var2.c
src/ti/linalg/blis/frame/3/trmm/bli_trmm_rl_ker_var2.c
src/ti/linalg/blis/frame/3/trmm/bli_trmm_ru_ker_var2.c
src/ti/linalg/blis/frame/3/trsm/bli_trsm_blk_var1b.c
src/ti/linalg/blis/frame/3/trsm/bli_trsm_int.c
src/ti/linalg/blis/frame/3/trsm/bli_trsm_rl_ker_var2.c
src/ti/linalg/blis/frame/3/trsm/bli_trsm_ru_ker_var2.c
src/ti/linalg/blis/frame/base/bli_dma.c
src/ti/linalg/blis/testsuite/input.general

index 02c0ebd13d7ac1356427f937bbe51cec0de966a1..bbded82a7308faa1ef844099136c2adf93f7ad9d 100644 (file)
@@ -36,7 +36,7 @@
 #ifdef SOC_C6678
 //#define BLAS_L2_BUF_SIZE   (220*1024UL)     /* 220KB SRAM is available in L2 for C6678 EVM */
 #define BLAS_L2_BUF_SIZE   (256*1024UL)     /* 256KB SRAM is available in L2 for C6678 EVM */
-#define BLAS_MSMC_BUF_SIZE (2*1024*1024UL)  /* reserve 2MB for BLAS */    
+#define BLAS_MSMC_BUF_SIZE (5*512*1024UL)   /* reserve 2.5MB for BLAS */    
 #define BLAS_L3_DDR_SIZE   (5120)              
 #else
 #  if SOC_K2H
index 59993769e15d6108ab17721dce9ea482dd410ea6..96cdbf6364b128c8eb94471334726f91da77c407 100755 (executable)
 #define BLIS_DEFAULT_3M_NC_Z           100
 
 #elif defined (MEM_MODEL_MEDIUM)
-
-#define BLIS_DEFAULT_MC_S              144
-#define BLIS_DEFAULT_KC_S              400 //320 good // 240 good // 428 error
+#define BLIS_DEFAULT_MC_S              128
+#define BLIS_DEFAULT_KC_S              440 
 #define BLIS_DEFAULT_NC_S              1224
 
-#define BLIS_DEFAULT_MC_D              140
+#define BLIS_DEFAULT_MC_D              120
 #define BLIS_DEFAULT_KC_D              220
 #define BLIS_DEFAULT_NC_D              1184
 
 #define BLIS_DEFAULT_KC_Z              178
 #define BLIS_DEFAULT_NC_Z              736
 
-#define BLIS_DEFAULT_4M_MC_C           140
-#define BLIS_DEFAULT_4M_KC_C           220
+#define BLIS_DEFAULT_4M_MC_C           128
+#define BLIS_DEFAULT_4M_KC_C           224
 #define BLIS_DEFAULT_4M_NC_C           1184
 
-#define BLIS_DEFAULT_4M_MC_Z           86
-#define BLIS_DEFAULT_4M_KC_Z           178
+#define BLIS_DEFAULT_4M_MC_Z           84
+#define BLIS_DEFAULT_4M_KC_Z           168
 #define BLIS_DEFAULT_4M_NC_Z           736
 
 #define BLIS_DEFAULT_3M_MC_C           88
-#define BLIS_DEFAULT_3M_KC_C           220
+#define BLIS_DEFAULT_3M_KC_C           200
 #define BLIS_DEFAULT_3M_NC_C           792
 
 #define BLIS_DEFAULT_3M_MC_Z           56
-#define BLIS_DEFAULT_3M_KC_Z           178
+#define BLIS_DEFAULT_3M_KC_Z           168
 #define BLIS_DEFAULT_3M_NC_Z           488
 
 #elif defined(MEM_MODEL_SMALL)
index af1ba8d8600fe8c28ff743b9ea117221b8762e54..9fe0e3c773da4217644fafdffc9fdff7b142c70c 100644 (file)
@@ -68,8 +68,8 @@ void bli_gemm_int( obj_t*  alpha,
        impl_t    i;
        FUNCPTR_T f;
 #if defined(BLIS_ENABLE_PROFILE)
-    volatile uint64_t counter_start;
-    volatile uint64_t counter_end;
+    uint64_t counter_start;
+    uint64_t counter_end;
     extern profile_data_t *bli_gemm_profile_data;
     dim_t m_var, k_var, n_var;
     dim_t index;
@@ -156,8 +156,8 @@ void bli_gemm_int( obj_t*  alpha,
        n_var = bli_obj_width( c_local );
 
 #if defined(BLIS_ENABLE_C66X_BUILD)
-       TSCL = 0;
-    counter_start = lib_clock64();
+    lib_clock_enable();
+    counter_start = lib_clock_read();
 #else
     counter_start = (uint64_t) (bli_clock()*1.2e9);
 #endif
@@ -170,7 +170,7 @@ void bli_gemm_int( obj_t*  alpha,
 
 #if defined(BLIS_ENABLE_PROFILE)
 #if defined(BLIS_ENABLE_C66X_BUILD)
-       counter_end = lib_clock64();
+       counter_end = lib_clock_read();
 #else
        counter_end = (uint64_t) (bli_clock()*1.2e9);
 #endif
index 43d2d05be5e6da4ddf99764bd45f77e4ac15b046..d96ae503a6ed82d43ed7e48cfef5d8cd8cdcfa61 100644 (file)
@@ -218,8 +218,8 @@ void PASTEMAC(ch,varname)( \
        lib_emt_Handle emt_handle_c1 = NULL; \
 \
        /*For DSP timing*/ \
-       volatile uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
-       volatile uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
+       uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
+       uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
        extern profile_data_t *bli_gemm_profile_data; \
        /*
           Assumptions/assertions:
@@ -321,7 +321,7 @@ void PASTEMAC(ch,varname)( \
     /* initiate first c transfer */ \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_start_nr = lib_clock64();  \
+               counter_start_nr = lib_clock_read();  \
        } \
        n_cur = ( bli_is_not_edge_f( jr_thread_id, n_iter, n_left ) ? NR : n_left ); \
        if(cs_c*sizeof(ctype) < BLIS_C66X_MAXDMASTRIDE) \
@@ -398,7 +398,7 @@ void PASTEMAC(ch,varname)( \
                /* Loop over the m dimension (MR rows at a time). */  \
                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                { \
-                       counter_start_mr = lib_clock64();  \
+                       counter_start_mr = lib_clock_read();  \
                } \
                for ( i = ir_thread_id; i < m_iter; i += ir_num_threads )  \
                { \
@@ -438,7 +438,7 @@ void PASTEMAC(ch,varname)( \
                        /* Handle interior and edge cases separately. */  \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_start_ker = lib_clock64();  \
+                               counter_start_ker = lib_clock_read();  \
                        } \
                        if ( m_cur == MR && n_cur == NR )  \
                        { \
@@ -470,7 +470,7 @@ void PASTEMAC(ch,varname)( \
                        } \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_end_ker = lib_clock64();  \
+                               counter_end_ker = lib_clock_read();  \
                                bli_profile_data_update(bli_gemm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
                                                                                (counter_end_ker-counter_start_ker), 2*m_cur*k*n_cur); \
                        } \
@@ -484,7 +484,7 @@ void PASTEMAC(ch,varname)( \
                } \
                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                { \
-                       counter_end_mr = lib_clock64();  \
+                       counter_end_mr = lib_clock_read();  \
                        bli_profile_data_update(bli_gemm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND],\
                                                                        (counter_end_mr-counter_start_mr), (uint64_t) 2*m*k*n_cur); \
                } \
@@ -519,7 +519,7 @@ void PASTEMAC(ch,varname)( \
        } \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_end_nr = lib_clock64();  \
+               counter_end_nr = lib_clock_read();  \
                bli_profile_data_update(bli_gemm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
                                                                (counter_end_nr-counter_start_nr), 2*m*k*n); \
        } \
@@ -728,7 +728,7 @@ void PASTEMAC(ch,varname)( \
     /* initiate first c transfer */ \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_start_nr = lib_clock64();  \
+               counter_start_nr = lib_clock_read();  \
        } \
 \
        for ( j = jr_thread_id; j < n_iter; j += jr_num_threads ) \
@@ -752,7 +752,7 @@ void PASTEMAC(ch,varname)( \
                /* Loop over the m dimension (MR rows at a time). */  \
                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                { \
-                       counter_start_mr = lib_clock64();  \
+                       counter_start_mr = lib_clock_read();  \
                } \
                for ( i = ir_thread_id; i < m_iter; i += ir_num_threads )  \
                { \
@@ -791,7 +791,7 @@ void PASTEMAC(ch,varname)( \
                        /* Handle interior and edge cases separately. */  \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_start_ker = lib_clock64();  \
+                               counter_start_ker = lib_clock_read();  \
                        } \
                        if ( m_cur == MR && n_cur == NR )  \
                        { \
@@ -823,7 +823,7 @@ void PASTEMAC(ch,varname)( \
                        }  \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_end_ker = lib_clock64();  \
+                               counter_end_ker = lib_clock_read();  \
                                bli_profile_data_update(bli_gemm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
                                                                                (counter_end_ker-counter_start_ker), 2*m_cur*k*n_cur); \
                        } \
@@ -835,14 +835,14 @@ void PASTEMAC(ch,varname)( \
                } \
                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                { \
-                       counter_end_mr = lib_clock64();  \
+                       counter_end_mr = lib_clock_read();  \
                        bli_profile_data_update(bli_gemm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
                                                                        (counter_end_mr-counter_start_mr), 2*m*k*n_cur); \
                } \
        } \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_end_nr = lib_clock64();  \
+               counter_end_nr = lib_clock_read();  \
                bli_profile_data_update(bli_gemm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
                                                                (counter_end_nr-counter_start_nr), 2*m*k*n); \
        } \
index b548fef4d368bde329d71ce8dea48056ee685797..2f7a5f4d833242bb75e143bdd8e8c92e5175f9f2 100644 (file)
@@ -78,8 +78,8 @@ void bli_herk_int( obj_t*  alpha,
        bool_t    uplo;
        FUNCPTR_T f;
 #if defined(BLIS_ENABLE_PROFILE)
-    volatile uint64_t counter_start;
-    volatile uint64_t counter_end;
+    uint64_t counter_start;
+    uint64_t counter_end;
     extern profile_data_t *bli_herk_profile_data;
     dim_t m_var, k_var, n_var;
     dim_t index;
@@ -150,8 +150,8 @@ void bli_herk_int( obj_t*  alpha,
        n_var = bli_obj_width_after_trans( ah_local );
 
 #if defined(BLIS_ENABLE_C66X_BUILD)
-       TSCL = 0;
-    counter_start = lib_clock64();
+    lib_clock_enable();
+    counter_start = lib_clock_read();
 #else
     counter_start = (uint64_t) (bli_clock()*1.2e9);
 #endif
@@ -165,7 +165,7 @@ void bli_herk_int( obj_t*  alpha,
 #if defined(BLIS_ENABLE_PROFILE)
 
 #if defined(BLIS_ENABLE_C66X_BUILD)
-       counter_end = lib_clock64();
+       counter_end = lib_clock_read();
 #else // if not DSP
        counter_end = (uint64_t) (bli_clock()*1.2e9);
 #endif
index 2af25bbcf0c2f36d786b2479703b9fa622c738ac..0ffd2b4b6cd6ba1c831039b878c3e75bd3f16f97 100644 (file)
@@ -228,8 +228,8 @@ void PASTEMAC(ch,varname)( \
        lib_emt_Handle emt_handle_c1 = NULL; \
 \
        /*For DSP timing*/ \
-       volatile uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
-       volatile uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
+       uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
+       uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
        extern profile_data_t *bli_herk_profile_data; \
 \
        /*
@@ -373,7 +373,7 @@ void PASTEMAC(ch,varname)( \
        diagoffc_j = diagoffc - (doff_t) jr_thread_id * NR; \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_start_nr = lib_clock64();  \
+               counter_start_nr = lib_clock_read();  \
        } \
        /*if ( diagoffc_j < 0 ) \
        { \
@@ -502,15 +502,15 @@ void PASTEMAC(ch,varname)( \
                }\
                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                { \
-                       counter_start_mr = lib_clock64();  \
+                       counter_start_mr = lib_clock_read();  \
                } \
                /* Interior loop over the m dimension (MR rows at a time). */ \
                for ( i = ir_thread_id; i < m_iter_new; i += ir_num_threads ) \
                { \
                        ctype* restrict a2; \
 \
-                       /*TSCL=0; \
-                       counter_start = TSCL;*/ \
+                       /*lib_clock_enable(); \
+                       counter_start = lib_clock_read();*/ \
                        /*a1  = a_cast + i * rstep_a; \
                        c11 = c1     + i * rstep_c;*/ \
                        a1  = a1_new + i * rstep_a; \
@@ -569,7 +569,7 @@ void PASTEMAC(ch,varname)( \
                        { \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
                                /* Invoke the gemm micro-kernel. */ \
                                gemm_ukr_cast( k, \
@@ -589,7 +589,7 @@ void PASTEMAC(ch,varname)( \
 \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
                                                                                        (counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
                                } \
@@ -598,7 +598,7 @@ void PASTEMAC(ch,varname)( \
                        { \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
                                /* Handle interior and edge cases separately. */ \
                                if ( m_cur == MR && n_cur == NR ) \
@@ -631,7 +631,7 @@ void PASTEMAC(ch,varname)( \
                                } \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
                                                                                        (counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
                                } \
@@ -639,7 +639,7 @@ void PASTEMAC(ch,varname)( \
                } \
                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                { \
-                       counter_end_mr = lib_clock64();  \
+                       counter_end_mr = lib_clock_read();  \
                        bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
                                                                        (counter_end_mr-counter_start_mr), 2*mc_new*k*n_cur); \
                } \
@@ -680,7 +680,7 @@ void PASTEMAC(ch,varname)( \
        } \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_end_nr = lib_clock64();  \
+               counter_end_nr = lib_clock_read();  \
                bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
                                                                (counter_end_nr-counter_start_nr), 2*mc_new*k*n); \
        } \
index 12eafec36d962631133597d5823c890d48cfd13d..8cb6ae02962695233b7fdb8b1b8346c46523dc55 100644 (file)
@@ -219,8 +219,8 @@ void PASTEMAC(ch,varname)( \
        lib_emt_Handle emt_handle_c1 = NULL; \
 \
        /*For DSP timing*/ \
-       volatile uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
-       volatile uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
+       uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
+       uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
        extern profile_data_t *bli_herk_profile_data; \
 \
        /*
@@ -356,7 +356,7 @@ void PASTEMAC(ch,varname)( \
 \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_start_nr = lib_clock64();  \
+               counter_start_nr = lib_clock_read();  \
        } \
 \
        if (cs_c*sizeof(ctype) < BLIS_C66X_MAXDMASTRIDE) \
@@ -428,7 +428,7 @@ void PASTEMAC(ch,varname)( \
 \
                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                { \
-                       counter_start_mr = lib_clock64();  \
+                       counter_start_mr = lib_clock_read();  \
                } \
 \
                for ( i = ir_thread_id; i < m_iter; i += ir_num_threads ) \
@@ -485,7 +485,7 @@ void PASTEMAC(ch,varname)( \
 \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
 \
                                /* Invoke the gemm micro-kernel. */ \
@@ -506,7 +506,7 @@ void PASTEMAC(ch,varname)( \
 \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                    bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
                                                                                (counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
                                } \
@@ -517,7 +517,7 @@ void PASTEMAC(ch,varname)( \
 \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
 \
                                /* Handle interior and edge cases separately. */ \
@@ -551,7 +551,7 @@ void PASTEMAC(ch,varname)( \
                                } \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
                                                                                        (counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
                                } \
@@ -559,7 +559,7 @@ void PASTEMAC(ch,varname)( \
                } \
                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                { \
-                       counter_end_mr = lib_clock64();  \
+                       counter_end_mr = lib_clock_read();  \
                        bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
                                                                        (counter_end_mr-counter_start_mr), 2*m*k*n_cur); \
                } \
@@ -596,7 +596,7 @@ void PASTEMAC(ch,varname)( \
 \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_end_nr = lib_clock64();  \
+               counter_end_nr = lib_clock_read();  \
                bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND],\
                                                                (counter_end_nr-counter_start_nr), 2*m*k*n); \
        } \
index 71f38f9ed094affc089595d45f112145dba737cb..cf4a566f02d94e1728380aa521e4cb5784c055d1 100644 (file)
@@ -101,8 +101,8 @@ void bli_trmm_int( obj_t*  alpha,
        FUNCPTR_T f;
 
 #if defined(BLIS_ENABLE_PROFILE)
-    volatile uint64_t counter_start;
-    volatile uint64_t counter_end;
+    uint64_t counter_start;
+    uint64_t counter_end;
     extern profile_data_t *bli_trmm_profile_data;
     dim_t m_var, k_var, n_var;
     dim_t index;
@@ -187,8 +187,8 @@ void bli_trmm_int( obj_t*  alpha,
        bli_set_dim_with_side( side, m_var, n_var, k_var );
 
 #if defined(BLIS_ENABLE_C66X_BUILD)
-       TSCL = 0;
-    counter_start = lib_clock64();
+    lib_clock_enable();
+    counter_start = lib_clock_read();
 #else
     counter_start = (uint64_t) (bli_clock()*1.2e9);
 #endif
@@ -203,7 +203,7 @@ void bli_trmm_int( obj_t*  alpha,
 
 #if defined(BLIS_ENABLE_PROFILE)
 #if defined(BLIS_ENABLE_C66X_BUILD)
-       counter_end = lib_clock64();
+       counter_end = lib_clock_read();
 #else
        counter_end = (uint64_t) (bli_clock()*1.2e9);
 #endif
index 883ae2a71980718fd6a75e4a8fb2bc3f337991e0..0d288a99cf75370a8b2701c6c0a45122b93efb16 100644 (file)
@@ -225,8 +225,8 @@ void PASTEMAC(ch,varname)( \
        lib_emt_Handle emt_handle_c1 = NULL; \
 \
        /*For DSP timing*/ \
-       volatile uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
-       volatile uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
+       uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
+       uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
        extern profile_data_t *bli_trmm_profile_data; \
        /*
           Assumptions/assertions:
@@ -381,7 +381,7 @@ void PASTEMAC(ch,varname)( \
 \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_start_nr = lib_clock64();  \
+               counter_start_nr = lib_clock_read();  \
        } \
 \
        n_cur = ( bli_is_not_edge_f( 0, n_iter, n_left ) ? NR : n_left ); \
@@ -455,7 +455,7 @@ void PASTEMAC(ch,varname)( \
 \
                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                { \
-                       counter_start_mr = lib_clock64();  \
+                       counter_start_mr = lib_clock_read();  \
                } \
 \
                /* Loop over the m dimension (MR rows at a time). */ \
@@ -548,7 +548,7 @@ void PASTEMAC(ch,varname)( \
                                    /* Handle interior and edge cases separately. */ \
                                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                        { \
-                                               counter_start_ker = lib_clock64();  \
+                                               counter_start_ker = lib_clock_read();  \
                                        } \
                                    if ( m_cur == MR && n_cur == NR ) \
                                    { \
@@ -584,7 +584,7 @@ void PASTEMAC(ch,varname)( \
                                    } \
                                    if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                    { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
                                                                                        (counter_end_ker-counter_start_ker), 2*k_a1011*m_cur*n_cur); \
                                    } \
@@ -640,7 +640,7 @@ void PASTEMAC(ch,varname)( \
                                    /* Handle interior and edge cases separately. */ \
                                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                        { \
-                                               counter_start_ker = lib_clock64();  \
+                                               counter_start_ker = lib_clock_read();  \
                                        } \
                                    if ( m_cur == MR && n_cur == NR ) \
                                    { \
@@ -671,7 +671,7 @@ void PASTEMAC(ch,varname)( \
                                        } \
                                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                        { \
-                                               counter_end_ker = lib_clock64();  \
+                                               counter_end_ker = lib_clock_read();  \
                                                bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
                                                                                                (counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
                                        } \
@@ -687,7 +687,7 @@ void PASTEMAC(ch,varname)( \
 \
                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                { \
-                       counter_end_mr = lib_clock64();  \
+                       counter_end_mr = lib_clock_read();  \
                        bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND],\
                                                                        (counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
                } \
@@ -727,7 +727,7 @@ void PASTEMAC(ch,varname)( \
 \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_end_nr = lib_clock64();  \
+               counter_end_nr = lib_clock_read();  \
                bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND],\
                                                                (counter_end_nr-counter_start_nr), 2*k*m*n); \
        } \
index e5c36e417d8d6bf4ce641549c3e5d72c9073f76e..24c65d79e20253f12e5e1da6143cb3ebab6fc193 100644 (file)
@@ -226,8 +226,8 @@ void PASTEMAC(ch,varname)( \
        lib_emt_Handle emt_handle_c1 = NULL; \
 \
        /*For DSP timing*/ \
-       volatile uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
-       volatile uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
+       uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
+       uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
        extern profile_data_t *bli_trmm_profile_data; \
 \
        /*
@@ -390,7 +390,7 @@ void PASTEMAC(ch,varname)( \
 \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_start_nr = lib_clock64();  \
+               counter_start_nr = lib_clock_read();  \
        } \
 \
        n_cur = ( bli_is_not_edge_f( 0, n_iter, n_left ) ? NR : n_left ); \
@@ -464,7 +464,7 @@ void PASTEMAC(ch,varname)( \
 \
                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                { \
-                       counter_start_mr = lib_clock64();  \
+                       counter_start_mr = lib_clock_read();  \
                } \
 \
                /* Loop over the m dimension (MR rows at a time). */ \
@@ -548,7 +548,7 @@ void PASTEMAC(ch,varname)( \
 \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
 \
                                /* Handle interior and edge cases separately. */ \
@@ -586,7 +586,7 @@ void PASTEMAC(ch,varname)( \
                                } \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
                                                                                        (counter_end_ker-counter_start_ker), 2*k_a1112*m_cur*n_cur); \
                                } \
@@ -641,7 +641,7 @@ void PASTEMAC(ch,varname)( \
 \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
 \
                                /* Handle interior and edge cases separately. */ \
@@ -675,7 +675,7 @@ void PASTEMAC(ch,varname)( \
 \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
                                                                                        (counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
                                } \
@@ -690,7 +690,7 @@ void PASTEMAC(ch,varname)( \
                } /*for ( i = 0; i < m_iter; ++i )*/\
                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                { \
-                       counter_end_mr = lib_clock64();  \
+                       counter_end_mr = lib_clock_read();  \
                        bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
                                                                        (counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
                } \
@@ -731,7 +731,7 @@ void PASTEMAC(ch,varname)( \
 \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_end_nr = lib_clock64();  \
+               counter_end_nr = lib_clock_read();  \
                bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
                                                                (counter_end_nr-counter_start_nr), 2*k*m*n); \
        } \
index 1a102e40d6890459898860b55a22ba14ff31a9f0..d66e6e016001bebd556c86604f20120b27c35757 100644 (file)
@@ -231,8 +231,8 @@ void PASTEMAC(ch,varname)( \
        lib_emt_Handle emt_handle_c1 = NULL; \
 \
        /*For DSP timing*/ \
-       volatile uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
-       volatile uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
+       uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
+       uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
        extern profile_data_t *bli_trmm_profile_data; \
 \
        /*
@@ -396,7 +396,7 @@ void PASTEMAC(ch,varname)( \
        /* Loop over the n dimension (NR columns at a time). */ \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_start_nr = lib_clock64();  \
+               counter_start_nr = lib_clock_read();  \
        } \
        /* Transfering MC(=m)xNR*/ \
        if (cs_c*sizeof(ctype) < BLIS_C66X_MAXDMASTRIDE) \
@@ -491,7 +491,7 @@ void PASTEMAC(ch,varname)( \
                        /* Loop over the m dimension (MR rows at a time). */ \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_start_mr = lib_clock64();  \
+                               counter_start_mr = lib_clock_read();  \
                        } \
                        for ( i = 0; i < m_iter; ++i ) \
                        { \
@@ -541,8 +541,7 @@ void PASTEMAC(ch,varname)( \
                                /* Handle interior and edge cases separately. */ \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       /*TSCL=0;*/ \
-                                       counter_start_ker = TSCL;  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
                                if ( m_cur == MR && n_cur == NR ) \
                                { \
@@ -578,7 +577,7 @@ void PASTEMAC(ch,varname)( \
                                } \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
                                                                                        (counter_end_ker-counter_start_ker),2*k_b1121*m_cur*n_cur); \
                                } \
@@ -589,7 +588,7 @@ void PASTEMAC(ch,varname)( \
                        } \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_end_mr = TSCL;  \
+                               counter_end_mr = lib_clock_read();  \
                                bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
                                                                                (counter_end_mr-counter_start_mr), 2*k_b1121*m*n_cur); \
                        } \
@@ -608,7 +607,7 @@ void PASTEMAC(ch,varname)( \
                        /* Loop over the m dimension (MR rows at a time). */ \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_start_mr = lib_clock64();  \
+                               counter_start_mr = lib_clock_read();  \
                        } \
                        for ( i = 0; i < m_iter; ++i ) \
                        { \
@@ -654,7 +653,7 @@ void PASTEMAC(ch,varname)( \
                                /* Handle interior and edge cases separately. */ \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
                                if ( m_cur == MR && n_cur == NR ) \
                                { \
@@ -685,7 +684,7 @@ void PASTEMAC(ch,varname)( \
                                } \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
                                                                                        (counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
                                        /*printf("gemm %d %d %d %ld\n", MR, NR, k, (counter_end_ker-counter_start_ker));*/ \
@@ -697,7 +696,7 @@ void PASTEMAC(ch,varname)( \
                        } /*for i*/\
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_end_mr = lib_clock64();  \
+                               counter_end_mr = lib_clock_read();  \
                                bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
                                                                                (counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
                        } \
@@ -739,7 +738,7 @@ void PASTEMAC(ch,varname)( \
        } \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_end_nr = lib_clock64();  \
+               counter_end_nr = lib_clock_read();  \
                bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
                                                                (counter_end_nr-counter_start_nr), 2*k*m*n); \
        } \
index 8d23b56e30ceaefa99c3b2d58cc11f52322c58aa..17cf468cf979cdbdfcf8a247cfd4326e41b9bc25 100644 (file)
@@ -225,8 +225,8 @@ void PASTEMAC(ch,varname)( \
        lib_emt_Handle emt_handle_c1 = NULL; \
 \
        /*For DSP timing*/ \
-       volatile uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
-       volatile uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
+       uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
+       uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
        extern profile_data_t *bli_trmm_profile_data; \
 \
        /*
@@ -392,7 +392,7 @@ void PASTEMAC(ch,varname)( \
        /* Loop over the n dimension (NR columns at a time). */ \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_start_nr = lib_clock64();  \
+               counter_start_nr = lib_clock_read();  \
        } \
        /* Transfering MC(=m)xNR*/ \
        if (cs_c*sizeof(ctype) < BLIS_C66X_MAXDMASTRIDE) \
@@ -486,7 +486,7 @@ void PASTEMAC(ch,varname)( \
                        /* Loop over the m dimension (MR rows at a time). */ \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_start_mr = lib_clock64();  \
+                               counter_start_mr = lib_clock_read();  \
                        } \
                        for ( i = 0; i < m_iter; ++i ) \
                        { \
@@ -537,7 +537,7 @@ void PASTEMAC(ch,varname)( \
                                /* Handle interior and edge cases separately. */ \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
                                if ( m_cur == MR && n_cur == NR ) \
                                { \
@@ -573,7 +573,7 @@ void PASTEMAC(ch,varname)( \
                                } \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
                                                                                        (counter_end_ker-counter_start_ker), 2*k_b0111*m_cur*n_cur); \
                                } \
@@ -584,7 +584,7 @@ void PASTEMAC(ch,varname)( \
                        } \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_end_mr = lib_clock64();  \
+                               counter_end_mr = lib_clock_read();  \
                                bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
                                                                                (counter_end_mr-counter_start_mr), 2*k_b0111*m*n_cur); \
                        } \
@@ -603,7 +603,7 @@ void PASTEMAC(ch,varname)( \
                        /* Loop over the m dimension (MR rows at a time). */ \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_start_mr = lib_clock64();  \
+                               counter_start_mr = lib_clock_read();  \
                        } \
                        for ( i = 0; i < m_iter; ++i ) \
                        { \
@@ -650,7 +650,7 @@ void PASTEMAC(ch,varname)( \
                                /* Handle interior and edge cases separately. */ \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
                                if ( m_cur == MR && n_cur == NR ) \
                                { \
@@ -681,7 +681,7 @@ void PASTEMAC(ch,varname)( \
                                } \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
                                                                                        (counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
                                } \
@@ -692,7 +692,7 @@ void PASTEMAC(ch,varname)( \
                        } \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_end_mr = lib_clock64();  \
+                               counter_end_mr = lib_clock_read();  \
                                bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
                                                                                (counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
                        } \
@@ -735,7 +735,7 @@ void PASTEMAC(ch,varname)( \
        } \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_end_nr = lib_clock64();  \
+               counter_end_nr = lib_clock_read();  \
                bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
                                                                (counter_end_nr-counter_start_nr), 2*k*m*n); \
        } \
index 9dc5bead6acd9ebaa9a5f334fd8c1d229e3e2352..c6154d12a717428956ad12bbc5812e3510643566 100644 (file)
@@ -60,8 +60,10 @@ void bli_trsm_blk_var1b( obj_t*  a,
 #ifdef BLIS_ENABLE_C66X_EDMA
        dim_t b_alg_next;
 #endif
-        volatile int counter_start;
-           volatile int counter_end;
+#ifdef BLIS_ENABLE_CYCLE_COUNT
+    uint64_t counter_start;
+    uint64_t counter_end;
+#endif
 //     printf("blk_var1b\n");
 
     // Initialize object for packing B.
@@ -210,8 +212,10 @@ void bli_trsm_blk_var1b( obj_t*  a,
 #endif
 
                // Perform trsm subproblem.
-               //TSCL = 0;
-               //counter_start = TSCL;
+#ifdef BLIS_ENABLE_CYCLE_COUNT
+               lib_clock_enable();
+               counter_start = lib_clock_read();
+#endif
                bli_trsm_int( &BLIS_ONE,
                              a1_pack,
                              b_pack,
@@ -220,12 +224,13 @@ void bli_trsm_blk_var1b( obj_t*  a,
                              cntl_sub_trsm( cntl ),
                       trsm_thread_sub_trsm( thread ) );
 
-               //counter_end = TSCL;
-               //if(lib_get_coreID()==0)
+#ifdef BLIS_ENABLE_CYCLE_COUNT
+               counter_end = lib_clock_read();
+               if(lib_get_coreID()==0)
                {
-                       //printf("%d\n", (counter_end-counter_start));
+                       printf("%d\n", (counter_end-counter_start));
                }
-
+#endif
 #ifdef BLIS_ENABLE_C66X_EDMA
                bli_obj_alias_to(c2, c1);
 #endif
index 050e962ae286542ae9d2601cb1c43fc5c3795c06..fb64717e9f575d8a378aa7ba807ef286b20a17fa 100644 (file)
@@ -101,8 +101,8 @@ void bli_trsm_int( obj_t*  alpha,
        FUNCPTR_T f;
 
 #if defined(BLIS_ENABLE_PROFILE)
-    volatile uint64_t counter_start;
-    volatile uint64_t counter_end;
+    uint64_t counter_start;
+    uint64_t counter_end;
     extern profile_data_t *bli_trsm_profile_data;
     dim_t m_var, k_var, n_var;
     dim_t index;
@@ -206,8 +206,8 @@ void bli_trsm_int( obj_t*  alpha,
        bli_set_dim_with_side( side, m_var, n_var, k_var );
 
 #if defined(BLIS_ENABLE_C66X_BUILD)
-       TSCL = 0;
-    counter_start = lib_clock64();
+    lib_clock_enable();
+    counter_start = lib_clock_read();
 #else
     counter_start = (uint64_t) (bli_clock()*1.2e9);
 #endif
@@ -222,7 +222,7 @@ void bli_trsm_int( obj_t*  alpha,
 
 #if defined(BLIS_ENABLE_PROFILE)
 #if defined(BLIS_ENABLE_C66X_BUILD)
-       counter_end = lib_clock64();
+       counter_end = lib_clock_read();
 #else
        counter_end = (uint64_t) (bli_clock()*1.2e9);
 #endif
index ac0b20eee6e1ddd354b7a940a0bcda476d2ee7dd..2bd31192b152f6d17db5615fbb561f1cb20ad3e1 100644 (file)
@@ -251,8 +251,8 @@ void PASTEMAC(ch,varname)( \
        lib_emt_Handle emt_handle_c1 = NULL; \
 \
        /*For DSP timing*/ \
-       volatile uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
-       volatile uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
+       uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
+       uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
        extern profile_data_t *bli_trsm_profile_data; \
        /*
           Assumptions/assertions:
@@ -472,7 +472,7 @@ void PASTEMAC(ch,varname)( \
        n_cur = ( bli_is_not_edge_f( 0, n_iter, n_left ) ? NR : n_left ); \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_start_nr = lib_clock64();  \
+               counter_start_nr = lib_clock_read();  \
        } \
        \
        if(rs_c == 1) \
@@ -637,7 +637,7 @@ void PASTEMAC(ch,varname)( \
 \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_start_mr = lib_clock64();  \
+                               counter_start_mr = lib_clock_read();  \
                        } \
                        for ( i = 0; i < m_iter; ++i ) \
                        { \
@@ -715,7 +715,7 @@ void PASTEMAC(ch,varname)( \
                                /* Handle interior and edge cases separately. */ \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
                                if ( m_cur == MR && n_cur == NR ) \
                                { \
@@ -748,7 +748,7 @@ void PASTEMAC(ch,varname)( \
                                } \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
                                                                                        (counter_end_ker-counter_start_ker), 2*k_b21*m_cur*n_cur); \
                                } \
@@ -773,7 +773,7 @@ void PASTEMAC(ch,varname)( \
                        } /*MR loop*/\
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_end_mr = lib_clock64();  \
+                               counter_end_mr = lib_clock_read();  \
                                bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
                                                                                (counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
                        } \
@@ -790,7 +790,7 @@ void PASTEMAC(ch,varname)( \
                        /* Loop over the m dimension (MR rows at a time). */ \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_start_mr = lib_clock64();  \
+                               counter_start_mr = lib_clock_read();  \
                        } \
 \
                        for ( i = 0; i < m_iter; ++i ) \
@@ -850,7 +850,7 @@ void PASTEMAC(ch,varname)( \
                                /* Handle interior and edge cases separately. */ \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
                                if (BLIS_ENABLE_C66X_IDMA_KERVAR2 == 1) \
                                { \
@@ -916,7 +916,7 @@ void PASTEMAC(ch,varname)( \
                                } \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
                                                                                        (counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
                                } \
@@ -929,7 +929,7 @@ void PASTEMAC(ch,varname)( \
                        \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_end_mr = lib_clock64();  \
+                               counter_end_mr = lib_clock_read();  \
                                bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
                                                                                (counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
                        } \
@@ -997,7 +997,7 @@ void PASTEMAC(ch,varname)( \
        \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_end_nr = lib_clock64();  \
+               counter_end_nr = lib_clock_read();  \
                bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
                                                                (counter_end_nr-counter_start_nr), 2*k*m*n); \
        } \
index 14848dd60d9d1684d95b68de99c9d88a0d895013..9bcfbdba9fad469ac3eebda1d574ed43725bfe12 100644 (file)
@@ -239,8 +239,8 @@ void PASTEMAC(ch,varname)( \
        lib_emt_Handle emt_handle_c1 = NULL; \
 \
        /*For DSP timing*/ \
-       volatile uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
-       volatile uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
+       uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
+       uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
        extern profile_data_t *bli_trsm_profile_data; \
        /*
           Assumptions/assertions:
@@ -445,7 +445,7 @@ void PASTEMAC(ch,varname)( \
  \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_start_nr = lib_clock64();  \
+               counter_start_nr = lib_clock_read();  \
        } \
        n_cur = ( bli_is_not_edge_f( 0, n_iter, n_left ) ? NR : n_left ); \
        if(rs_c == 1) \
@@ -599,7 +599,7 @@ void PASTEMAC(ch,varname)( \
                        /* Loop over the m dimension (MR rows at a time). */ \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_start_mr = lib_clock64();  \
+                               counter_start_mr = lib_clock_read();  \
                        } \
                        for ( i = 0; i < m_iter; ++i ) \
                        { \
@@ -656,7 +656,7 @@ void PASTEMAC(ch,varname)( \
                                /* Handle interior and edge cases separately. */ \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
                                if ( m_cur == MR && n_cur == NR ) \
                                { \
@@ -689,7 +689,7 @@ void PASTEMAC(ch,varname)( \
                                } \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
                                                                                        (counter_end_ker-counter_start_ker), 2*k_b01*m_cur*n_cur); \
                                } \
@@ -703,7 +703,7 @@ void PASTEMAC(ch,varname)( \
                        } \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_end_mr = lib_clock64();  \
+                               counter_end_mr = lib_clock_read();  \
                                bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
                                                                                (counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
                        } \
@@ -720,7 +720,7 @@ void PASTEMAC(ch,varname)( \
                        /* Loop over the m dimension (MR rows at a time). */ \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_start_mr = lib_clock64();  \
+                               counter_start_mr = lib_clock_read();  \
                        } \
                        for ( i = 0; i < m_iter; ++i ) \
                        { \
@@ -770,7 +770,7 @@ void PASTEMAC(ch,varname)( \
                                /* Handle interior and edge cases separately. */ \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_start_ker = lib_clock64();  \
+                                       counter_start_ker = lib_clock_read();  \
                                } \
                                if ( m_cur == MR && n_cur == NR ) \
                                { \
@@ -802,7 +802,7 @@ void PASTEMAC(ch,varname)( \
                                } \
                                if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                                { \
-                                       counter_end_ker = lib_clock64();  \
+                                       counter_end_ker = lib_clock_read();  \
                                        bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
                                                                                        (counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
                                } \
@@ -814,7 +814,7 @@ void PASTEMAC(ch,varname)( \
                        } \
                        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
                        { \
-                               counter_end_mr = lib_clock64();  \
+                               counter_end_mr = lib_clock_read();  \
                                bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
                                                                                (counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
                        } \
@@ -880,7 +880,7 @@ void PASTEMAC(ch,varname)( \
        } \
        if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
        { \
-               counter_end_nr = lib_clock64();  \
+               counter_end_nr = lib_clock_read();  \
                bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
                                                                (counter_end_nr-counter_start_nr), 2*k*m*n); \
        } \
index bf521d0267571b90bc4aef50c00e1023bb30ca27..f1ee815792b243b45024469d9e30cd50672219bb 100755 (executable)
@@ -485,8 +485,8 @@ void bli_dma_var1( obj_t*   a,
        void* ptr_dest;
 
 #ifdef BLIS_ENABLE_CYCLE_COUNT
-       volatile int counter_start;
-       volatile int counter_end;
+    uint64_t counter_start;
+    uint64_t counter_end;
 #endif
 
        m_root     = bli_obj_length( *(bli_obj_root( *a )) );
@@ -613,11 +613,9 @@ void bli_dma_var1( obj_t*   a,
        }
 
 #ifdef BLIS_ENABLE_CYCLE_COUNT
-       TSCL = 0;
-       counter_start = TSCL;
-#endif
-#ifdef BLIS_ENABLE_CYCLE_COUNT
-       counter_end = TSCL;
+       lib_clock_enable();
+       counter_start = lib_clock_read();
+       counter_end = lib_clock_read();
        printf("Cache invalidate %d \n", counter_end-counter_start);
 #endif
 
@@ -628,8 +626,8 @@ void bli_dma_var1( obj_t*   a,
                {
                        int status = -100;
 #ifdef BLIS_ENABLE_CYCLE_COUNT
-                       TSCL = 0;
-                       counter_start = TSCL;
+                       lib_clock_enable();
+                       counter_start = lib_clock_read();
 #endif
                        // The destination object contains the EDMA handle
                        status = lib_emt_copy2D2D       (       p->emt_handle,
@@ -641,7 +639,7 @@ void bli_dma_var1( obj_t*   a,
                                        ld_dest
                        );
 #ifdef BLIS_ENABLE_CYCLE_COUNT
-                       counter_end = TSCL;
+                       counter_end = lib_clock_read();
                        printf("DMA start %d \n", counter_end-counter_start);
 #endif
 
index 27e3bc1a6c803352a540e274dd7077f46bd33c02..25420bc7b5034fdabd0cf61ed6016f49a16d11e4 100644 (file)
@@ -17,12 +17,12 @@ c       # Vector storage scheme(s) to test:
         #   'r' = rowvec / unit stride; 'i' = rowvec / non-unit stride
 0       # Test all combinations of storage schemes?
 32      # General stride spacing (for cases when testing general stride)
-sdcz       # Datatype(s) to test:
+sdcz    # Datatype(s) to test:
         #   's' = single real; 'c' = single complex;
         #   'd' = double real; 'z' = double complex
-500    # Problem size: first to test
+500     # Problem size: first to test
 2500    # Problem size: maximum to test
-500    # Problem size: increment between experiments
+500     # Problem size: increment between experiments
         # Complex level-3 implementations
 0       #   3mh  ('1' = enable; '0' = disable)
 0       #   3m   ('1' = enable; '0' = disable)
@@ -32,5 +32,5 @@ sdcz       # Datatype(s) to test:
         #   '0' = disable error checking; '1' = full error checking
 i       # Reaction to test failure:
         #   'i' = ignore; 's' = sleep() and continue; 'a' = abort
-1       # Output results in matlab/octave format? ('1' = yes; '0' = no)
+0       # Output results in matlab/octave format? ('1' = yes; '0' = no)
 1       # Output results to stdout AND files? ('1' = yes; '0' = no)