Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - dense-linear-algebra-libraries/linalg.git/blobdiff - blis/frame/3/trmm/bli_trmm_ru_ker_var2.c
LINALG 1.2.0 iteration 1.
[dense-linear-algebra-libraries/linalg.git] / blis / frame / 3 / trmm / bli_trmm_ru_ker_var2.c
index ee0a25cafd7661de90513e4c0cff2e90c092eb10..8d23b56e30ceaefa99c3b2d58cc11f52322c58aa 100644 (file)
@@ -220,9 +220,14 @@ void PASTEMAC(ch,varname)( \
        ctype *cNew0, *cNew1, *cNew2, *cNewTemp; \
 \
     /*EDMA Declarations */ \
-       EdmaMgr_Handle edma_handle_b = NULL; \
-       EdmaMgr_Handle edma_handle_c0 = NULL; \
-       EdmaMgr_Handle edma_handle_c1 = NULL; \
+       lib_emt_Handle emt_handle_b = NULL; \
+       lib_emt_Handle emt_handle_c0 = NULL; \
+       lib_emt_Handle emt_handle_c1 = NULL; \
+\
+       /*For DSP timing*/ \
+       volatile uint64_t counter_start_ker, counter_start_nr, counter_start_mr; \
+       volatile uint64_t counter_end_ker, counter_end_nr, counter_end_mr; \
+       extern profile_data_t *bli_trmm_profile_data; \
 \
        /*
           Assumptions/assertions:
@@ -364,31 +369,35 @@ void PASTEMAC(ch,varname)( \
     cNew2 = bli_mem_buffer( &c2_L2_mem ); \
 \
        /*Acquiring an EDMA  handle from the pool*/ \
-       bli_dma_channel_acquire(&(edma_handle_b), CSL_chipReadDNUM()); \
-       if(edma_handle_b == NULL) \
+       bli_dma_channel_acquire(&(emt_handle_b), lib_get_coreID()); \
+       if(emt_handle_b == NULL) \
        { \
-               printf("ker_var2 Failed to alloc edma handle CoreID %d \n", CSL_chipReadDNUM()); \
+               printf("ker_var2 Failed to alloc edma handle CoreID %d \n", lib_get_coreID()); \
        } \
 \
     /*Acquiring an EDMA  handle from the pool*/ \
-    bli_dma_channel_acquire(&(edma_handle_c0), CSL_chipReadDNUM()); \
-    if(edma_handle_c0 == NULL) \
+    bli_dma_channel_acquire(&(emt_handle_c0), lib_get_coreID()); \
+    if(emt_handle_c0 == NULL) \
     { \
-           printf("ker_var2 Failed to alloc edma handle for C0 CoreID %d \n", CSL_chipReadDNUM()); \
+           printf("ker_var2 Failed to alloc edma handle for C0 CoreID %d \n", lib_get_coreID()); \
     } \
     /*Acquiring an EDMA  handle from the pool*/ \
-    bli_dma_channel_acquire(&(edma_handle_c1), CSL_chipReadDNUM()); \
-    if(edma_handle_c1 == NULL) \
+    bli_dma_channel_acquire(&(emt_handle_c1), lib_get_coreID()); \
+    if(emt_handle_c1 == NULL) \
     { \
-           printf("ker_var2 Failed to alloc edma handle for C1 CoreID %d \n", CSL_chipReadDNUM()); \
+           printf("ker_var2 Failed to alloc edma handle for C1 CoreID %d \n", lib_get_coreID()); \
     } \
 \
        n_cur = ( bli_is_not_edge_f( 0, n_iter, n_left ) ? NR : n_left ); \
        /* Loop over the n dimension (NR columns at a time). */ \
+       if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
+       { \
+               counter_start_nr = lib_clock64();  \
+       } \
        /* Transfering MC(=m)xNR*/ \
        if (cs_c*sizeof(ctype) < BLIS_C66X_MAXDMASTRIDE) \
        { \
-               EdmaMgr_copy2D2DSep(edma_handle_c0, c1, \
+               lib_emt_copy2D2D(emt_handle_c0, c1, \
                                                cNew1, m*sizeof(ctype), \
                                                n_cur, cs_c*sizeof(ctype), cs_c11*sizeof(ctype)); \
        } \
@@ -430,14 +439,14 @@ void PASTEMAC(ch,varname)( \
                /* Initialize our next panel of B to be the current panel of B. */ \
                b2 = b1; \
 \
-               EdmaMgr_copy1D1D(edma_handle_b, b1, b1_L1, k_b0111*NR*sizeof(ctype)); \
+               lib_emt_copy1D1D(emt_handle_b, b1, b1_L1, k_b0111*NR*sizeof(ctype)); \
 \
-               EdmaMgr_wait(edma_handle_c0); \
+               lib_emt_wait(emt_handle_c0); \
                if(j < n_iter-1) /* no transfer for last iteration */ \
                { \
                        if (cs_c*sizeof(ctype) < BLIS_C66X_MAXDMASTRIDE) \
                        { \
-                               EdmaMgr_copy2D2DSep(edma_handle_c0, c1+cstep_c, \
+                               lib_emt_copy2D2D(emt_handle_c0, c1+cstep_c, \
                                                                        cNew0, m*sizeof(ctype), \
                                                                        n_next, cs_c*sizeof(ctype), \
                                                                        cs_c11*sizeof(ctype)); \
@@ -475,6 +484,10 @@ void PASTEMAC(ch,varname)( \
                        bli_auxinfo_set_is_b( PACKNR * k_b0111, aux ); \
 \
                        /* Loop over the m dimension (MR rows at a time). */ \
+                       if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
+                       { \
+                               counter_start_mr = lib_clock64();  \
+                       } \
                        for ( i = 0; i < m_iter; ++i ) \
                        { \
                                if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
@@ -486,18 +499,18 @@ void PASTEMAC(ch,varname)( \
 \
                                if(i == 0) \
                                { \
-                                       idma1_setup(a2_L1, a1 + ( off_b0111 * PACKMR ) / off_scl, k_b0111*MR*sizeof(ctype), 0, 0, 7); \
+                                       lib_imt_copy(a1 + ( off_b0111 * PACKMR ) / off_scl, a2_L1, k_b0111*MR*sizeof(ctype)); \
                                } \
 \
                                /* Compute the addresses of the next panels of A and B. */ \
                                a2 = a1 + rstep_a; \
-                               while(!idma1_done()){;} \
+                               lib_imt_wait(); \
                                temp = a1_L1; \
                                a1_L1 = a2_L1; \
                                a2_L1 = temp; \
                                if(i == 0) \
                                { \
-                                       EdmaMgr_wait(edma_handle_b);\
+                                       lib_emt_wait(emt_handle_b);\
                                } \
 \
                                /*a1_i = a1_L1 + ( off_b0111 * PACKMR ) / off_scl;*/ \
@@ -513,7 +526,7 @@ void PASTEMAC(ch,varname)( \
                                else \
                                { \
                                        /*Start next panel*/ \
-                                       idma1_setup(a2_L1, a2 + ( off_b0111 * PACKMR ) / off_scl, k_b0111*MR*sizeof(ctype), 0, 0, 7); \
+                                       lib_imt_copy(a2 + ( off_b0111 * PACKMR ) / off_scl, a2_L1, k_b0111*MR*sizeof(ctype)); \
                                } \
 \
                                /* Save addresses of next panels of A and B to the auxinfo_t
@@ -522,6 +535,10 @@ void PASTEMAC(ch,varname)( \
                                bli_auxinfo_set_next_b( b2, aux ); \
 \
                                /* Handle interior and edge cases separately. */ \
+                               if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
+                               { \
+                                       counter_start_ker = lib_clock64();  \
+                               } \
                                if ( m_cur == MR && n_cur == NR ) \
                                { \
                                        /* Invoke the gemm micro-kernel. */ \
@@ -554,11 +571,23 @@ void PASTEMAC(ch,varname)( \
                                                                ct,  rs_ct, cs_ct, \
                                                                c11, rs_c11,  cs_c11 ); \
                                } \
+                               if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
+                               { \
+                                       counter_end_ker = lib_clock64();  \
+                                       bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
+                                                                                       (counter_end_ker-counter_start_ker), 2*k_b0111*m_cur*n_cur); \
+                               } \
                                } \
 \
                                a1  += rstep_a; \
                                c11 += rstep_c11; \
                        } \
+                       if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
+                       { \
+                               counter_end_mr = lib_clock64();  \
+                               bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
+                                                                               (counter_end_mr-counter_start_mr), 2*k_b0111*m*n_cur); \
+                       } \
                        } \
 \
                        b1 += ps_b_cur; \
@@ -572,6 +601,10 @@ void PASTEMAC(ch,varname)( \
                        bli_auxinfo_set_is_b( istep_b, aux ); \
 \
                        /* Loop over the m dimension (MR rows at a time). */ \
+                       if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
+                       { \
+                               counter_start_mr = lib_clock64();  \
+                       } \
                        for ( i = 0; i < m_iter; ++i ) \
                        { \
                                if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
@@ -582,18 +615,18 @@ void PASTEMAC(ch,varname)( \
 \
                                if(i == 0) \
                                { \
-                                       idma1_setup(a2_L1, a1, k_b0111*MR*sizeof(ctype), 0, 0, 7); \
+                                       lib_imt_copy(a1, a2_L1, k_b0111*MR*sizeof(ctype)); \
                                } \
 \
                                /* Compute the addresses of the next panels of A and B. */ \
                    a2 = a1 + rstep_a; \
-                               while(!idma1_done()){;} \
+                               lib_imt_wait(); \
                        temp = a1_L1; \
                                a1_L1 = a2_L1; \
                                a2_L1 = temp; \
                                if(i == 0) \
                                { \
-                                       EdmaMgr_wait(edma_handle_b);\
+                                       lib_emt_wait(emt_handle_b);\
                                } \
 \
                                if ( bli_is_last_iter( i, m_iter, 0, 1 ) ) \
@@ -606,7 +639,7 @@ void PASTEMAC(ch,varname)( \
                                else \
                                { \
                        /*Start next panel*/ \
-                                       idma1_setup(a2_L1, a2, k_b0111*MR*sizeof(ctype), 0, 0, 7); \
+                                       lib_imt_copy(a2, a2_L1, k_b0111*MR*sizeof(ctype)); \
                                } \
 \
                                /* Save addresses of next panels of A and B to the auxinfo_t
@@ -615,6 +648,10 @@ void PASTEMAC(ch,varname)( \
                                bli_auxinfo_set_next_b( b2, aux ); \
 \
                                /* Handle interior and edge cases separately. */ \
+                               if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
+                               { \
+                                       counter_start_ker = lib_clock64();  \
+                               } \
                                if ( m_cur == MR && n_cur == NR ) \
                                { \
                                        /* Invoke the gemm micro-kernel. */ \
@@ -642,11 +679,23 @@ void PASTEMAC(ch,varname)( \
                                                               ct,  rs_ct, cs_ct, \
                                                               c11, rs_c11,  cs_c11 ); \
                                } \
+                               if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
+                               { \
+                                       counter_end_ker = lib_clock64();  \
+                                       bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
+                                                                                       (counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
+                               } \
                                } \
 \
                                a1  += rstep_a; \
                                c11 += rstep_c11; \
                        } \
+                       if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
+                       { \
+                               counter_end_mr = lib_clock64();  \
+                               bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
+                                                                               (counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
+                       } \
                        } \
 \
                        b1 += cstep_b; \
@@ -659,12 +708,12 @@ void PASTEMAC(ch,varname)( \
                cNew1 = cNewTemp; \
                if(j != 0) /* wait for save c to complete; skip first iteration */ \
                { \
-                       EdmaMgr_wait(edma_handle_c1); \
+                       lib_emt_wait(emt_handle_c1); \
                } \
                /* save updated c*/ \
                if (cs_c*sizeof(ctype) < BLIS_C66X_MAXDMASTRIDE) \
                { \
-                       EdmaMgr_copy2D2DSep(edma_handle_c1, cNew2, c1, m*sizeof(ctype),  \
+                       lib_emt_copy2D2D(emt_handle_c1, cNew2, c1, m*sizeof(ctype),  \
                                                                 n_cur, cs_c11*sizeof(ctype), cs_c*sizeof(ctype)); \
                }\
                else \
@@ -684,6 +733,12 @@ void PASTEMAC(ch,varname)( \
 \
                c1 += cstep_c; \
        } \
+       if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
+       { \
+               counter_end_nr = lib_clock64();  \
+               bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
+                                                               (counter_end_nr-counter_start_nr), 2*k*m*n); \
+       } \
 \
     bli_mem_release( &c2_L2_mem ); \
     bli_mem_release( &c1_L2_mem ); \
@@ -691,21 +746,21 @@ void PASTEMAC(ch,varname)( \
        bli_mem_release( &a2_L1_mem ); \
        bli_mem_release( &a1_L1_mem ); \
        bli_mem_release( &b1_L1_mem ); \
-       if ( edma_handle_b != NULL ) \
+       if ( emt_handle_b != NULL ) \
        { \
-               bli_dma_channel_release(edma_handle_b, CSL_chipReadDNUM()); \
-               edma_handle_b = NULL; \
+               bli_dma_channel_release(emt_handle_b, lib_get_coreID()); \
+               emt_handle_b = NULL; \
        } \
-    if ( edma_handle_c0 != NULL ) \
+    if ( emt_handle_c0 != NULL ) \
     { \
-               bli_dma_channel_release(edma_handle_c0, CSL_chipReadDNUM()); \
-               edma_handle_c0 = NULL; \
+               bli_dma_channel_release(emt_handle_c0, lib_get_coreID()); \
+               emt_handle_c0 = NULL; \
     } \
-       if ( edma_handle_c1 != NULL ) \
+       if ( emt_handle_c1 != NULL ) \
     { \
-        EdmaMgr_wait(edma_handle_c1); /* wait for save c to complete */ \
-               bli_dma_channel_release(edma_handle_c1, CSL_chipReadDNUM()); \
-               edma_handle_c1 = NULL; \
+        lib_emt_wait(emt_handle_c1); /* wait for save c to complete */ \
+               bli_dma_channel_release(emt_handle_c1, lib_get_coreID()); \
+               emt_handle_c1 = NULL; \
     } \
 /*PASTEMAC(ch,fprintm)( stdout, "trmm_ru_ker_var2: a1", MR, k_b0111, a1, 1, MR, "%4.1f", "" );*/ \
 /*PASTEMAC(ch,fprintm)( stdout, "trmm_ru_ker_var2: b1", k_b0111, NR, b1_i, NR, 1, "%4.1f", "" );*/ \