Cleaned up examples.
author Jianzhong Xu <a0869574@ti.com>
Wed, 22 Apr 2015 13:06:36 +0000 (09:06 -0400)
committer Jianzhong Xu <a0869574@ti.com>
Wed, 22 Apr 2015 13:06:36 +0000 (09:06 -0400)
12 files changed:
build/tar_files_list.txt
examples/dgemm_test/Makefile
examples/dgemm_test/dgemm_test.c
examples/dsyrk_test/Makefile [new file with mode: 0644]
examples/dsyrk_test/dsyrk_test.c [new file with mode: 0644]
examples/ludinv/main.c
examples/ztrmm_test/Makefile [new file with mode: 0644]
examples/ztrmm_test/ztrmm_test.c [new file with mode: 0644]
examples/ztrsm_test/Makefile [new file with mode: 0644]
examples/ztrsm_test/ztrsm_test.c [new file with mode: 0644]
readme.txt
tuning/Makefile

index d7796962320d44ced26949fb7dfe93b9a9133a2b..a3cb737b780a21ce79bd34d3120e444b5323817e 100644 (file)
@@ -4,14 +4,14 @@ readme.txt
 debian
 examples/make.inc
 examples/Makefile
-examples/eig
-examples/ludinv
 examples/matmpy
-examples/sgemm_tune
-examples/dgemm_tune
-examples/cgemm_tune
-examples/zgemm_tune
 examples/dgemm_test
+examples/dsyrk_test
+examples/ztrmm_test
+examples/ztrsm_test
+examples/eig
+examples/ludinv
+tuning
 blis/version
 blis/build
 blis/CHANGELOG
index b7c549218de63bce068ce99701202e152af83d5e..503045316cdc27517c55ab2d91c2f52b2452ccb0 100644 (file)
@@ -6,3 +6,7 @@ include ../make.inc
 $(EXE): dgemm_test.o
        $(CC) $(CFLAGS) dgemm_test.o $(BLASLIB) -o $@
 
+# Run the benchmark once per TI_CBLAS_OFFLOAD setting (000, 001, 002) and keep
+# each run's dgemm_time.dat / dgemm_gflops.dat under an _ARM / _DSP / _OPT name.
+# The steps are chained with && so that a failed run or copy stops the target
+# instead of silently copying stale data files from an earlier run.
+.PHONY: run
+run: $(EXE)
+	export TI_CBLAS_OFFLOAD=000 && ./$(EXE) && cp dgemm_time.dat dgemm_time_ARM.dat && cp dgemm_gflops.dat dgemm_gflops_ARM.dat && \
+	export TI_CBLAS_OFFLOAD=001 && ./$(EXE) && cp dgemm_time.dat dgemm_time_DSP.dat && cp dgemm_gflops.dat dgemm_gflops_DSP.dat && \
+	export TI_CBLAS_OFFLOAD=002 && ./$(EXE) && cp dgemm_time.dat dgemm_time_OPT.dat && cp dgemm_gflops.dat dgemm_gflops_OPT.dat
\ No newline at end of file
index 35e56bc5f8d8ca38e7a6bd3e996fe1c17cce0907..88b6de21de918fe1a5e93e56dde390cb08d390c1 100644 (file)
@@ -1,5 +1,5 @@
 /******************************************************************************
- * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
+ * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -39,7 +39,7 @@ extern "C" {
 }
 #endif
 
-#define TUNING_START_SIZE_RECTAN_MATRIX 64
+#define TUNING_START_SIZE_RECTAN_MATRIX 128
 #define NUM_MATRIX_SIZE_TO_BENCHMARK 4
 #define HAS_MEMORY   1
 #define NO_MEMORY    0
@@ -82,15 +82,15 @@ int main()
     int M, N, K, m, n, k;
     int M_pre, N_pre, K_pre, M_start_size, N_start_size;
     float time_secs_arm, gflops_arm, time_secs_dsp, gflops_dsp, time_secs_opt, gflops_opt;
-    FILE *fp_time, *fp_flops;  
+    FILE *fp_time, *fp_gflops;  
   
     fp_time = fopen("dgemm_time.dat","w");
-    fp_flops = fopen("dgemm_flops.dat","w");
+    fp_gflops = fopen("dgemm_gflops.dat","w");
 
     srand(12345);
     
        /* setting up TI CBLAS during first call */
-       run_dgemm(100, 100, 100, &time_secs_arm, &gflops_arm);
+       run_dgemm(1000, 1000, 1000, &time_secs_arm, &gflops_arm);
        
     /* sweep M, K, and N */    
     for (M=TUNING_START_SIZE_RECTAN_MATRIX,m=0; m<NUM_MATRIX_SIZE_TO_BENCHMARK; m++,M*=2) 
@@ -101,30 +101,21 @@ int main()
             {
                 printf("Running DGEMM for (M,N,K) = (%d,%d,%d).\n", M,N,K);
                                
-                               TI_CBLAS_L3_OFFLOAD = 0;
-                dgemm_err = run_dgemm(M, N, K, &time_secs_arm, &gflops_arm);
+                dgemm_err = run_dgemm(M, N, K, &time_secs, &gflops);
           
                 if(dgemm_err == -1) {  /* out of memory for DSP offloading */
                     printf("Out of memory for (M,N,K) = (%d,%d,%d).\n", M,N,K);
                 }
                 else {
-                               TI_CBLAS_L3_OFFLOAD = 1;
-                    dgemm_err = run_dgemm(M, N, K, &time_secs_dsp, &gflops_dsp);
-    
-                               TI_CBLAS_L3_OFFLOAD = 2;
-                    dgemm_err = run_dgemm(M, N, K, &time_secs_opt, &gflops_opt);
-
-                    fprintf(fp_time, "%6d\t%6d\t%6d\t%10.8e\t%10.8e\t%10.8e\n", 
-                                               M, N, K, time_secs_arm, time_secs_dsp, time_secs_opt);
-                    fprintf(fp_flops, "%6d\t%6d\t%6d\t%10.8e\t%10.8e\t%10.8e\n", 
-                            M, N, K, gflops_arm, gflops_dsp, gflops_opt);
+                    fprintf(fp_time,  "%6d\t%6d\t%6d\t%10.8e\n", M, N, K, time_secs);
+                    fprintf(fp_gflops, "%6d\t%6d\t%6d\t%10.8e\n", M, N, K, gflops);
                 }
             }
         }
     }
         
     fclose(fp_time);
-    fclose(fp_flops);
+    fclose(fp_gflops);
     
     return 0;
 }
diff --git a/examples/dsyrk_test/Makefile b/examples/dsyrk_test/Makefile
new file mode 100644 (file)
index 0000000..cca9c9a
--- /dev/null
@@ -0,0 +1,12 @@
+
+EXE = dsyrk_test
+
+# CC, CFLAGS and BLASLIB are not set in this file; presumably they come from
+# ../make.inc (confirm there).
+include ../make.inc
+
+# Link the benchmark executable from its single object file.
+$(EXE): dsyrk_test.o
+	$(CC) $(CFLAGS) dsyrk_test.o $(BLASLIB) -o $@
+
+# Run the benchmark once per TI_CBLAS_OFFLOAD setting (000, 001, 002) and keep
+# each run's dsyrk_time.dat / dsyrk_gflops.dat under an _ARM / _DSP / _OPT name.
+# The steps are chained with && so that a failed run or copy stops the target
+# instead of silently copying stale data files from an earlier run.
+.PHONY: run
+run: $(EXE)
+	export TI_CBLAS_OFFLOAD=000 && ./$(EXE) && cp dsyrk_time.dat dsyrk_time_ARM.dat && cp dsyrk_gflops.dat dsyrk_gflops_ARM.dat && \
+	export TI_CBLAS_OFFLOAD=001 && ./$(EXE) && cp dsyrk_time.dat dsyrk_time_DSP.dat && cp dsyrk_gflops.dat dsyrk_gflops_DSP.dat && \
+	export TI_CBLAS_OFFLOAD=002 && ./$(EXE) && cp dsyrk_time.dat dsyrk_time_OPT.dat && cp dsyrk_gflops.dat dsyrk_gflops_OPT.dat
\ No newline at end of file
diff --git a/examples/dsyrk_test/dsyrk_test.c b/examples/dsyrk_test/dsyrk_test.c
new file mode 100644 (file)
index 0000000..4b14824
--- /dev/null
@@ -0,0 +1,184 @@
+/******************************************************************************
+ * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions are met:
+ *       * Redistributions of source code must retain the above copyright
+ *         notice, this list of conditions and the following disclaimer.
+ *       * Redistributions in binary form must reproduce the above copyright
+ *         notice, this list of conditions and the following disclaimer in the
+ *         documentation and/or other materials provided with the distribution.
+ *       * Neither the name of Texas Instruments Incorporated nor the
+ *         names of its contributors may be used to endorse or promote products
+ *         derived from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ *   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *   THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+
+#include "cblas.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "cblas.h"
+#ifdef __cplusplus
+}
+#endif
+
+/* Sweep configuration: each swept dimension takes NUM_MATRIX_SIZE_TO_TEST
+ * values, starting at START_SIZE_RECTAN_MATRIX and doubling every step; each
+ * size is timed NUM_TEST_RUN times and the results are averaged. */
+#define START_SIZE_RECTAN_MATRIX 128
+#define NUM_MATRIX_SIZE_TO_TEST  4
+#define NUM_TEST_RUN 5
+
+/*-----------------------------------------------------------------------------
+* Timing Setup
+*
+* tick() records the start time in t0; tock() records the end time in t1 and
+* evaluates to the elapsed seconds as a double.  Neither macro carries a
+* trailing semicolon, so both behave like ordinary expressions at the call
+* site (including inside if/else without braces).
+*----------------------------------------------------------------------------*/
+struct timespec t0,t1;
+#define tick()  ((void)clock_gettime(CLOCK_MONOTONIC, &t0))
+#define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
+                        t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9)
+
+/*-----------------------------------------------------------------------------
+* Global Variables
+*
+* Fixed arguments passed to every cblas_dsyrk call made by run_dsyrk().
+*----------------------------------------------------------------------------*/
+double alpha           = 0.7;
+double beta            = 0.3;
+enum CBLAS_ORDER     order  = CblasColMajor;
+enum CBLAS_TRANSPOSE transA = CblasNoTrans;
+enum CBLAS_UPLO      uplo   = CblasUpper;
+
+/* Declared but not referenced anywhere in this file. */
+extern int TI_CBLAS_L3_OFFLOAD;
+/*-----------------------------------------------------------------------------
+* Prototypes
+*----------------------------------------------------------------------------*/
+/* check_results is declared but neither defined nor called in this file. */
+int check_results(const double *C1, const double *C2, int N, int K);
+int run_dsyrk(int N, int K, float *time, float *gflops);
+
+/*-----------------------------------------------------------------------------
+* MAIN
+*----------------------------------------------------------------------------*/
+/*
+ * Benchmark driver for DSYRK.
+ *
+ * After one warm-up call, sweeps N and K over NUM_MATRIX_SIZE_TO_TEST values
+ * each (starting at START_SIZE_RECTAN_MATRIX and doubling) and writes one
+ * "N<TAB>K<TAB>value" row per size pair to dsyrk_time.dat and
+ * dsyrk_gflops.dat.  A value of -1.0 marks a size pair for which run_dsyrk()
+ * returned -1.
+ *
+ * Returns 0 on success, or EXIT_FAILURE if an output file cannot be opened.
+ */
+int main()
+{
+    int dsyrk_err;
+    int N, K, n, k;
+    float time_secs, gflops;
+    FILE *fp_time;
+    FILE *fp_gflops;
+
+    /* Open both result files up front; without them the sweep is pointless and
+     * fprintf()/fclose() on a NULL stream would crash. */
+    fp_time = fopen("dsyrk_time.dat","w");
+    if (fp_time == NULL) {
+        perror("dsyrk_time.dat");
+        return EXIT_FAILURE;
+    }
+    fp_gflops = fopen("dsyrk_gflops.dat","w");
+    if (fp_gflops == NULL) {
+        perror("dsyrk_gflops.dat");
+        fclose(fp_time);
+        return EXIT_FAILURE;
+    }
+
+    /* Fixed seed so every run generates the same matrix contents. */
+    srand(12345);
+
+    /* setting up TI CBLAS during first call */
+    run_dsyrk(1000, 1000, &time_secs, &gflops);
+
+    /* sweep K, and N */
+    for (N=START_SIZE_RECTAN_MATRIX,n=0; n<NUM_MATRIX_SIZE_TO_TEST; n++,N*=2)
+    {
+        for (K=START_SIZE_RECTAN_MATRIX,k=0; k<NUM_MATRIX_SIZE_TO_TEST; k++,K*=2)
+        {
+            printf("Running DSYRK for (N,K)=(%d,%d)\n", N,K);
+
+            dsyrk_err = run_dsyrk(N, K, &time_secs, &gflops);
+
+            if(dsyrk_err == -1) {  /* out of memory for DSP offloading */
+                printf("Out of memory for (N,K) = (%d,%d).\n", N,K);
+                fprintf(fp_time, "%6d\t%6d\t%10.8e\n", N, K, -1.0);
+                fprintf(fp_gflops, "%6d\t%6d\t%10.8e\n", N, K, -1.0);
+            }
+            else {
+                fprintf(fp_time, "%6d\t%6d\t%10.8e\n", N, K, time_secs);
+                fprintf(fp_gflops, "%6d\t%6d\t%10.8e\n", N, K, gflops);
+            }
+        }
+    }
+    fclose(fp_time);
+    fclose(fp_gflops);
+
+    return 0;
+}
+
+
+/*
+ * Times cblas_dsyrk for the given N and K.
+ *
+ * Runs NUM_TEST_RUN iterations.  Each iteration allocates A (N*K doubles) and
+ * C (N*N doubles) with __malloc_ddr, fills them with rand() values in [0,1],
+ * times a single cblas_dsyrk call, then frees both buffers.
+ *
+ * N, K   - dimensions passed to cblas_dsyrk.
+ * time   - out: mean elapsed seconds per call.
+ * gflops - out: mean of the per-iteration GFLOPS figures, computed with
+ *          N*N*K as the operation count.
+ *
+ * Returns 0 on success, or -1 (leaving *time and *gflops unwritten) when a
+ * buffer would exceed 0xffffffff bytes or an allocation fails.
+ */
+int run_dsyrk(int N, int K, float *time, float *gflops)
+{
+    long long i, size_A, size_C;
+    int    iter;
+    float  time_secs, total_time, total_gflops, gflops_iter;
+    float  operation_count = 1.0*(double)N*(double)N*(double)K;
+    int    err_code = 0;
+    
+    total_time= 0.0;
+       total_gflops = 0.0;
+    size_A = (long long)N*(long long)K;
+    size_C = (long long)N*(long long)N;
+    
+    /* Reject sizes whose byte count does not fit in 32 bits. */
+    if(  (size_A*sizeof(double)>(long long)0x0ffffffff) 
+       ||(size_C*sizeof(double)>(long long)0x0ffffffff) ) {
+        return (-1);
+    }
+    
+    for (iter = 0; iter < NUM_TEST_RUN; iter++)
+    {      
+      /*-------------------------------------------------------------------------
+      * Allocate space for the matrices.  The matrices that will be passed to 
+      * the DSP are allocated using device memory.  
+      *------------------------------------------------------------------------*/
+      double *A = (double *) __malloc_ddr(size_A*sizeof(double));
+      double *C = (double *) __malloc_ddr(size_C*sizeof(double));
+  
+      if (!A || !C)
+      {
+          printf("Could not allocate enough space for the arrays!");
+          if(A) __free_ddr(A);
+          if(C) __free_ddr(C);
+         
+          return (-1);
+      }
+  
+      /*-------------------------------------------------------------------------
+      * Initialize matrices 
+      *------------------------------------------------------------------------*/
+      for (i = 0; i < (long long)N*K; ++i) A[i] = (double)rand()/RAND_MAX;
+      for (i = 0; i < (long long)N*N; ++i) C[i] = (double)rand()/RAND_MAX;
+  
+      /* With the file-level order/transA settings this selects N. */
+      int lda = ((order == CblasColMajor && transA == CblasNoTrans) ||
+              (order == CblasRowMajor && transA == CblasTrans)) ? N : K;
+  
+      int ldc = N;
+  
+      fflush(stdout);
+      
+      /*------------------------------------------------------------------------
+      * Time dsyrk
+      *-----------------------------------------------------------------------*/
+      tick();
+      cblas_dsyrk(order,uplo,transA,N,K,alpha,A,lda,beta,C,ldc);
+      time_secs = tock();
+      total_time += time_secs;
+      gflops_iter = operation_count/time_secs*1e-9;
+      total_gflops += gflops_iter;
+  
+      __free_ddr(A);
+      __free_ddr(C);
+    }
+    
+    /* Report the averages over all iterations. */
+    *gflops = total_gflops / (double)NUM_TEST_RUN;
+    *time   = total_time / (double)NUM_TEST_RUN;
+    
+    return err_code;
+}
+
index e99932856ff483f61f274a1abf3d2118e0acc340..7660bdcfc794da0e6a9d2a3c77dd9eb92758ad69 100644 (file)
@@ -82,7 +82,7 @@ int main(int argc, char *argv[]) {
     if(argc == 1) { /* no command line arguments, use default */
         num_test = 3;
         n_min    = 1000;
-        n_inc    = 1000;
+        n_inc    = 500;
         num_run  = 1;
         print_data = 0;
     }
diff --git a/examples/ztrmm_test/Makefile b/examples/ztrmm_test/Makefile
new file mode 100644 (file)
index 0000000..c96afc1
--- /dev/null
@@ -0,0 +1,12 @@
+
+EXE = ztrmm_test
+
+# CC, CFLAGS and BLASLIB are not set in this file; presumably they come from
+# ../make.inc (confirm there).
+include ../make.inc
+
+# Link the benchmark executable from its single object file.
+$(EXE): ztrmm_test.o
+	$(CC) $(CFLAGS) ztrmm_test.o $(BLASLIB) -o $@
+
+# Run the benchmark once per TI_CBLAS_OFFLOAD setting (000, 001, 002) and keep
+# each run's ztrmm_time.dat / ztrmm_gflops.dat under an _ARM / _DSP / _OPT name.
+# The steps are chained with && so that a failed run or copy stops the target
+# instead of silently copying stale data files from an earlier run.
+.PHONY: run
+run: $(EXE)
+	export TI_CBLAS_OFFLOAD=000 && ./$(EXE) && cp ztrmm_time.dat ztrmm_time_ARM.dat && cp ztrmm_gflops.dat ztrmm_gflops_ARM.dat && \
+	export TI_CBLAS_OFFLOAD=001 && ./$(EXE) && cp ztrmm_time.dat ztrmm_time_DSP.dat && cp ztrmm_gflops.dat ztrmm_gflops_DSP.dat && \
+	export TI_CBLAS_OFFLOAD=002 && ./$(EXE) && cp ztrmm_time.dat ztrmm_time_OPT.dat && cp ztrmm_gflops.dat ztrmm_gflops_OPT.dat
\ No newline at end of file
diff --git a/examples/ztrmm_test/ztrmm_test.c b/examples/ztrmm_test/ztrmm_test.c
new file mode 100644 (file)
index 0000000..0728573
--- /dev/null
@@ -0,0 +1,194 @@
+/******************************************************************************
+ * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions are met:
+ *       * Redistributions of source code must retain the above copyright
+ *         notice, this list of conditions and the following disclaimer.
+ *       * Redistributions in binary form must reproduce the above copyright
+ *         notice, this list of conditions and the following disclaimer in the
+ *         documentation and/or other materials provided with the distribution.
+ *       * Neither the name of Texas Instruments Incorporated nor the
+ *         names of its contributors may be used to endorse or promote products
+ *         derived from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ *   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *   THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#include <complex.h>
+
+#include "cblas.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "cblas.h"
+#ifdef __cplusplus
+}
+#endif
+
+/* Sweep configuration: each swept dimension takes NUM_MATRIX_SIZE_TO_TEST
+ * values, starting at START_SIZE_RECTAN_MATRIX and doubling every step; each
+ * size is timed NUM_TEST_RUN times and the results are averaged. */
+#define START_SIZE_RECTAN_MATRIX 128
+#define NUM_MATRIX_SIZE_TO_TEST  4
+#define NUM_TEST_RUN 5
+
+/*-----------------------------------------------------------------------------
+* Timing Setup
+*
+* tick() records the start time in t0; tock() records the end time in t1 and
+* evaluates to the elapsed seconds as a double.  Neither macro carries a
+* trailing semicolon, so both behave like ordinary expressions at the call
+* site (including inside if/else without braces).
+*----------------------------------------------------------------------------*/
+struct timespec t0,t1;
+#define tick()  ((void)clock_gettime(CLOCK_MONOTONIC, &t0))
+#define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
+                        t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9)
+
+/*-----------------------------------------------------------------------------
+* Global Variables
+*
+* Fixed arguments passed to every cblas_ztrmm call made by run_ztrmm().
+*----------------------------------------------------------------------------*/
+double complex alpha = 0.7 - 0.3*I;
+enum CBLAS_ORDER     order  = CblasColMajor;
+enum CBLAS_TRANSPOSE transA = CblasNoTrans;
+enum CBLAS_SIDE side  = CblasLeft;
+enum CBLAS_UPLO uplo  = CblasUpper;
+enum CBLAS_DIAG diag  = CblasUnit;
+
+/* Declared but not referenced anywhere in this file. */
+extern int TI_CBLAS_L3_OFFLOAD;
+
+/*-----------------------------------------------------------------------------
+* Prototypes
+*----------------------------------------------------------------------------*/
+/* check_results is declared but neither defined nor called in this file. */
+int check_results(const double complex *C1, const double complex *C2, int M, int N);
+int run_ztrmm(int M, int N, float *time, float *gflops);
+
+/*-----------------------------------------------------------------------------
+* MAIN
+*----------------------------------------------------------------------------*/
+/*
+ * Benchmark driver for ZTRMM.
+ *
+ * After one warm-up call, sweeps M and N over NUM_MATRIX_SIZE_TO_TEST values
+ * each (starting at START_SIZE_RECTAN_MATRIX and doubling) and writes one
+ * "M<TAB>N<TAB>value" row per size pair to ztrmm_time.dat and
+ * ztrmm_gflops.dat.  A value of -1.0 marks a size pair for which run_ztrmm()
+ * returned -1.
+ *
+ * Returns 0 on success, or EXIT_FAILURE if an output file cannot be opened.
+ */
+int main()
+{
+    int ztrmm_err;
+    int M, N, m, n;
+    float time_secs, gflops;
+    FILE *fp_time, *fp_gflops;
+
+    /* Open both result files up front; without them the sweep is pointless and
+     * fprintf()/fclose() on a NULL stream would crash. */
+    fp_time = fopen("ztrmm_time.dat","w");
+    if (fp_time == NULL) {
+        perror("ztrmm_time.dat");
+        return EXIT_FAILURE;
+    }
+    fp_gflops = fopen("ztrmm_gflops.dat","w");
+    if (fp_gflops == NULL) {
+        perror("ztrmm_gflops.dat");
+        fclose(fp_time);
+        return EXIT_FAILURE;
+    }
+
+    /* Fixed seed so every run generates the same matrix contents. */
+    srand(12345);
+
+    /* setting up TI CBLAS during first call */
+    run_ztrmm(1000, 1000, &time_secs, &gflops);
+
+    /* sweep M, and N */
+    for (M=START_SIZE_RECTAN_MATRIX,m=0; m<NUM_MATRIX_SIZE_TO_TEST; m++,M*=2)
+    {
+        for (N=START_SIZE_RECTAN_MATRIX,n=0; n<NUM_MATRIX_SIZE_TO_TEST; n++,N*=2)
+        {
+            printf("Running ZTRMM for (M,N)=(%d,%d)\n", M,N);
+            ztrmm_err = run_ztrmm(M, N, &time_secs, &gflops);
+
+            if(ztrmm_err == -1) {  /* out of memory for DSP offloading */
+                printf("Out of memory for (M,N) = (%d,%d).\n", M,N);
+                fprintf(fp_time, "%6d\t%6d\t%10.8e\n", M, N, -1.0);
+                fprintf(fp_gflops, "%6d\t%6d\t%10.8e\n", M, N, -1.0);
+            }
+            else {
+                fprintf(fp_time, "%6d\t%6d\t%10.8e\n", M, N, time_secs);
+                fprintf(fp_gflops, "%6d\t%6d\t%10.8e\n", M, N, gflops);
+            }
+        }
+    }
+
+    fclose(fp_time);
+    fclose(fp_gflops);
+
+    return 0;
+}
+
+/*
+ * Times cblas_ztrmm for the given M and N.
+ *
+ * A holds M*M elements when side == CblasLeft and N*N otherwise; B holds M*N
+ * elements.  Runs NUM_TEST_RUN iterations; each one allocates A and B with
+ * __malloc_ddr, fills them with complex values whose real and imaginary parts
+ * are rand() values in [0,1], times a single cblas_ztrmm call, then frees
+ * both buffers.
+ *
+ * M, N   - dimensions passed to cblas_ztrmm.
+ * time   - out: mean elapsed seconds per call.
+ * gflops - out: mean of the per-iteration GFLOPS figures, computed with
+ *          4*M*M*N (left side) or 4*M*N*N (right side) as the operation count.
+ *
+ * Returns 0 on success, or -1 (leaving *time and *gflops unwritten) when a
+ * buffer would exceed 0xffffffff bytes or an allocation fails.
+ */
+int run_ztrmm(int M, int N, float *time, float *gflops)
+{
+    long long i, size_A, size_B;
+    int   iter;
+    float time_secs, total_time, total_gflops, gflops_iter;
+    float operation_count;
+    int   err_code = 0;
+
+    total_time= 0.0;
+       total_gflops = 0.0;
+       if(side == CblasLeft) {
+         size_A = (long long)M*(long long)M;
+      operation_count = 4.0*(double)M*(double)M*(double)N;    
+       }
+       else {
+         size_A = (long long)N*(long long)N;
+      operation_count = 4.0*(double)M*(double)N*(double)N;    
+       }
+       size_B = (long long)M*(long long)N;
+    /* Reject sizes whose byte count does not fit in 32 bits. */
+    if(  (size_A*sizeof(double complex)>(long long)0x0ffffffff) 
+       ||(size_B*sizeof(double complex)>(long long)0x0ffffffff) ) {
+        return (-1);
+    }
+       
+    for (iter = 0; iter < NUM_TEST_RUN; iter++)
+    {      
+      /*-------------------------------------------------------------------------
+      * Allocate space for the matrices.  The matrices that will be passed to 
+      * the DSP are allocated using device memory.  
+      *------------------------------------------------------------------------*/
+      double complex *A = (double complex *) __malloc_ddr(size_A*sizeof(double complex));
+      double complex *B = (double complex *) __malloc_ddr(size_B*sizeof(double complex));
+  
+      if (!A || !B)
+      {
+          printf("Could not allocate enough space for the arrays!");
+          if(A) __free_ddr(A);
+          if(B) __free_ddr(B);
+          
+          return (-1);
+      }
+  
+      /*-------------------------------------------------------------------------
+      * Initialize matrices 
+      *------------------------------------------------------------------------*/
+      int lda = (side == CblasLeft) ? M : N;  
+      int ldb = M;  
+      for (i = 0; i < size_A; ++i) 
+         {
+          A[i] = (double)rand()/RAND_MAX + (double)rand()/RAND_MAX * I;
+         }
+
+      for (i = 0; i < size_B; ++i)
+         {
+          B[i] = (double)rand()/RAND_MAX + (double)rand()/RAND_MAX * I;
+      }
+    
+      /*------------------------------------------------------------------------
+      * Time ztrmm
+      *-----------------------------------------------------------------------*/     
+      tick();
+      cblas_ztrmm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,B,ldb);
+      time_secs = tock();
+      total_time += time_secs;
+      gflops_iter = operation_count/time_secs*1e-9;
+      total_gflops += gflops_iter;
+  
+      __free_ddr(A);
+      __free_ddr(B);
+    }
+    
+    /* Report the averages over all iterations. */
+    *gflops = total_gflops / (double)NUM_TEST_RUN;
+    *time   = total_time / (double)NUM_TEST_RUN;
+    
+    return err_code;
+}
+
+
diff --git a/examples/ztrsm_test/Makefile b/examples/ztrsm_test/Makefile
new file mode 100644 (file)
index 0000000..c5c9a44
--- /dev/null
@@ -0,0 +1,12 @@
+
+EXE = ztrsm_test
+
+# CC, CFLAGS and BLASLIB are not set in this file; presumably they come from
+# ../make.inc (confirm there).
+include ../make.inc
+
+# Link the benchmark executable from its single object file.
+$(EXE): ztrsm_test.o
+	$(CC) $(CFLAGS) ztrsm_test.o $(BLASLIB) -o $@
+
+# Run the benchmark once per TI_CBLAS_OFFLOAD setting (000, 001, 002) and keep
+# each run's ztrsm_time.dat / ztrsm_gflops.dat under an _ARM / _DSP / _OPT name.
+# The steps are chained with && so that a failed run or copy stops the target
+# instead of silently copying stale data files from an earlier run.
+.PHONY: run
+run: $(EXE)
+	export TI_CBLAS_OFFLOAD=000 && ./$(EXE) && cp ztrsm_time.dat ztrsm_time_ARM.dat && cp ztrsm_gflops.dat ztrsm_gflops_ARM.dat && \
+	export TI_CBLAS_OFFLOAD=001 && ./$(EXE) && cp ztrsm_time.dat ztrsm_time_DSP.dat && cp ztrsm_gflops.dat ztrsm_gflops_DSP.dat && \
+	export TI_CBLAS_OFFLOAD=002 && ./$(EXE) && cp ztrsm_time.dat ztrsm_time_OPT.dat && cp ztrsm_gflops.dat ztrsm_gflops_OPT.dat
\ No newline at end of file
diff --git a/examples/ztrsm_test/ztrsm_test.c b/examples/ztrsm_test/ztrsm_test.c
new file mode 100644 (file)
index 0000000..d4192b5
--- /dev/null
@@ -0,0 +1,202 @@
+/******************************************************************************
+ * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions are met:
+ *       * Redistributions of source code must retain the above copyright
+ *         notice, this list of conditions and the following disclaimer.
+ *       * Redistributions in binary form must reproduce the above copyright
+ *         notice, this list of conditions and the following disclaimer in the
+ *         documentation and/or other materials provided with the distribution.
+ *       * Neither the name of Texas Instruments Incorporated nor the
+ *         names of its contributors may be used to endorse or promote products
+ *         derived from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ *   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ *   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *   THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#include <complex.h>
+
+#include "cblas.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "cblas.h"
+#ifdef __cplusplus
+}
+#endif
+
+/* Sweep configuration: each swept dimension takes NUM_MATRIX_SIZE_TO_TEST
+ * values, starting at START_SIZE_RECTAN_MATRIX and doubling every step; each
+ * size is timed NUM_TEST_RUN times and the results are averaged. */
+#define START_SIZE_RECTAN_MATRIX 128
+#define NUM_MATRIX_SIZE_TO_TEST  4
+#define NUM_TEST_RUN 5
+
+/*-----------------------------------------------------------------------------
+* Timing Setup
+*
+* tick() records the start time in t0; tock() records the end time in t1 and
+* evaluates to the elapsed seconds as a double.  Neither macro carries a
+* trailing semicolon, so both behave like ordinary expressions at the call
+* site (including inside if/else without braces).
+*----------------------------------------------------------------------------*/
+struct timespec t0,t1;
+#define tick()  ((void)clock_gettime(CLOCK_MONOTONIC, &t0))
+#define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
+                        t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9)
+
+/*-----------------------------------------------------------------------------
+* Global Variables
+*
+* Fixed arguments passed to every cblas_ztrsm call made by run_ztrsm().
+*----------------------------------------------------------------------------*/
+double complex alpha = 0.7 - 0.3*I;
+enum CBLAS_ORDER     order  = CblasColMajor;
+enum CBLAS_TRANSPOSE transA = CblasNoTrans;
+enum CBLAS_SIDE side  = CblasLeft;
+enum CBLAS_UPLO uplo  = CblasUpper;
+enum CBLAS_DIAG diag  = CblasUnit;
+
+/* Declared but not referenced anywhere in this file. */
+extern int TI_CBLAS_L3_OFFLOAD;
+
+/*-----------------------------------------------------------------------------
+* Prototypes
+*----------------------------------------------------------------------------*/
+/* check_results is declared but neither defined nor called in this file. */
+int check_results(const double complex *C1, const double complex *C2, int M, int N);
+int run_ztrsm(int M, int N, float *time, float *gflops);
+
+/*-----------------------------------------------------------------------------
+* MAIN
+*----------------------------------------------------------------------------*/
+/*
+ * Benchmark driver for ZTRSM.
+ *
+ * After one warm-up call, sweeps M and N over NUM_MATRIX_SIZE_TO_TEST values
+ * each (starting at START_SIZE_RECTAN_MATRIX and doubling) and writes one
+ * "M<TAB>N<TAB>value" row per size pair to ztrsm_time.dat and
+ * ztrsm_gflops.dat.  A value of -1.0 marks a size pair for which run_ztrsm()
+ * returned -1.
+ *
+ * Returns 0 on success, or EXIT_FAILURE if an output file cannot be opened.
+ */
+int main()
+{
+    int ztrsm_err;
+    int M, N, m, n;
+    float time_secs, gflops;
+    FILE *fp_time, *fp_gflops;
+
+    /* Open both result files up front; without them the sweep is pointless and
+     * fprintf()/fclose() on a NULL stream would crash. */
+    fp_time = fopen("ztrsm_time.dat","w");
+    if (fp_time == NULL) {
+        perror("ztrsm_time.dat");
+        return EXIT_FAILURE;
+    }
+    fp_gflops = fopen("ztrsm_gflops.dat","w");
+    if (fp_gflops == NULL) {
+        perror("ztrsm_gflops.dat");
+        fclose(fp_time);
+        return EXIT_FAILURE;
+    }
+
+    /* Fixed seed so every run generates the same matrix contents. */
+    srand(12345);
+
+    /* setting up TI CBLAS during first call */
+    run_ztrsm(1000, 1000, &time_secs, &gflops);
+
+    /* sweep M, and N */
+    for (M=START_SIZE_RECTAN_MATRIX,m=0; m<NUM_MATRIX_SIZE_TO_TEST; m++,M*=2)
+    {
+        for (N=START_SIZE_RECTAN_MATRIX,n=0; n<NUM_MATRIX_SIZE_TO_TEST; n++,N*=2)
+        {
+            printf("Running ZTRSM for (M,N)=(%d,%d)\n", M,N);
+            ztrsm_err = run_ztrsm(M, N, &time_secs, &gflops);
+
+            if(ztrsm_err == -1) {  /* out of memory for DSP offloading */
+                printf("Out of memory for (M,N) = (%d,%d).\n", M,N);
+                fprintf(fp_time, "%6d\t%6d\t%10.8e\n", M, N, -1.0);
+                fprintf(fp_gflops, "%6d\t%6d\t%10.8e\n", M, N, -1.0);
+            }
+            else {
+                fprintf(fp_time, "%6d\t%6d\t%10.8e\n", M, N, time_secs);
+                fprintf(fp_gflops, "%6d\t%6d\t%10.8e\n", M, N, gflops);
+            }
+        }
+    }
+
+    fclose(fp_time);
+    fclose(fp_gflops);
+
+    return 0;
+}
+
+/*
+ * Times cblas_ztrsm for the given M and N.
+ *
+ * A holds M*M elements when side == CblasLeft and N*N otherwise; B holds M*N
+ * elements.  Runs NUM_TEST_RUN iterations; each one allocates A and B with
+ * __malloc_ddr, initializes them, times a single cblas_ztrsm call, then frees
+ * both buffers.
+ *
+ * M, N   - dimensions passed to cblas_ztrsm.
+ * time   - out: mean elapsed seconds per call.
+ * gflops - out: mean of the per-iteration GFLOPS figures, computed with
+ *          4*M*M*N (left side) or 4*M*N*N (right side) as the operation count.
+ *
+ * Returns 0 on success, or -1 (leaving *time and *gflops unwritten) when a
+ * buffer would exceed 0xffffffff bytes or an allocation fails.
+ */
+int run_ztrsm(int M, int N, float *time, float *gflops)
+{
+    long long i, size_A, size_B;
+    int   iter, j, k;
+    float time_secs, total_time, total_gflops, gflops_iter;
+    float operation_count;
+    int   err_code = 0;
+    
+    total_time= 0.0;
+       total_gflops = 0.0;
+       if(side == CblasLeft) {
+         size_A = (long long)M*(long long)M;
+      operation_count = 4.0*(double)M*(double)M*(double)N;    
+       }
+       else {
+         size_A = (long long)N*(long long)N;
+      operation_count = 4.0*(double)M*(double)N*(double)N;    
+       }
+       size_B = (long long)M*(long long)N;
+    /* Reject sizes whose byte count does not fit in 32 bits. */
+    if(  (size_A*sizeof(double complex)>(long long)0x0ffffffff) 
+       ||(size_B*sizeof(double complex)>(long long)0x0ffffffff) ) {
+        return (-1);
+    }
+       
+    for (iter = 0; iter < NUM_TEST_RUN; iter++)
+    {      
+      /*-------------------------------------------------------------------------
+      * Allocate space for the matrices.  The matrices that will be passed to 
+      * the DSP are allocated using device memory.  
+      *------------------------------------------------------------------------*/
+      double complex *A = (double complex *) __malloc_ddr(size_A*sizeof(double complex));
+      double complex *B = (double complex *) __malloc_ddr(size_B*sizeof(double complex));
+  
+      if (!A || !B)
+      {
+          printf("Could not allocate enough space for the arrays!");
+          if(A) __free_ddr(A);
+          if(B) __free_ddr(B);
+          
+          return (-1);
+      }
+  
+      /*-------------------------------------------------------------------------
+      * Initialize matrices 
+      *
+      * A[j*lda+k] is set to 1+j when j == k, to zero when j < k, and to a
+      * random complex value otherwise.  NOTE(review): with the file-level
+      * column-major order this looks like it zeroes the entries below the
+      * diagonal, consistent with uplo = CblasUpper -- confirm.
+      * B gets random real and imaginary parts in [0,1].
+      *------------------------------------------------------------------------*/
+      int lda = (side == CblasLeft) ? M : N;  
+      int ldb = M;  
+         for(j=0;j<lda;j++)
+         {
+             for(k=0;k<lda;k++)
+                 {
+                     if (j==k)
+                             A[j*lda+k] = 1.0+j + 0.0*I;
+              else if (j<k)
+                             A[j*lda+k] = 0.0 + 0.0*I;
+                         else
+                               A[j*lda+k] = (double)rand()/RAND_MAX + (double)rand()/RAND_MAX * I;
+                 }
+         }
+
+      for (i = 0; i < size_B; ++i)
+         {
+          B[i] = (double)rand()/RAND_MAX + (double)rand()/RAND_MAX * I;
+      }          
+    
+      /*------------------------------------------------------------------------
+      * Time ztrsm
+      *-----------------------------------------------------------------------*/     
+      tick();
+      cblas_ztrsm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,B,ldb);
+      time_secs = tock();
+      total_time += time_secs;
+      gflops_iter = operation_count/time_secs*1e-9;
+      total_gflops += gflops_iter;
+  
+      __free_ddr(A);
+      __free_ddr(B);
+    }
+    
+    /* Report the averages over all iterations. */
+    *gflops = total_gflops / (double)NUM_TEST_RUN;
+    *time   = total_time / (double)NUM_TEST_RUN;
+    
+    return err_code;
+}
+
+
index 8a480382755ec6a7648a53e34718dbd9c5ecae05..5364fc33ae71fbce82871658664e66b0c7343cb8 100644 (file)
@@ -1,35 +1 @@
-
-========== Run applications with LINALG libraries: ==========
-1. include following headers files located at /usr/include after MCSDK-HPC installation:
-   - BLAS:   cblas.h
-   - LAPACK: f2c.h, blaswrap.h, clapack.h 
-   - Note: fc2.h has a complex type which is different from the complex type in
-           C99 complex.h. If f2c.h is included, C99 complex.h should not be used. 
-2. link following LINALG libraries located at /usr/lib after MCSDK-HPC installation:
-   - BLAS:   libblis.a, libcblas_armplusdsp.a
-   - LAPACK: libcblaswr.a, liblapack.a, libf2c.a
-   - Note: cblas calls are not thread safe
-3. setup environment variables:
-   - BLIS_IC_NT for number of ARM threads: 1 through 4     
-   - TI_CBLAS_OFFLOAD for BLAS offloading (level 1, 2, or 3) configuration:
-          - set to xyz, where x,y,z correspond to level 1, level 2, and level 3 and
-         can take any of 3 values:
-              - 0: no offloading to DSP, ie. always running on ARM
-                  - 1: forced offloading to DSP, ie. always running on DSP
-                  - 2: conditional offloading to DSP based on matrix sizes
-          - example: TI_CBLAS_OFFLOAD=001 means level 1&2 functions will always run 
-                     on ARM, and level 3 functions will always run on DSP. 
-       - default offloading configuration if TI_CBLAS_OFFLOAD is not set: 
-              - 002 (level 1&2 no offloading, level 3 offloading based on sizes)                
-       - Note: in this release, conditional offloading (value 2) is not available 
-                  for level 1 and level 2. If this option is configured for level 1
-                          and level 2, functions will be offloaded to DSP. 
-   
-========== Rebuild LINALG libraries: ==========
-1. rebuild ARM without rebuilding DSP code: make ARMonly
-2. rebuild both DSP and ARM code: make ARMplusDSP
-
-========== Install LINALG libraries: ==========
-1. install libraries to /usr/lib: make install
-2. install libraries to another directory: make install DESTDIR=<directory name>
-
+For information about how to use the LINALG library, see: http://processors.wiki.ti.com/index.php/MCSDK_HPC_3.x_Linear_Algebra_Library
\ No newline at end of file
index 5aad157bce26297c391bd5d96462237fc78a2e96..4de0d0bc31aa63937c042fe2bc76065de685e1ac 100644 (file)
@@ -14,7 +14,7 @@ tune:
          echo "=============== " $$dir " =================" ; \
          $(MAKE) -C $$dir tune; \
        done; \
-       mkdir ../ofld_tbls; find . -iname "ofld_tbl*.c" -exec cp {} ../ofld_tbls \;
+       mkdir ofld_tbls; find *_tune -name *ofld*.c -exec cp {} ofld_tbls \;
 
 clean:
        for dir in $(DIRS); do \