summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 2f9884f)
author | Jianzhong Xu <a0869574@ti.com> | Mon, 8 Jun 2015 20:17:34 +0000 (16:17 -0400)
committer | Jianzhong Xu <a0869574@ti.com> | Mon, 8 Jun 2015 20:17:34 +0000 (16:17 -0400)
17 files changed:
index 25fad388222d6ea59b563b80729ab5cfdf05494a..358eaefadb5e1e9ab9d3a0d7dcdac9e5e06f5bc2 100644 (file)
@@ -181,7 +181,7 @@ int run_cgemm_dsp_and_arm(int M, int K, int N, float *time_dsp, float *time_arm,
total_time_dsp = 0.0;
total_time_arm = 0.0;
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
@@ -238,7 +238,7 @@ int run_cgemm_dsp_and_arm(int M, int K, int N, float *time_dsp, float *time_arm,
tick();
cblas_cgemm(order,transA,transB,M,N,K,&alpha,A,lda,B,ldb,&beta,Cdsp,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
@@ -253,7 +253,7 @@ int run_cgemm_dsp_and_arm(int M, int K, int N, float *time_dsp, float *time_arm,
tick();
cblas_cgemm(order,transA,transB,M,N,K,&alpha,A,lda,B,ldb,&beta,Carm,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index f802bbd4edb6c9c4be583694a607793e5ee6776b..851a47b8c90b322345c03612614f58e6dc89e82e 100644 (file)
--- a/tuning/common/tune_com.h
+++ b/tuning/common/tune_com.h
#define OFFLOAD 1
#define NO_OFFLOAD 0
-#define NUM_TEST_RUN 6 /* first run not counted */
+#define NUM_TEST_RUN 5
/*-----------------------------------------------------------------------------
* Timing Setup
index e02878ce816148016acfa42365bf6de8124dabb3..09b4b5bfb466c505229430ed5a28f2e8cefa9400 100644 (file)
return (-1);
}
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
tick();
cblas_csyrk(order,uplo,transA,N,K,&alpha,A,lda,&beta,Cdsp,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
tick();
cblas_csyrk(order,uplo,transA,N,K,&alpha,A,lda,&beta,Carm,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index c47c3c5ac6f90aa481fc2fb8ea9b1e39f3ccbc37..125b7fcf4f9c8a80d5336a3cfcd30886e550d43c 100644 (file)
size_A = (long long)N*(long long)N;
}
size_B = (long long)M*(long long)N;
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
{
Bdsp[i] = Barm[i] = (float)rand()/RAND_MAX + (float)rand()/RAND_MAX * I;
}
-
+/*
if(M==8 && N==8) {
FILE *file_a = fopen("mat_a.dat","w");
FILE *file_b = fopen("mat_b.dat","w");
fclose(file_b);
fclose(file_c);
}
-
+*/
int lda = (side == CblasLeft) ? M : N;
int ldb = M;
tick();
cblas_ctrmm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
tick();
cblas_ctrmm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Barm,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index 1e5ad68a121ef75e48b1bf60459aab2243cb0986..abf02cfed8931345b15589ba4c8323b753fbafdd 100644 (file)
size_A = (long long)N*(long long)N;
}
size_B = (long long)M*(long long)N;
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
{
Bdsp[i] = Barm[i] = (float)rand()/RAND_MAX + (float)rand()/RAND_MAX * I;
}
-
+/*
if(M==8 && N==8) {
FILE *file_a = fopen("mat_a.dat","w");
FILE *file_b = fopen("mat_b.dat","w");
fclose(file_b);
fclose(file_c);
}
-
+*/
/*============ BLAS tuning: running on DSP and then on ARM =============*/
/*------------------------------------------------------------------------
* Time DSP ctrsm
tick();
cblas_ctrsm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
tick();
cblas_ctrsm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Barm,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index cafd30d8191eb7a435ba66cc7555dd0e4a8b1a3a..5f88529ad662f58d14b5b5ff556b3ce1b44fc187 100644 (file)
@@ -179,7 +179,7 @@ int run_dgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
total_time_dsp = 0.0;
total_time_arm = 0.0;
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
@@ -230,7 +230,7 @@ int run_dgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
tick();
cblas_dgemm(order,transA,transB,M,N,K,alpha,A,lda,B,ldb,beta,Cdsp,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
@@ -256,7 +256,7 @@ int run_dgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
tick();
cblas_dgemm(order,transA,transB,M,N,K,alpha,A,lda,B,ldb,beta,Carm,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index 2d23aa7faed4b8aa7d71fd7a4c20be5d499d3a05..e19c26fadda468bdcb879a0d8fd3baf23072871d 100644 (file)
return (-1);
}
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
tick();
cblas_dsyrk(order,uplo,transA,N,K,alpha,A,lda,beta,Cdsp,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
tick();
cblas_dsyrk(order,uplo,transA,N,K,alpha,A,lda,beta,Carm,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index 83c5b428fed239ecbe8c549335c7d71d47408960..7293c67093b8cec5b98f5a84cc57dcd8b31d358b 100644 (file)
size_A = (long long)N*(long long)N;
}
size_B = (long long)M*(long long)N;
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
tick();
cblas_dtrmm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
tick();
cblas_dtrmm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Barm,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index 456c1d7062deeb781d1c91a0ec1fe5fe1fdb4e10..aec15f1a3dc3e203043e229d21ef0831182c6063 100644 (file)
size_A = (long long)N*(long long)N;
}
size_B = (long long)M*(long long)N;
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
tick();
cblas_dtrsm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
tick();
cblas_dtrsm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Barm,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index cc586b07249391c2b70fb2f762086b9046668a9f..7e11ea8181fc1c582907e546940b5af294331099 100644 (file)
@@ -179,7 +179,7 @@ int run_sgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
total_time_dsp = 0.0;
total_time_arm = 0.0;
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
@@ -228,7 +228,7 @@ int run_sgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
tick();
cblas_sgemm(order,transA,transB,M,N,K,alpha,A,lda,B,ldb,beta,Cdsp,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
@@ -254,7 +254,7 @@ int run_sgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
tick();
cblas_sgemm(order,transA,transB,M,N,K,alpha,A,lda,B,ldb,beta,Carm,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index 30fc02181841857cea3fd1706df635aaa8ff39f5..14913da95b745feb5a91324369546d8c23a1655c 100644 (file)
return (-1);
}
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
tick();
cblas_ssyrk(order,uplo,transA,N,K,alpha,A,lda,beta,Cdsp,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
tick();
cblas_ssyrk(order,uplo,transA,N,K,alpha,A,lda,beta,Carm,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index b0492a44d28c59d82e48118787bacf2d96d7e4b4..2e25e0bf820c3e64c426c023694b8fd3fbc7b4cb 100644 (file)
size_A = (long long)N*(long long)N;
}
size_B = (long long)M*(long long)N;
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
for (i = 0; i < size_A; ++i) A[i] = (float)rand()/RAND_MAX;
for (i = 0; i < (long long)M*N; ++i) Bdsp[i] = Barm[i] = (float)rand()/RAND_MAX;
//for (i = 0; i < (long long)M*N; ++i) Barm[i] = Bdsp[i];
-
+/*
if(M==8 && N==8) {
FILE *file_a = fopen("mat_a.dat","w");
FILE *file_b = fopen("mat_b.dat","w");
fclose(file_b);
fclose(file_c);
}
-
+*/
int lda = (side == CblasLeft) ? M : N;
int ldb = M;
tick();
cblas_strmm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
tick();
cblas_strmm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Barm,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index 649e33feb09bba5cb0b12c2259b2aaf3e4f65583..0dd16bc7c11c6f6bff23854c4342d99c79226dd4 100644 (file)
}
size_B = (long long)M*(long long)N;
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
}
}
for (i = 0; i < (long long)M*N; ++i) Bdsp[i] = Barm[i] = (float)rand()/RAND_MAX;
-
+/*
if(M==256 && N==128) {
FILE *file_a = fopen("mat_a.dat","w");
FILE *file_b = fopen("mat_b.dat","w");
fclose(file_b);
fclose(file_c);
}
-
+*/
/*============ BLAS tuning: running on DSP and then on ARM =============*/
/*------------------------------------------------------------------------
* Time DSP strsm
tick();
cblas_strsm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
tick();
cblas_strsm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Barm,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index 852d17890c80694850db3966634f070416ebf0e6..5d8cee50ab7a7fea9b42dc40c7b73f90eaeeb4e1 100644 (file)
@@ -179,7 +179,7 @@ int run_zgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
total_time_dsp = 0.0;
total_time_arm = 0.0;
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
@@ -236,7 +236,7 @@ int run_zgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
tick();
cblas_zgemm(order,transA,transB,M,N,K,&alpha,A,lda,B,ldb,&beta,Cdsp,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
@@ -251,7 +251,7 @@ int run_zgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
tick();
cblas_zgemm(order,transA,transB,M,N,K,&alpha,A,lda,B,ldb,&beta,Carm,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index d695c4a44992cad4da80834c47ce1e2cd16c85f4..5fee5e1efda02f272bf7c11118c6f84144e48c23 100644 (file)
return (-1);
}
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
tick();
cblas_zsyrk(order,uplo,transA,N,K,&alpha,A,lda,&beta,Cdsp,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
tick();
cblas_zsyrk(order,uplo,transA,N,K,&alpha,A,lda,&beta,Carm,ldc);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index 39655a6ffaad51ab1e1ccf952d8e203b16559319..0d261ddbc2c5bdbc31b4dbc2548a01c400bf3832 100644 (file)
return (-1);
}
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
tick();
cblas_ztrmm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
tick();
cblas_ztrmm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Barm,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;
index eb6da3ee04a8a3ed79a7acc66fd80ed86f2e0ec9..ed6fc621635f3037ce5ba926cf823d906fa7717d 100644 (file)
return (-1);
}
- for (iter = 0; iter < NUM_TEST_RUN; iter++)
+ for (iter = 0; iter <= NUM_TEST_RUN; iter++)
{
/*-------------------------------------------------------------------------
* Allocate space for the matrices. The matrices that will be passed to
tick();
cblas_ztrsm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_dsp += time_secs;
gflops_DSP = operation_count/time_secs*1e-9;
total_GFLOPS_DSP += gflops_DSP;
tick();
cblas_ztrsm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Barm,ldb);
time_secs = tock();
- if(iter==0) { /* skip first iteration */
+ if(iter > 0) { /* skip first iteration */
total_time_arm += time_secs;
gflops_ARM = operation_count/time_secs*1e-9;
total_GFLOPS_ARM += gflops_ARM;