summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 53b2bf8)
raw | patch | inline | side by side (parent: 53b2bf8)
author | Jianzhong Xu <a0869574@ti.com> | |
Thu, 23 Apr 2015 19:27:02 +0000 (15:27 -0400) | ||
committer | Jianzhong Xu <a0869574@ti.com> | |
Thu, 23 Apr 2015 19:27:02 +0000 (15:27 -0400) |
30 files changed:
diff --git a/examples/Makefile b/examples/Makefile
index 7152704c091b9b79b80cf70f5f08f581f6bc14d4..6609f0f7acac7077836a0a0e1deb6a496c72d363 100644 (file)
--- a/examples/Makefile
+++ b/examples/Makefile
test:
for dir in $(DIRS); do \
echo "=============== " $$dir " =================" ; \
- $(MAKE) -C $$dir test; \
- done
-
-cross:
- for dir in $(DIRS); do \
- echo "=============== " $$dir " =================" ; \
- $(MAKE) -C $$dir cross; \
+ $(MAKE) -C $$dir run; \
done
clean:
index 503045316cdc27517c55ab2d91c2f52b2452ccb0..c8f6e1139394b8e33a018aa195ed6699d01a5361 100644 (file)
$(EXE): dgemm_test.o
$(CC) $(CFLAGS) dgemm_test.o $(BLASLIB) -o $@
-run: $(EXE)
- export TI_CBLAS_OFFLOAD=000;./$(EXE);cp dgemm_time.dat dgemm_time_ARM.dat; cp dgemm_gflops.dat dgemm_gflops_ARM.dat;\
- export TI_CBLAS_OFFLOAD=001;./$(EXE);cp dgemm_time.dat dgemm_time_DSP.dat; cp dgemm_gflops.dat dgemm_gflops_DSP.dat;\
- export TI_CBLAS_OFFLOAD=002;./$(EXE);cp dgemm_time.dat dgemm_time_OPT.dat; cp dgemm_gflops.dat dgemm_gflops_OPT.dat;
\ No newline at end of file
+run: ARMtest DSPtest OPTtest
+
+ARMtest: $(EXE)
+ @echo "Forcing BLAS level 3 execution on ARM."; \
+ export TI_CBLAS_OFFLOAD=000; \
+ ./$(EXE); \
+ cp dgemm_time.dat dgemm_time_ARM.dat; cp dgemm_gflops.dat dgemm_gflops_ARM.dat;
+
+DSPtest: $(EXE)
+ @echo "Forcing BLAS level 3 execution on DSP."; \
+ export TI_CBLAS_OFFLOAD=001; \
+ ./$(EXE); \
+ cp dgemm_time.dat dgemm_time_DSP.dat; cp dgemm_gflops.dat dgemm_gflops_DSP.dat;
+
+OPTtest: $(EXE)
+ @echo "Optimal BLAS level 3 execution on ARM or DSP."; \
+ export TI_CBLAS_OFFLOAD=002; \
+ ./$(EXE); \
+ cp dgemm_time.dat dgemm_time_OPT.dat; cp dgemm_gflops.dat dgemm_gflops_OPT.dat;
index 88b6de21de918fe1a5e93e56dde390cb08d390c1..5ea020a8b258a2d097e3090c587a86fe9ccc9f5d 100644 (file)
{
int num_size, dgemm_err;
int M, N, K, m, n, k;
- int M_pre, N_pre, K_pre, M_start_size, N_start_size;
- float time_secs_arm, gflops_arm, time_secs_dsp, gflops_dsp, time_secs_opt, gflops_opt;
+ float time_secs, gflops;
FILE *fp_time, *fp_gflops;
fp_time = fopen("dgemm_time.dat","w");
srand(12345);
/* setting up TI CBLAS during first call */
- run_dgemm(1000, 1000, 1000, &time_secs_arm, &gflops_arm);
+ run_dgemm(1000, 1000, 1000, &time_secs, &gflops);
/* sweep M, K, and N */
for (M=TUNING_START_SIZE_RECTAN_MATRIX,m=0; m<NUM_MATRIX_SIZE_TO_BENCHMARK; m++,M*=2)
index cca9c9abe594391373c59f2005bd5e3dff65cbf0..8528b464690866073ca048ac16f6198c02c5dafd 100644 (file)
$(EXE): dsyrk_test.o
$(CC) $(CFLAGS) dsyrk_test.o $(BLASLIB) -o $@
-run: $(EXE)
- export TI_CBLAS_OFFLOAD=000;./$(EXE);cp dsyrk_time.dat dsyrk_time_ARM.dat; cp dsyrk_gflops.dat dsyrk_gflops_ARM.dat;\
- export TI_CBLAS_OFFLOAD=001;./$(EXE);cp dsyrk_time.dat dsyrk_time_DSP.dat; cp dsyrk_gflops.dat dsyrk_gflops_DSP.dat;\
- export TI_CBLAS_OFFLOAD=002;./$(EXE);cp dsyrk_time.dat dsyrk_time_OPT.dat; cp dsyrk_gflops.dat dsyrk_gflops_OPT.dat;
\ No newline at end of file
+run: ARMtest DSPtest OPTtest
+
+ARMtest: $(EXE)
+ @echo "Forcing BLAS level 3 execution on ARM."; \
+ export TI_CBLAS_OFFLOAD=000; \
+ ./$(EXE); \
+ cp dsyrk_time.dat dsyrk_time_ARM.dat; cp dsyrk_gflops.dat dsyrk_gflops_ARM.dat;
+
+DSPtest: $(EXE)
+ @echo "Forcing BLAS level 3 execution on DSP."; \
+ export TI_CBLAS_OFFLOAD=001; \
+ ./$(EXE); \
+ cp dsyrk_time.dat dsyrk_time_DSP.dat; cp dsyrk_gflops.dat dsyrk_gflops_DSP.dat;
+
+OPTtest: $(EXE)
+ @echo "Optimal BLAS level 3 execution on ARM or DSP."; \
+ export TI_CBLAS_OFFLOAD=002; \
+ ./$(EXE); \
+ cp dsyrk_time.dat dsyrk_time_OPT.dat; cp dsyrk_gflops.dat dsyrk_gflops_OPT.dat;
diff --git a/examples/eig/Makefile b/examples/eig/Makefile
index 7570ce6e817406fea300b38a279fff3a633efe9d..571ab20cc32c6ac03aaca1eeb6f4633a71a8245b 100644 (file)
--- a/examples/eig/Makefile
+++ b/examples/eig/Makefile
$(EXE): main.o dlaran.o dlarnd.o dlatm1.o dlatm2.o dlatm3.o dlatmr.o
$(CC) $(CFLAGS) main.o dlaran.o dlarnd.o dlatm1.o dlatm2.o dlatm3.o dlatmr.o $(LAPACKLIB) -o $@
+run: ARMtest DSPtest OPTtest
+
ARMtest: $(EXE)
@echo "Forcing BLAS level 3 execution on ARM."; \
export TI_CBLAS_OFFLOAD=000; \
- ./$(EXE);
+ ./$(EXE);
DSPtest: $(EXE)
@echo "Forcing BLAS level 3 execution on DSP."; \
index f520ac5fe570cd6380c3275f0e61b6aa328d78e1..7c97c8299327ebba248a02338892bffcbdf08cf1 100644 (file)
--- a/examples/ludinv/Makefile
+++ b/examples/ludinv/Makefile
$(EXE): main.o dlaran.o dlarnd.o dlatm1.o dlatm2.o dlatm3.o dlatmr.o
$(CC) $(CFLAGS) main.o dlaran.o dlarnd.o dlatm1.o dlatm2.o dlatm3.o dlatmr.o $(LAPACKLIB) -o $@
+run: ARMtest DSPtest OPTtest
+
ARMtest: $(EXE)
@echo "Forcing BLAS level 3 execution on ARM."; \
export TI_CBLAS_OFFLOAD=000; \
- ./$(EXE);
+ ./$(EXE);
DSPtest: $(EXE)
@echo "Forcing BLAS level 3 execution on DSP."; \
diff --git a/examples/make.inc b/examples/make.inc
index ae1ce4087b805a9ab9238e77dd8fc8667df014ae..ee058bed93a24abb4c5428493c6cd9b90871d2a5 100644 (file)
--- a/examples/make.inc
+++ b/examples/make.inc
-
CC = gcc
CFLAGS = -g -O2 -I/usr/include
$(EXE):
-cross: $(EXE)
-
clean::
- @rm -f $(EXE) *.o *.obj *.out *.asm *.if *.opt *.bc *.objc *.map *.bin *.dsp_h
+ @rm -f $(EXE) *.o *.dat
-test: clean $(EXE)
- @echo Running $(EXE)
- @./$(EXE) >> /dev/null
- @if [ $$? -ne 0 ] ; then echo "FAILED !!!" ; fi
index bbf05fe3236f4051802ae0af7011582ba2705459..973abb608f49a177dd2099d84067664f5d3e902b 100644 (file)
--- a/examples/matmpy/Makefile
+++ b/examples/matmpy/Makefile
$(EXE): main.o
$(CC) $(CFLAGS) main.o $(BLASLIB) -o $@
-alltests: ARMtest DSPtest OPTtest
+run: ARMtest DSPtest OPTtest
ARMtest: $(EXE)
@echo "Forcing BLAS level 3 execution on ARM."; \
export TI_CBLAS_OFFLOAD=000; \
- ./$(EXE);
+ ./$(EXE);
DSPtest: $(EXE)
@echo "Forcing BLAS level 3 execution on DSP."; \
index c96afc1873091c0b352101cd89673131da4d4073..7a6708afc921107785d36f0a60cdb8644595d104 100644 (file)
$(EXE): ztrmm_test.o
$(CC) $(CFLAGS) ztrmm_test.o $(BLASLIB) -o $@
-run: $(EXE)
- export TI_CBLAS_OFFLOAD=000;./$(EXE);cp ztrmm_time.dat ztrmm_time_ARM.dat; cp ztrmm_gflops.dat ztrmm_gflops_ARM.dat;\
- export TI_CBLAS_OFFLOAD=001;./$(EXE);cp ztrmm_time.dat ztrmm_time_DSP.dat; cp ztrmm_gflops.dat ztrmm_gflops_DSP.dat;\
- export TI_CBLAS_OFFLOAD=002;./$(EXE);cp ztrmm_time.dat ztrmm_time_OPT.dat; cp ztrmm_gflops.dat ztrmm_gflops_OPT.dat;
\ No newline at end of file
+# run: execute the ztrmm benchmark once per TI_CBLAS_OFFLOAD setting.
+run: ARMtest DSPtest OPTtest
+
+# Each target below exports TI_CBLAS_OFFLOAD, runs $(EXE), then copies
+# ztrmm_time.dat / ztrmm_gflops.dat to a per-mode file name. The names follow
+# the <routine>_time.dat / <routine>_gflops.dat pattern used by the sibling
+# example Makefiles (presumably the files $(EXE) writes - verify).
+ARMtest: $(EXE)
+ @echo "Forcing BLAS level 3 execution on ARM."; \
+ export TI_CBLAS_OFFLOAD=000; \
+ ./$(EXE); \
+ cp ztrmm_time.dat ztrmm_time_ARM.dat; cp ztrmm_gflops.dat ztrmm_gflops_ARM.dat;
+
+DSPtest: $(EXE)
+ @echo "Forcing BLAS level 3 execution on DSP."; \
+ export TI_CBLAS_OFFLOAD=001; \
+ ./$(EXE); \
+ cp ztrmm_time.dat ztrmm_time_DSP.dat; cp ztrmm_gflops.dat ztrmm_gflops_DSP.dat;
+
+OPTtest: $(EXE)
+ @echo "Optimal BLAS level 3 execution on ARM or DSP."; \
+ export TI_CBLAS_OFFLOAD=002; \
+ ./$(EXE); \
+ cp ztrmm_time.dat ztrmm_time_OPT.dat; cp ztrmm_gflops.dat ztrmm_gflops_OPT.dat;
index c5c9a445c7f214b0a9434586ec6fcfeeffff5165..9476df7c3ec417c706d5ed60d498ce373be114ce 100644 (file)
$(EXE): ztrsm_test.o
$(CC) $(CFLAGS) ztrsm_test.o $(BLASLIB) -o $@
-run: $(EXE)
- export TI_CBLAS_OFFLOAD=000;./$(EXE);cp ztrsm_time.dat ztrsm_time_ARM.dat; cp ztrsm_gflops.dat ztrsm_gflops_ARM.dat;\
- export TI_CBLAS_OFFLOAD=001;./$(EXE);cp ztrsm_time.dat ztrsm_time_DSP.dat; cp ztrsm_gflops.dat ztrsm_gflops_DSP.dat;\
- export TI_CBLAS_OFFLOAD=002;./$(EXE);cp ztrsm_time.dat ztrsm_time_OPT.dat; cp ztrsm_gflops.dat ztrsm_gflops_OPT.dat;
\ No newline at end of file
+run: ARMtest DSPtest OPTtest
+
+ARMtest: $(EXE)
+ @echo "Forcing BLAS level 3 execution on ARM."; \
+ export TI_CBLAS_OFFLOAD=000; \
+ ./$(EXE); \
+ cp ztrsm_time.dat ztrsm_time_ARM.dat; cp ztrsm_gflops.dat ztrsm_gflops_ARM.dat;
+
+DSPtest: $(EXE)
+ @echo "Forcing BLAS level 3 execution on DSP."; \
+ export TI_CBLAS_OFFLOAD=001; \
+ ./$(EXE); \
+ cp ztrsm_time.dat ztrsm_time_DSP.dat; cp ztrsm_gflops.dat ztrsm_gflops_DSP.dat;
+
+OPTtest: $(EXE)
+ @echo "Optimal BLAS level 3 execution on ARM or DSP."; \
+ export TI_CBLAS_OFFLOAD=002; \
+ ./$(EXE); \
+ cp ztrsm_time.dat ztrsm_time_OPT.dat; cp ztrsm_gflops.dat ztrsm_gflops_OPT.dat;
diff --git a/readme.txt b/readme.txt
--- a/readme.txt
+++ /dev/null
@@ -1 +0,0 @@
-For information about how to use LINALG library, please go to: http://processors.wiki.ti.com/index.php/MCSDK_HPC_3.x_Linear_Algebra_Library.
\ No newline at end of file
index c796c2e3b00fccdf5df68ac9fdf959672ddd2575..25fad388222d6ea59b563b80729ab5cfdf05494a 100644 (file)
time_ARM = t_arm;
if (cgemm_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[m][n][k] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
@@ -238,9 +238,11 @@ int run_cgemm_dsp_and_arm(int M, int K, int N, float *time_dsp, float *time_arm,
tick();
cblas_cgemm(order,transA,transB,M,N,K,&alpha,A,lda,B,ldb,&beta,Cdsp,ldc);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM cgemm
@@ -251,9 +253,11 @@ int run_cgemm_dsp_and_arm(int M, int K, int N, float *time_dsp, float *time_arm,
tick();
cblas_cgemm(order,transA,transB,M,N,K,&alpha,A,lda,B,ldb,&beta,Carm,ldc);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
//printf(" %6.3f %6.3f %9.6fs %9.6fs\n", gflops_DSP, gflops_ARM, time_dsp, time_arm);
/*-------------------------------------------------------------------------
index 6ca8c4866aa7c8b7ba0b88b7edaed1f1e5745213..f802bbd4edb6c9c4be583694a607793e5ee6776b 100644 (file)
--- a/tuning/common/tune_com.h
+++ b/tuning/common/tune_com.h
#define OFFLOAD 1
#define NO_OFFLOAD 0
-#define NUM_TEST_RUN 5
+#define NUM_TEST_RUN 6 /* first run not counted */
/*-----------------------------------------------------------------------------
* Timing Setup
#define tick() clock_gettime(CLOCK_MONOTONIC, &t0);
#define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9)
-
+
+/* Scale factor applied to the ARM time before it is compared with the DSP
+ * time. With 1.05 the DSP is still selected when it is up to 5% slower.
+ * NOTE(review): confirm that biasing the decision toward offload is intended. */
+#define TIME_MARGIN ((float)1.05) /* 5% margin to guard against error */
+/* Non-zero when tdsp is below tarm scaled by TIME_MARGIN. Arguments are
+ * parenthesized so that expression arguments expand with the right precedence. */
+#define DSP_FASTER_THAN_ARM(tdsp,tarm) ((tdsp) < (tarm)*TIME_MARGIN)
+
extern void print_file_header(FILE *fp_tbl);
index 4bf76e55bd7506b81d2ff176d08f302b94af1c0d..e02878ce816148016acfa42365bf6de8124dabb3 100644 (file)
time_DSP = t_dsp;
time_ARM = t_arm;
if (csyrk_err == 0){
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[n][k] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
tick();
cblas_csyrk(order,uplo,transA,N,K,&alpha,A,lda,&beta,Cdsp,ldc);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM csyrk
tick();
cblas_csyrk(order,uplo,transA,N,K,&alpha,A,lda,&beta,Carm,ldc);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
/*-------------------------------------------------------------------------
* Verify Results
index 9823cab52c38b1ace7cd292d52e7a7f3f4ce8c01..c47c3c5ac6f90aa481fc2fb8ea9b1e39f3ccbc37 100644 (file)
time_ARM = t_arm;
if (ctrmm_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[m][n] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
tick();
cblas_ctrmm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM ctrmm
tick();
cblas_ctrmm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Barm,ldb);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
//printf(" %6.3f %6.3f %9.6fs %9.6fs\n", gflops_DSP, gflops_ARM, time_dsp, time_arm);
if(M==8 && N==8) {
index e2d526eac44f76ea660f50eae3440251f6ce65d2..1e5ad68a121ef75e48b1bf60459aab2243cb0986 100644 (file)
time_ARM = t_arm;
if (ctrsm_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[m][n] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
tick();
cblas_ctrsm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM ctrsm
tick();
cblas_ctrsm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Barm,ldb);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
//printf(" %6.3f %6.3f %9.6fs %9.6fs\n", gflops_DSP, gflops_ARM, time_dsp, time_arm);
if(M==8 && N==8) {
index e81e68cf01a23bdbed09c3762e52ab8e4d5bb694..cafd30d8191eb7a435ba66cc7555dd0e4a8b1a3a 100644 (file)
* Prototypes
*----------------------------------------------------------------------------*/
int check_results(const double *C1, const double *C2, int M, int N);
-int run_dgemm_dsp_and_arm(int M, int N, int K, double *time_dsp, double *time_arm,
- double *gflops_dsp, double *gflops_arm);
+int run_dgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
+ float *gflops_dsp, float *gflops_arm);
/*-----------------------------------------------------------------------------
* MAIN
int M, N, K, m, n, k;
int M_pre, N_pre, K_pre, M_start_size, N_start_size;
int offload_threshold_1, offload_threshold_2;
- double total_GFLOPS_DSP, total_GFLOPS_ARM;
- double time_DSP, time_ARM, t_dsp, t_arm;
+ float total_GFLOPS_DSP, total_GFLOPS_ARM;
+ float time_DSP, time_ARM, t_dsp, t_arm;
char ofld_flag[NUM_MATRIX_SIZE_TO_BENCHMARK][NUM_MATRIX_SIZE_TO_BENCHMARK][NUM_MATRIX_SIZE_TO_BENCHMARK];
char mem_flag[NUM_MATRIX_SIZE_TO_BENCHMARK][NUM_MATRIX_SIZE_TO_BENCHMARK][NUM_MATRIX_SIZE_TO_BENCHMARK];
int skip_next_point;
time_ARM = t_arm;
if (dgemm_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[m][n][k] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
}
-int run_dgemm_dsp_and_arm(int M, int N, int K, double *time_dsp, double *time_arm,
- double *gflops_dsp, double *gflops_arm)
+int run_dgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
+ float *gflops_dsp, float *gflops_arm)
{
int iter;
long long i;
- double time_secs, total_time_dsp, total_time_arm;
- double gflops_ARM, gflops_DSP;
- double operation_count = 2.0*(double)M*(double)N*(double)K;
- double total_GFLOPS_DSP = 0.0f;
- double total_GFLOPS_ARM = 0.0f;
+ float time_secs, total_time_dsp, total_time_arm;
+ float gflops_ARM, gflops_DSP;
+ float operation_count = 2.0*(double)M*(double)N*(double)K;
+ float total_GFLOPS_DSP = 0.0f;
+ float total_GFLOPS_ARM = 0.0f;
int err_code = 0;
total_time_dsp = 0.0;
@@ -230,9 +230,11 @@ int run_dgemm_dsp_and_arm(int M, int N, int K, double *time_dsp, double *time_ar
tick();
cblas_dgemm(order,transA,transB,M,N,K,alpha,A,lda,B,ldb,beta,Cdsp,ldc);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*
if(M==4096 && K==256 && N==16) {
FILE *file_a = fopen("mat_a.dat","w");
@@ -254,9 +256,11 @@ int run_dgemm_dsp_and_arm(int M, int N, int K, double *time_dsp, double *time_ar
tick();
cblas_dgemm(order,transA,transB,M,N,K,alpha,A,lda,B,ldb,beta,Carm,ldc);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
//printf(" %6.3f %6.3f %9.6fs %9.6fs\n", gflops_DSP, gflops_ARM, time_dsp, time_arm);
fflush(stdout);
index 2d6f99341772a114ba1f53c14a62da0e28817b35..2d23aa7faed4b8aa7d71fd7a4c20be5d499d3a05 100644 (file)
time_ARM = t_arm;
if (dsyrk_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[n][k] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
tick();
cblas_dsyrk(order,uplo,transA,N,K,alpha,A,lda,beta,Cdsp,ldc);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM dsyrk
tick();
cblas_dsyrk(order,uplo,transA,N,K,alpha,A,lda,beta,Carm,ldc);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
//printf(" %6.3f %6.3f %9.6fs %9.6fs\n", gflops_DSP, gflops_ARM, time_dsp, time_arm);
fflush(stdout);
index 017bdb10e2a057b62a8cd6893eccc6a65f5782ca..83c5b428fed239ecbe8c549335c7d71d47408960 100644 (file)
time_ARM = t_arm;
if (dtrmm_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[m][n] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
tick();
cblas_dtrmm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM dtrmm
tick();
cblas_dtrmm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Barm,ldb);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
/*-------------------------------------------------------------------------
* Verify Results
diff --git a/tuning/dtrmm_tune/ofld_tbl_dtrmm.c b/tuning/dtrmm_tune/ofld_tbl_dtrmm.c
+++ /dev/null
@@ -1,257 +0,0 @@
-char ofld_tbl_dtrmm[TRMM_OFFLOAD_TBL_SIZE] = {
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-0,
-0,
-0,
-0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-0,
-0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1};
index a58d4675711fe627e532b686239d1e1cae0e1755..456c1d7062deeb781d1c91a0ec1fe5fe1fdb4e10 100644 (file)
time_ARM = t_arm;
if (dtrsm_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[m][n] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
tick();
cblas_dtrsm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM dtrsm
tick();
cblas_dtrsm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Barm,ldb);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
/*-------------------------------------------------------------------------
* Verify Results
diff --git a/tuning/dtrsm_tune/ofld_tbl_dtrmm.c b/tuning/dtrsm_tune/ofld_tbl_dtrmm.c
+++ /dev/null
@@ -1,257 +0,0 @@
-char ofld_tbl_dtrmm[TRMM_OFFLOAD_TBL_SIZE] = {
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-0,
-0,
-0,
-0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-0,
-0,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-0,
-0,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1,
-1};
index 5f33ded1880723d32713bc290f788de87b444b23..cc586b07249391c2b70fb2f762086b9046668a9f 100644 (file)
time_ARM = t_arm;
if (sgemm_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[m][n][k] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
@@ -228,9 +228,11 @@ int run_sgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
tick();
cblas_sgemm(order,transA,transB,M,N,K,alpha,A,lda,B,ldb,beta,Cdsp,ldc);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*
if(M==4096 && K==256 && N==16) {
FILE *file_a = fopen("mat_a.dat","w");
@@ -252,9 +254,11 @@ int run_sgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
tick();
cblas_sgemm(order,transA,transB,M,N,K,alpha,A,lda,B,ldb,beta,Carm,ldc);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
//printf(" %6.3f %6.3f %9.6fs %9.6fs\n", gflops_DSP, gflops_ARM, time_dsp, time_arm);
fflush(stdout);
index 5a6a2946e77b40be30978a11a566befdf86ae57a..30fc02181841857cea3fd1706df635aaa8ff39f5 100644 (file)
time_ARM = t_arm;
if (ssyrk_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[n][k] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
tick();
cblas_ssyrk(order,uplo,transA,N,K,alpha,A,lda,beta,Cdsp,ldc);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM ssyrk
tick();
cblas_ssyrk(order,uplo,transA,N,K,alpha,A,lda,beta,Carm,ldc);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
//printf(" %6.3f %6.3f %9.6fs %9.6fs\n", gflops_DSP, gflops_ARM, time_dsp, time_arm);
fflush(stdout);
index b7d2d8b199a7d9501091cb42d0a10f9b2abbcb5d..b0492a44d28c59d82e48118787bacf2d96d7e4b4 100644 (file)
time_ARM = t_arm;
if (strmm_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[m][n] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
tick();
cblas_strmm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM strmm
tick();
cblas_strmm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Barm,ldb);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
//printf(" %6.3f %6.3f %9.6fs %9.6fs\n", gflops_DSP, gflops_ARM, time_dsp, time_arm);
if(M==8 && N==8) {
index 96c9a4dc1ff88c504fc400614591dbfaa1869edf..649e33feb09bba5cb0b12c2259b2aaf3e4f65583 100644 (file)
time_ARM = t_arm;
if (strsm_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[m][n] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
tick();
cblas_strsm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM strsm
tick();
cblas_strsm(order,side,uplo,transA,diag,M,N,alpha,A,lda,Barm,ldb);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
//printf(" %6.3f %6.3f %9.6fs %9.6fs\n", gflops_DSP, gflops_ARM, time_dsp, time_arm);
if(M==256 && N==128) {
index e7c639381e52a62bdb3aa0a4552f36e184138007..852d17890c80694850db3966634f070416ebf0e6 100644 (file)
time_ARM = t_arm;
if (zgemm_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[m][n][k] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
@@ -236,9 +236,11 @@ int run_zgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
tick();
cblas_zgemm(order,transA,transB,M,N,K,&alpha,A,lda,B,ldb,&beta,Cdsp,ldc);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM zgemm
@@ -249,9 +251,11 @@ int run_zgemm_dsp_and_arm(int M, int N, int K, float *time_dsp, float *time_arm,
tick();
cblas_zgemm(order,transA,transB,M,N,K,&alpha,A,lda,B,ldb,&beta,Carm,ldc);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
//printf(" %6.3f %6.3f %9.6fs %9.6fs\n", gflops_DSP, gflops_ARM, time_dsp, time_arm);
/*-------------------------------------------------------------------------
index 97e3ca3c4491e90f2b57226cea56353c4200aca1..d695c4a44992cad4da80834c47ce1e2cd16c85f4 100644 (file)
time_DSP = t_dsp;
time_ARM = t_arm;
if (zsyrk_err == 0){
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[n][k] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
tick();
cblas_zsyrk(order,uplo,transA,N,K,&alpha,A,lda,&beta,Cdsp,ldc);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM zsyrk
tick();
cblas_zsyrk(order,uplo,transA,N,K,&alpha,A,lda,&beta,Carm,ldc);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
/*-------------------------------------------------------------------------
* Verify Results
index 65b9c96b0966e1b74eedc25817bbe719ad8e2aa0..39655a6ffaad51ab1e1ccf952d8e203b16559319 100644 (file)
time_ARM = t_arm;
if (ztrmm_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[m][n] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
tick();
cblas_ztrmm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM ztrmm
tick();
cblas_ztrmm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Barm,ldb);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
//printf(" %6.3f %6.3f %9.6fs %9.6fs\n", gflops_DSP, gflops_ARM, time_dsp, time_arm);
/* if(M==8 && N==8) {
index 021e3b3e98c067cd5b9e6331ae1551dc642491ae..eb6da3ee04a8a3ed79a7acc66fd80ed86f2e0ec9 100644 (file)
time_ARM = t_arm;
if (ztrsm_err == 0){
//if(total_GFLOPS_DSP - total_GFLOPS_ARM > 1.0) {
- if(t_dsp < t_arm) {
+ if(DSP_FASTER_THAN_ARM(t_dsp,t_arm)) {
ofld_flag[m][n] = OFFLOAD;
printf("Offloading to DSP for this point. Skipping next point.\n");
}
tick();
cblas_ztrsm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Bdsp,ldb);
time_secs = tock();
- total_time_dsp += time_secs;
- gflops_DSP = operation_count/time_secs*1e-9;
- total_GFLOPS_DSP += gflops_DSP;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_dsp += time_secs;
+ gflops_DSP = operation_count/time_secs*1e-9;
+ total_GFLOPS_DSP += gflops_DSP;
+ }
/*-------------------------------------------------------------------------
* Time ARM ztrsm
tick();
cblas_ztrsm(order,side,uplo,transA,diag,M,N,&alpha,A,lda,Barm,ldb);
time_secs = tock();
- total_time_arm += time_secs;
- gflops_ARM = operation_count/time_secs*1e-9;
- total_GFLOPS_ARM += gflops_ARM;
+ if(iter>0) { /* skip first iteration: accumulate only the later runs */
+ total_time_arm += time_secs;
+ gflops_ARM = operation_count/time_secs*1e-9;
+ total_GFLOPS_ARM += gflops_ARM;
+ }
//printf(" %6.3f %6.3f %9.6fs %9.6fs\n", gflops_DSP, gflops_ARM, time_dsp, time_arm);
/* if(M==8 && N==8) {