about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jianzhong Xu 2016-02-05 16:22:07 -0600
committer Jianzhong Xu 2016-02-05 16:22:07 -0600
commit 7ac1b6ebdf9323256e7b65c617b0589043f70c61 (patch)
tree 93b224e126f0e0791e70191fd932076a2d62b81c
parent 78190f2d822cb8690be217aef8f6db2358dea266 (diff)
download linalg-7ac1b6ebdf9323256e7b65c617b0589043f70c61.tar.gz
linalg-7ac1b6ebdf9323256e7b65c617b0589043f70c61.tar.xz
linalg-7ac1b6ebdf9323256e7b65c617b0589043f70c61.zip
1. Added time (latency) to BLIS benchmarking raw data. 2. Combined libblis.a and libcblas_armplusdsp.a. 3. Added Doxygen documentation.
-rw-r--r--Makefile12
-rw-r--r--blasblisacc/src/ti_cblas_mem_config.c18
-rwxr-xr-xblis/testsuite/parselog.pl15
-rw-r--r--blis/testsuite/src/test_addm.c9
-rw-r--r--blis/testsuite/src/test_addv.c11
-rw-r--r--blis/testsuite/src/test_axpy2v.c9
-rw-r--r--blis/testsuite/src/test_axpyf.c9
-rw-r--r--blis/testsuite/src/test_axpym.c9
-rw-r--r--blis/testsuite/src/test_axpyv.c13
-rw-r--r--blis/testsuite/src/test_copym.c9
-rw-r--r--blis/testsuite/src/test_copyv.c13
-rw-r--r--blis/testsuite/src/test_dotaxpyv.c9
-rw-r--r--blis/testsuite/src/test_dotv.c13
-rw-r--r--blis/testsuite/src/test_dotxaxpyf.c9
-rw-r--r--blis/testsuite/src/test_dotxf.c9
-rw-r--r--blis/testsuite/src/test_dotxv.c9
-rw-r--r--blis/testsuite/src/test_gemm.c21
-rw-r--r--blis/testsuite/src/test_gemm_ukr.c9
-rw-r--r--blis/testsuite/src/test_gemmtrsm_ukr.c9
-rw-r--r--blis/testsuite/src/test_gemv.c13
-rw-r--r--blis/testsuite/src/test_ger.c13
-rw-r--r--blis/testsuite/src/test_hemm.c13
-rw-r--r--blis/testsuite/src/test_hemv.c13
-rw-r--r--blis/testsuite/src/test_her.c13
-rw-r--r--blis/testsuite/src/test_her2.c13
-rw-r--r--blis/testsuite/src/test_her2k.c14
-rw-r--r--blis/testsuite/src/test_herk.c14
-rw-r--r--blis/testsuite/src/test_libblis.c97
-rw-r--r--blis/testsuite/src/test_libblis.h12
-rw-r--r--blis/testsuite/src/test_normfm.c9
-rw-r--r--blis/testsuite/src/test_normfv.c13
-rw-r--r--blis/testsuite/src/test_randm.c9
-rw-r--r--blis/testsuite/src/test_randv.c9
-rw-r--r--blis/testsuite/src/test_scal2m.c9
-rw-r--r--blis/testsuite/src/test_scal2v.c9
-rw-r--r--blis/testsuite/src/test_scalm.c9
-rw-r--r--blis/testsuite/src/test_scalv.c13
-rw-r--r--blis/testsuite/src/test_setm.c9
-rw-r--r--blis/testsuite/src/test_setv.c9
-rw-r--r--blis/testsuite/src/test_subm.c9
-rw-r--r--blis/testsuite/src/test_subv.c9
-rw-r--r--blis/testsuite/src/test_symm.c14
-rw-r--r--blis/testsuite/src/test_symv.c13
-rw-r--r--blis/testsuite/src/test_syr.c13
-rw-r--r--blis/testsuite/src/test_syr2.c13
-rw-r--r--blis/testsuite/src/test_syr2k.c14
-rw-r--r--blis/testsuite/src/test_syrk.c13
-rw-r--r--blis/testsuite/src/test_trmm.c14
-rw-r--r--blis/testsuite/src/test_trmm3.c9
-rw-r--r--blis/testsuite/src/test_trmv.c13
-rw-r--r--blis/testsuite/src/test_trsm.c14
-rw-r--r--blis/testsuite/src/test_trsm_ukr.c9
-rw-r--r--blis/testsuite/src/test_trsv.c13
-rw-r--r--docs/doxygen/doxycfg.txt1781
-rw-r--r--docs/doxygen/mainpage.dox25
l---------docs/linalg_user_guide.html1
-rw-r--r--examples/make.inc3
-rw-r--r--examples/matmpy/main.c2
-rw-r--r--ticblas/src/ticblas.c52
-rw-r--r--ticblas/ticblas.h74
60 files changed, 2319 insertions, 317 deletions
diff --git a/Makefile b/Makefile
index b03b973..c8d56f4 100644
--- a/Makefile
+++ b/Makefile
@@ -27,9 +27,9 @@ DSPlibs:
27 cd ../$(LINALG_BLIS_DIR); ./configure -p install/$(BLIS_CFG) c66x; make -j8 MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS); make install; \ 27 cd ../$(LINALG_BLIS_DIR); ./configure -p install/$(BLIS_CFG) c66x; make -j8 MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS); make install; \
28 cd ../$(LINALG_TICBLAS_DIR)/src; make MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS); cd ../lib; \ 28 cd ../$(LINALG_TICBLAS_DIR)/src; make MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS); cd ../lib; \
29 echo "combining BLIS, CBLAS, and TICBLAS libraries into one: libcblas.ae66"; \ 29 echo "combining BLIS, CBLAS, and TICBLAS libraries into one: libcblas.ae66"; \
30 mkdir -p objs; cd objs; rm *; ar x ../../../blis/install/$(BLIS_CFG)/lib/libblis.ae66; mmv 'cblas*.o' 'blis_cblas#1.o'; \ 30 mkdir -p objs; cd objs; rm -f *; ar x ../../../blis/install/$(BLIS_CFG)/lib/libblis.ae66; mmv 'cblas*.o' 'blis_cblas#1.o'; \
31 ar -x ../../../cblas/lib/C66/libcblas.ae66; ar -x ../libticblas.ae66; chmod +rw *;cd ../../..; \ 31 ar -x ../../../cblas/lib/C66/libcblas.ae66; ar -x ../libticblas.ae66; chmod +rw *;cd ../../..; \
32 mkdir -p lib; cd lib; rm *; ar -cr libcblas.ae66 ../ticblas/lib/objs/*; cd .. 32 mkdir -p lib; cd lib; rm -f *; ar -cr libcblas.ae66 ../ticblas/lib/objs/*; cd ..
33 33
34ARMlibs: 34ARMlibs:
35 cd $(LINALG_CBLAS_DIR); make arch=ARM alllib; \ 35 cd $(LINALG_CBLAS_DIR); make arch=ARM alllib; \
@@ -38,8 +38,11 @@ ARMlibs:
38 38
39ARMplusDSP: DSPlibs ARMlibs 39ARMplusDSP: DSPlibs ARMlibs
40 cd $(LINALG_BLASACC_DIR)/src; make MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET); cd ../..; \ 40 cd $(LINALG_BLASACC_DIR)/src; make MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET); cd ../..; \
41 cp $(LINALG_BLASACC_DIR)/lib/libcblas_armplusdsp.a ./lib; \ 41 cd lib; rm -f *; \
42 cp $(LINALG_BLIS_DIR)/install/arm/lib/libblis.a ./lib; \ 42 cp ../$(LINALG_BLASACC_DIR)/lib/libcblas_armplusdsp.a .; \
43 cp ../$(LINALG_BLIS_DIR)/install/arm/lib/libblis.a .; \
44 ar -x libblis.a; mmv "cblas_*.o" "blis_cblas_#1.o"; ar -x libcblas_armplusdsp.a; rm *.a; \
45 ar -cr libcblas_armplusdsp.a *.o; rm *.o; cd ..; \
43 cp $(LINALG_CLAPACK_DIR)/lapack_ARM.a ./lib/liblapack.a; \ 46 cp $(LINALG_CLAPACK_DIR)/lapack_ARM.a ./lib/liblapack.a; \
44 cp $(LINALG_CLAPACK_DIR)/libcblaswr_ARM.a ./lib/libcblaswr.a; \ 47 cp $(LINALG_CLAPACK_DIR)/libcblaswr_ARM.a ./lib/libcblaswr.a; \
45 cp $(LINALG_CLAPACK_DIR)/F2CLIBS/libf2c_ARM.a ./lib/libf2c.a 48 cp $(LINALG_CLAPACK_DIR)/F2CLIBS/libf2c_ARM.a ./lib/libf2c.a
@@ -86,7 +89,6 @@ installARMplusDSPlib:
86 install -m 755 -d ${DESTDIR}/lib 89 install -m 755 -d ${DESTDIR}/lib
87 cp $(CBLAS_HEADERS) ${DESTDIR}/include 90 cp $(CBLAS_HEADERS) ${DESTDIR}/include
88 cp $(CLAPACK_HEADERS) ${DESTDIR}/include 91 cp $(CLAPACK_HEADERS) ${DESTDIR}/include
89 cp ./lib/libblis.a ${DESTDIR}/lib
90 cp ./lib/libcblas_armplusdsp.a ${DESTDIR}/lib 92 cp ./lib/libcblas_armplusdsp.a ${DESTDIR}/lib
91 cp ./lib/liblapack.a ${DESTDIR}/lib 93 cp ./lib/liblapack.a ${DESTDIR}/lib
92 cp ./lib/libcblaswr.a ${DESTDIR}/lib 94 cp ./lib/libcblaswr.a ${DESTDIR}/lib
diff --git a/blasblisacc/src/ti_cblas_mem_config.c b/blasblisacc/src/ti_cblas_mem_config.c
index 7b9c5ab..2c72d94 100644
--- a/blasblisacc/src/ti_cblas_mem_config.c
+++ b/blasblisacc/src/ti_cblas_mem_config.c
@@ -61,7 +61,7 @@ int bli_l3_mem_config(void *msmc_buf, size_t msmc_buf_size, void *ddr_buf, size_
61 ||(smem_size_med > msmc_buf_size) /* provided MSMC memory */ 61 ||(smem_size_med > msmc_buf_size) /* provided MSMC memory */
62 ||(smem_size_slow > ddr_buf_size) /* provided DDR memory */ 62 ||(smem_size_slow > ddr_buf_size) /* provided DDR memory */
63 ) { 63 ) {
64 return(TICBLAS_INIT_ERROR); 64 return(TICBLAS_ERROR);
65 } 65 }
66 66
67 /* Configure L1D if necessary */ 67 /* Configure L1D if necessary */
@@ -105,7 +105,7 @@ int bli_l3_mem_config(void *msmc_buf, size_t msmc_buf_size, void *ddr_buf, size_
105 } 105 }
106 106
107 if(l1d_cfg_err || l2_cfg_err) { 107 if(l1d_cfg_err || l2_cfg_err) {
108 return(TICBLAS_INIT_ERROR); 108 return(TICBLAS_ERROR);
109 } 109 }
110 110
111#ifdef TI_CBLAS_DEBUG 111#ifdef TI_CBLAS_DEBUG
@@ -179,7 +179,9 @@ int bli_l3_mem_reconfig(size_t l1D_SRAM_size_orig, size_t l2_SRAM_size_orig)
179 return(TICBLAS_SUCCESS); 179 return(TICBLAS_SUCCESS);
180} /* bli_l3_mem_reconfig */ 180} /* bli_l3_mem_reconfig */
181 181
182/* This function will be removed. Function tiCblasNew() will be used instead. */ 182/*==============================================================================
183 * This function initializes BLIS before first CBLAS call is made.
184 *============================================================================*/
183void ti_bli_init_dsp(char *l3_buf, char *l2_buf) 185void ti_bli_init_dsp(char *l3_buf, char *l2_buf)
184{ 186{
185#ifdef TI_CBLAS_DEBUG 187#ifdef TI_CBLAS_DEBUG
@@ -189,15 +191,19 @@ void ti_bli_init_dsp(char *l3_buf, char *l2_buf)
189 printf("Before calling bli_init, malloc_size is %d.\n", malloc_size); 191 printf("Before calling bli_init, malloc_size is %d.\n", malloc_size);
190#endif 192#endif
191 193
192 bli_init(); 194 tiCblasNew();
193 195
194#ifdef TI_CBLAS_DEBUG 196#ifdef TI_CBLAS_DEBUG
195 printf("After calling bli_init, malloc_size is %d.\n", malloc_size); 197 printf("After calling bli_init, malloc_size is %d.\n", malloc_size);
196#endif 198#endif
197} 199}
198 200
199/* This function will be removed. Function tiCblasDelete() will be used instead. */ 201/*==============================================================================
202 * This function frees all memories allocated by ti_bli_init_dsp.
203 *============================================================================*/
200void ti_bli_finalize_dsp(void) 204void ti_bli_finalize_dsp(void)
201{ 205{
202 bli_finalize(); 206 tiCblasDelete();
203} 207}
208
209/* Nothing after this line */
diff --git a/blis/testsuite/parselog.pl b/blis/testsuite/parselog.pl
new file mode 100755
index 0000000..e52e2a9
--- /dev/null
+++ b/blis/testsuite/parselog.pl
@@ -0,0 +1,15 @@
1#!/usr/bin/perl -sw
2
3my $input_file = $ARGV[0];
4my $output_file = $ARGV[1];
5open( my $fh_in, '<', $input_file ) or die "Can't open $input_file: $!";
6open( my $fh_out, '>', $output_file);
7
8while ( my $line = <$fh_in> ) {
9 if ( $line =~ /blis_/ ) {
10 print $fh_out $line;
11 }
12}
13
14close $fh_in;
15close $fh_out \ No newline at end of file
diff --git a/blis/testsuite/src/test_addm.c b/blis/testsuite/src/test_addm.c
index da27a3a..cd6bd16 100644
--- a/blis/testsuite/src/test_addm.c
+++ b/blis/testsuite/src/test_addm.c
@@ -56,7 +56,7 @@ void libblis_test_addm_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_addm_impl( iface_t iface, 62void libblis_test_addm_impl( iface_t iface,
@@ -115,7 +115,7 @@ void libblis_test_addm_experiment( test_params_t* params,
115 char* pc_str, 115 char* pc_str,
116 char* sc_str, 116 char* sc_str,
117 unsigned int p_cur, 117 unsigned int p_cur,
118 double* perf, 118 perf_t* perf,
119 double* resid ) 119 double* resid )
120{ 120{
121 double time_min = 1e9; 121 double time_min = 1e9;
@@ -168,8 +168,9 @@ void libblis_test_addm_experiment( test_params_t* params,
168 } 168 }
169 169
170 // Estimate the performance of the best experiment repeat. 170 // Estimate the performance of the best experiment repeat.
171 *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 171 perf->gflops = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
172 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 172 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
173 perf->time = time_min;
173 174
174 // Perform checks. 175 // Perform checks.
175 libblis_test_addm_check( &alpha, &beta, &x, &y, resid ); 176 libblis_test_addm_check( &alpha, &beta, &x, &y, resid );
diff --git a/blis/testsuite/src/test_addv.c b/blis/testsuite/src/test_addv.c
index b890994..afa54dc 100644
--- a/blis/testsuite/src/test_addv.c
+++ b/blis/testsuite/src/test_addv.c
@@ -56,7 +56,7 @@ void libblis_test_addv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_addv_impl( iface_t iface, 62void libblis_test_addv_impl( iface_t iface,
@@ -114,7 +114,7 @@ void libblis_test_addv_experiment( test_params_t* params,
114 char* pc_str, 114 char* pc_str,
115 char* sc_str, 115 char* sc_str,
116 unsigned int p_cur, 116 unsigned int p_cur,
117 double* perf, 117 perf_t* perf,
118 double* resid ) 118 double* resid )
119{ 119{
120 double time_min = 1e9; 120 double time_min = 1e9;
@@ -164,9 +164,10 @@ void libblis_test_addv_experiment( test_params_t* params,
164 } 164 }
165 165
166 // Estimate the performance of the best experiment repeat. 166 // Estimate the performance of the best experiment repeat.
167 *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 167 perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
168 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 168 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
169 169 perf->time = time_min;
170
170 // Perform checks. 171 // Perform checks.
171 libblis_test_addv_check( &alpha, &beta, &x, &y, resid ); 172 libblis_test_addv_check( &alpha, &beta, &x, &y, resid );
172 173
diff --git a/blis/testsuite/src/test_axpy2v.c b/blis/testsuite/src/test_axpy2v.c
index a622a1b..155e2d4 100644
--- a/blis/testsuite/src/test_axpy2v.c
+++ b/blis/testsuite/src/test_axpy2v.c
@@ -56,7 +56,7 @@ void libblis_test_axpy2v_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_axpy2v_impl( iface_t iface, 62void libblis_test_axpy2v_impl( iface_t iface,
@@ -124,7 +124,7 @@ void libblis_test_axpy2v_experiment( test_params_t* params,
124 char* pc_str, 124 char* pc_str,
125 char* sc_str, 125 char* sc_str,
126 unsigned int p_cur, 126 unsigned int p_cur,
127 double* perf, 127 perf_t* perf,
128 double* resid ) 128 double* resid )
129{ 129{
130 unsigned int n_repeats = params->n_repeats; 130 unsigned int n_repeats = params->n_repeats;
@@ -193,8 +193,9 @@ void libblis_test_axpy2v_experiment( test_params_t* params,
193 } 193 }
194 194
195 // Estimate the performance of the best experiment repeat. 195 // Estimate the performance of the best experiment repeat.
196 *perf = ( 2.0 * m + 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 196 perf->gflops = ( 2.0 * m + 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
197 if ( bli_obj_is_complex( z ) ) *perf *= 4.0; 197 if ( bli_obj_is_complex( z ) ) perf->gflops *= 4.0;
198 perf->time = time_min;
198 199
199 // Perform checks. 200 // Perform checks.
200 libblis_test_axpy2v_check( &alpha1, &alpha2, &x, &y, &z, &z_save, resid ); 201 libblis_test_axpy2v_check( &alpha1, &alpha2, &x, &y, &z, &z_save, resid );
diff --git a/blis/testsuite/src/test_axpyf.c b/blis/testsuite/src/test_axpyf.c
index e85defc..6968708 100644
--- a/blis/testsuite/src/test_axpyf.c
+++ b/blis/testsuite/src/test_axpyf.c
@@ -56,7 +56,7 @@ void libblis_test_axpyf_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_axpyf_impl( iface_t iface, 62void libblis_test_axpyf_impl( iface_t iface,
@@ -122,7 +122,7 @@ void libblis_test_axpyf_experiment( test_params_t* params,
122 char* pc_str, 122 char* pc_str,
123 char* sc_str, 123 char* sc_str,
124 unsigned int p_cur, 124 unsigned int p_cur,
125 double* perf, 125 perf_t* perf,
126 double* resid ) 126 double* resid )
127{ 127{
128 unsigned int n_repeats = params->n_repeats; 128 unsigned int n_repeats = params->n_repeats;
@@ -196,8 +196,9 @@ void libblis_test_axpyf_experiment( test_params_t* params,
196 } 196 }
197 197
198 // Estimate the performance of the best experiment repeat. 198 // Estimate the performance of the best experiment repeat.
199 *perf = ( 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF; 199 perf->gflops = ( 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF;
200 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 200 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
201 perf->time = time_min;
201 202
202 // Perform checks. 203 // Perform checks.
203 libblis_test_axpyf_check( &alpha, &a, &x, &y, &y_save, resid ); 204 libblis_test_axpyf_check( &alpha, &a, &x, &y, &y_save, resid );
diff --git a/blis/testsuite/src/test_axpym.c b/blis/testsuite/src/test_axpym.c
index da5124b..c1d2c04 100644
--- a/blis/testsuite/src/test_axpym.c
+++ b/blis/testsuite/src/test_axpym.c
@@ -56,7 +56,7 @@ void libblis_test_axpym_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_axpym_impl( iface_t iface, 62void libblis_test_axpym_impl( iface_t iface,
@@ -120,7 +120,7 @@ void libblis_test_axpym_experiment( test_params_t* params,
120 char* pc_str, 120 char* pc_str,
121 char* sc_str, 121 char* sc_str,
122 unsigned int p_cur, 122 unsigned int p_cur,
123 double* perf, 123 perf_t* perf,
124 double* resid ) 124 double* resid )
125{ 125{
126 unsigned int n_repeats = params->n_repeats; 126 unsigned int n_repeats = params->n_repeats;
@@ -182,8 +182,9 @@ void libblis_test_axpym_experiment( test_params_t* params,
182 } 182 }
183 183
184 // Estimate the performance of the best experiment repeat. 184 // Estimate the performance of the best experiment repeat.
185 *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 185 perf->gflops = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
186 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 186 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
187 perf->time = time_min;
187 188
188 // Perform checks. 189 // Perform checks.
189 libblis_test_axpym_check( &alpha, &x, &y, &y_save, resid ); 190 libblis_test_axpym_check( &alpha, &x, &y, &y_save, resid );
diff --git a/blis/testsuite/src/test_axpyv.c b/blis/testsuite/src/test_axpyv.c
index ee237cf..76ff570 100644
--- a/blis/testsuite/src/test_axpyv.c
+++ b/blis/testsuite/src/test_axpyv.c
@@ -56,7 +56,7 @@ void libblis_test_axpyv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_axpyv_impl( iface_t iface, 62void libblis_test_axpyv_impl( iface_t iface,
@@ -120,7 +120,7 @@ void libblis_test_axpyv_experiment( test_params_t* params,
120 char* pc_str, 120 char* pc_str,
121 char* sc_str, 121 char* sc_str,
122 unsigned int p_cur, 122 unsigned int p_cur,
123 double* perf, 123 perf_t* perf,
124 double* resid ) 124 double* resid )
125{ 125{
126 unsigned int n_repeats = params->n_repeats; 126 unsigned int n_repeats = params->n_repeats;
@@ -233,13 +233,14 @@ void libblis_test_axpyv_experiment( test_params_t* params,
233 } 233 }
234#ifdef BLIS_ENABLE_MULTITHREAD_TEST 234#ifdef BLIS_ENABLE_MULTITHREAD_TEST
235 // Estimate the performance of the best experiment repeat. 235 // Estimate the performance of the best experiment repeat.
236 *perf = ( 2.0 * m )*test_way / time_min / FLOPS_PER_UNIT_PERF; 236 perf->gflops = ( 2.0 * m )*test_way / time_min / FLOPS_PER_UNIT_PERF;
237 if ( bli_obj_is_complex( y[0] ) ) *perf *= 4.0; 237 if ( bli_obj_is_complex( y[0] ) ) perf->gflops *= 4.0;
238#else 238#else
239 // Estimate the performance of the best experiment repeat. 239 // Estimate the performance of the best experiment repeat.
240 *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 240 perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
241 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 241 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
242#endif 242#endif
243 perf->time = time_min;
243 244
244#ifdef BLIS_ENABLE_MULTITHREAD_TEST 245#ifdef BLIS_ENABLE_MULTITHREAD_TEST
245 // Check output of each thread, and send max residue to main 246 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_copym.c b/blis/testsuite/src/test_copym.c
index 66a7bbd..3a01242 100644
--- a/blis/testsuite/src/test_copym.c
+++ b/blis/testsuite/src/test_copym.c
@@ -56,7 +56,7 @@ void libblis_test_copym_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_copym_impl( iface_t iface, 62void libblis_test_copym_impl( iface_t iface,
@@ -114,7 +114,7 @@ void libblis_test_copym_experiment( test_params_t* params,
114 char* pc_str, 114 char* pc_str,
115 char* sc_str, 115 char* sc_str,
116 unsigned int p_cur, 116 unsigned int p_cur,
117 double* perf, 117 perf_t* perf,
118 double* resid ) 118 double* resid )
119{ 119{
120 double time_min = 1e9; 120 double time_min = 1e9;
@@ -158,8 +158,9 @@ void libblis_test_copym_experiment( test_params_t* params,
158 } 158 }
159 159
160 // Estimate the performance of the best experiment repeat. 160 // Estimate the performance of the best experiment repeat.
161 *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 161 perf->gflops = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
162 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 162 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
163 perf->time = time_min;
163 164
164 // Perform checks. 165 // Perform checks.
165 libblis_test_copym_check( &x, &y, resid ); 166 libblis_test_copym_check( &x, &y, resid );
diff --git a/blis/testsuite/src/test_copyv.c b/blis/testsuite/src/test_copyv.c
index e854d5d..fb6e2af 100644
--- a/blis/testsuite/src/test_copyv.c
+++ b/blis/testsuite/src/test_copyv.c
@@ -56,7 +56,7 @@ void libblis_test_copyv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_copyv_impl( iface_t iface, 62void libblis_test_copyv_impl( iface_t iface,
@@ -114,7 +114,7 @@ void libblis_test_copyv_experiment( test_params_t* params,
114 char* pc_str, 114 char* pc_str,
115 char* sc_str, 115 char* sc_str,
116 unsigned int p_cur, 116 unsigned int p_cur,
117 double* perf, 117 perf_t* perf,
118 double* resid ) 118 double* resid )
119{ 119{
120 double time_min = 1e9; 120 double time_min = 1e9;
@@ -187,13 +187,14 @@ void libblis_test_copyv_experiment( test_params_t* params,
187 } 187 }
188#ifdef BLIS_ENABLE_MULTITHREAD_TEST 188#ifdef BLIS_ENABLE_MULTITHREAD_TEST
189 // Estimate the performance of the best experiment repeat. 189 // Estimate the performance of the best experiment repeat.
190 *perf = ( 1.0 * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 190 perf->gflops = ( 1.0 * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
191 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 191 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
192#else 192#else
193 // Estimate the performance of the best experiment repeat. 193 // Estimate the performance of the best experiment repeat.
194 *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 194 perf->gflops = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
195 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 195 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
196#endif 196#endif
197 perf->time = time_min;
197 198
198#ifdef BLIS_ENABLE_MULTITHREAD_TEST 199#ifdef BLIS_ENABLE_MULTITHREAD_TEST
199 // Check output of each thread, and send max residue to main 200 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_dotaxpyv.c b/blis/testsuite/src/test_dotaxpyv.c
index 4fa0fbb..dbce8de 100644
--- a/blis/testsuite/src/test_dotaxpyv.c
+++ b/blis/testsuite/src/test_dotaxpyv.c
@@ -56,7 +56,7 @@ void libblis_test_dotaxpyv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_dotaxpyv_impl( iface_t iface, 62void libblis_test_dotaxpyv_impl( iface_t iface,
@@ -126,7 +126,7 @@ void libblis_test_dotaxpyv_experiment( test_params_t* params,
126 char* pc_str, 126 char* pc_str,
127 char* sc_str, 127 char* sc_str,
128 unsigned int p_cur, 128 unsigned int p_cur,
129 double* perf, 129 perf_t* perf,
130 double* resid ) 130 double* resid )
131{ 131{
132 unsigned int n_repeats = params->n_repeats; 132 unsigned int n_repeats = params->n_repeats;
@@ -213,8 +213,9 @@ void libblis_test_dotaxpyv_experiment( test_params_t* params,
213 } 213 }
214 214
215 // Estimate the performance of the best experiment repeat. 215 // Estimate the performance of the best experiment repeat.
216 *perf = ( 2.0 * m + 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 216 perf->gflops = ( 2.0 * m + 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
217 if ( bli_obj_is_complex( z ) ) *perf *= 4.0; 217 if ( bli_obj_is_complex( z ) ) perf->gflops *= 4.0;
218 perf->time = time_min;
218 219
219 // Perform checks. 220 // Perform checks.
220 libblis_test_dotaxpyv_check( &alpha, &xt, &x, &y, &rho, &z, &z_save, resid ); 221 libblis_test_dotaxpyv_check( &alpha, &xt, &x, &y, &rho, &z, &z_save, resid );
diff --git a/blis/testsuite/src/test_dotv.c b/blis/testsuite/src/test_dotv.c
index 0fac9b9..f6b7ea9 100644
--- a/blis/testsuite/src/test_dotv.c
+++ b/blis/testsuite/src/test_dotv.c
@@ -56,7 +56,7 @@ void libblis_test_dotv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_dotv_impl( iface_t iface, 62void libblis_test_dotv_impl( iface_t iface,
@@ -116,7 +116,7 @@ void libblis_test_dotv_experiment( test_params_t* params,
116 char* pc_str, 116 char* pc_str,
117 char* sc_str, 117 char* sc_str,
118 unsigned int p_cur, 118 unsigned int p_cur,
119 double* perf, 119 perf_t* perf,
120 double* resid ) 120 double* resid )
121{ 121{
122 unsigned int n_repeats = params->n_repeats; 122 unsigned int n_repeats = params->n_repeats;
@@ -210,13 +210,14 @@ void libblis_test_dotv_experiment( test_params_t* params,
210 } 210 }
211#ifdef BLIS_ENABLE_MULTITHREAD_TEST 211#ifdef BLIS_ENABLE_MULTITHREAD_TEST
212 // Estimate the performance of the best experiment repeat. 212 // Estimate the performance of the best experiment repeat.
213 *perf = ( 2.0 * m ) *test_way / time_min / FLOPS_PER_UNIT_PERF; 213 perf->gflops = ( 2.0 * m ) *test_way / time_min / FLOPS_PER_UNIT_PERF;
214 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 214 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
215#else 215#else
216 // Estimate the performance of the best experiment repeat. 216 // Estimate the performance of the best experiment repeat.
217 *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 217 perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
218 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 218 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
219#endif 219#endif
220 perf->time = time_min;
220 221
221#ifdef BLIS_ENABLE_MULTITHREAD_TEST 222#ifdef BLIS_ENABLE_MULTITHREAD_TEST
222 // Check output of each thread, and send max residue to main 223 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_dotxaxpyf.c b/blis/testsuite/src/test_dotxaxpyf.c
index b436147..6a7a55e 100644
--- a/blis/testsuite/src/test_dotxaxpyf.c
+++ b/blis/testsuite/src/test_dotxaxpyf.c
@@ -56,7 +56,7 @@ void libblis_test_dotxaxpyf_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_dotxaxpyf_impl( iface_t iface, 62void libblis_test_dotxaxpyf_impl( iface_t iface,
@@ -132,7 +132,7 @@ void libblis_test_dotxaxpyf_experiment( test_params_t* params,
132 char* pc_str, 132 char* pc_str,
133 char* sc_str, 133 char* sc_str,
134 unsigned int p_cur, 134 unsigned int p_cur,
135 double* perf, 135 perf_t* perf,
136 double* resid ) 136 double* resid )
137{ 137{
138 unsigned int n_repeats = params->n_repeats; 138 unsigned int n_repeats = params->n_repeats;
@@ -225,8 +225,9 @@ void libblis_test_dotxaxpyf_experiment( test_params_t* params,
225 } 225 }
226 226
227 // Estimate the performance of the best experiment repeat. 227 // Estimate the performance of the best experiment repeat.
228 *perf = ( 2.0 * m * b_n + 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF; 228 perf->gflops = ( 2.0 * m * b_n + 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF;
229 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 229 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
230 perf->time = time_min;
230 231
231 // Perform checks. 232 // Perform checks.
232 libblis_test_dotxaxpyf_check( &alpha, &at, &a, &w, &x, &beta, &y, &z, &y_save, &z_save, resid ); 233 libblis_test_dotxaxpyf_check( &alpha, &at, &a, &w, &x, &beta, &y, &z, &y_save, &z_save, resid );
diff --git a/blis/testsuite/src/test_dotxf.c b/blis/testsuite/src/test_dotxf.c
index d9a21c4..6167182 100644
--- a/blis/testsuite/src/test_dotxf.c
+++ b/blis/testsuite/src/test_dotxf.c
@@ -56,7 +56,7 @@ void libblis_test_dotxf_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_dotxf_impl( iface_t iface, 62void libblis_test_dotxf_impl( iface_t iface,
@@ -124,7 +124,7 @@ void libblis_test_dotxf_experiment( test_params_t* params,
124 char* pc_str, 124 char* pc_str,
125 char* sc_str, 125 char* sc_str,
126 unsigned int p_cur, 126 unsigned int p_cur,
127 double* perf, 127 perf_t* perf,
128 double* resid ) 128 double* resid )
129{ 129{
130 unsigned int n_repeats = params->n_repeats; 130 unsigned int n_repeats = params->n_repeats;
@@ -201,8 +201,9 @@ void libblis_test_dotxf_experiment( test_params_t* params,
201 } 201 }
202 202
203 // Estimate the performance of the best experiment repeat. 203 // Estimate the performance of the best experiment repeat.
204 *perf = ( 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF; 204 perf->gflops = ( 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF;
205 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 205 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
206 perf->time = time_min;
206 207
207 // Perform checks. 208 // Perform checks.
208 libblis_test_dotxf_check( &alpha, &a, &x, &beta, &y, &y_save, resid ); 209 libblis_test_dotxf_check( &alpha, &a, &x, &beta, &y, &y_save, resid );
diff --git a/blis/testsuite/src/test_dotxv.c b/blis/testsuite/src/test_dotxv.c
index fc1aa0a..95f6c5e 100644
--- a/blis/testsuite/src/test_dotxv.c
+++ b/blis/testsuite/src/test_dotxv.c
@@ -56,7 +56,7 @@ void libblis_test_dotxv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_dotxv_impl( iface_t iface, 62void libblis_test_dotxv_impl( iface_t iface,
@@ -121,7 +121,7 @@ void libblis_test_dotxv_experiment( test_params_t* params,
121 char* pc_str, 121 char* pc_str,
122 char* sc_str, 122 char* sc_str,
123 unsigned int p_cur, 123 unsigned int p_cur,
124 double* perf, 124 perf_t* perf,
125 double* resid ) 125 double* resid )
126{ 126{
127 unsigned int n_repeats = params->n_repeats; 127 unsigned int n_repeats = params->n_repeats;
@@ -202,8 +202,9 @@ void libblis_test_dotxv_experiment( test_params_t* params,
202 } 202 }
203 203
204 // Estimate the performance of the best experiment repeat. 204 // Estimate the performance of the best experiment repeat.
205 *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 205 perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
206 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 206 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
207 perf->time = time_min;
207 208
208 // Perform checks. 209 // Perform checks.
209 libblis_test_dotxv_check( &alpha, &x, &y, &beta, &rho, &rho_save, resid ); 210 libblis_test_dotxv_check( &alpha, &x, &y, &beta, &rho, &rho_save, resid );
diff --git a/blis/testsuite/src/test_gemm.c b/blis/testsuite/src/test_gemm.c
index f384490..33ef0c8 100644
--- a/blis/testsuite/src/test_gemm.c
+++ b/blis/testsuite/src/test_gemm.c
@@ -55,7 +55,7 @@ void libblis_test_gemm_experiment( test_params_t* params,
55 char* pc_str, 55 char* pc_str,
56 char* sc_str, 56 char* sc_str,
57 unsigned int p_cur, 57 unsigned int p_cur,
58 double* perf, 58 perf_t* perf,
59 double* resid ); 59 double* resid );
60 60
61void libblis_test_gemm_impl( iface_t iface, 61void libblis_test_gemm_impl( iface_t iface,
@@ -126,7 +126,7 @@ void libblis_test_gemm_experiment( test_params_t* params,
126 char* pc_str, 126 char* pc_str,
127 char* sc_str, 127 char* sc_str,
128 unsigned int p_cur, 128 unsigned int p_cur,
129 double* perf, 129 perf_t* perf,
130 double* resid ) 130 double* resid )
131{ 131{
132 unsigned int n_repeats = params->n_repeats; 132 unsigned int n_repeats = params->n_repeats;
@@ -170,8 +170,11 @@ void libblis_test_gemm_experiment( test_params_t* params,
170 // Create test operands (vectors and/or matrices). 170 // Create test operands (vectors and/or matrices).
171 libblis_test_mobj_create( params, datatype, transa, 171 libblis_test_mobj_create( params, datatype, transa,
172 sc_str[0], m, k, &a ); 172 sc_str[0], m, k, &a );
173 //printf("Created object a, buffer address is 0x%x.\n", (unsigned int)bli_obj_buffer(a));
174
173 libblis_test_mobj_create( params, datatype, transb, 175 libblis_test_mobj_create( params, datatype, transb,
174 sc_str[1], k, n, &b ); 176 sc_str[1], k, n, &b );
177 //printf("Created object b, buffer address is 0x%x.\n", (unsigned int)bli_obj_buffer(b));
175 178
176#ifdef BLIS_ENABLE_MULTITHREAD_TEST 179#ifdef BLIS_ENABLE_MULTITHREAD_TEST
177 for(i = 0; i < test_way; i++) 180 for(i = 0; i < test_way; i++)
@@ -187,6 +190,8 @@ void libblis_test_gemm_experiment( test_params_t* params,
187 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, 190 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
188 sc_str[2], m, n, &c_save ); 191 sc_str[2], m, n, &c_save );
189#endif 192#endif
193 //printf("Created object c, buffer address is 0x%x.\n", (unsigned int)bli_obj_buffer(c[0]));
194 //printf("Created object c_save, buffer address is 0x%x.\n", (unsigned int)bli_obj_buffer(c_save[0]));
190 195
191 // Set alpha and beta. 196 // Set alpha and beta.
192#ifdef BLIS_ENABLE_MULTITHREAD_TEST 197#ifdef BLIS_ENABLE_MULTITHREAD_TEST
@@ -284,12 +289,13 @@ void libblis_test_gemm_experiment( test_params_t* params,
284 289
285 // Estimate the performance of the best experiment repeat. 290 // Estimate the performance of the best experiment repeat.
286#ifdef BLIS_ENABLE_MULTITHREAD_TEST 291#ifdef BLIS_ENABLE_MULTITHREAD_TEST
287 *perf = ( 2.0 * m * n * k ) / time_min * test_way / FLOPS_PER_UNIT_PERF; 292 perf->gflops = ( 2.0 * m * n * k ) / time_min * test_way / FLOPS_PER_UNIT_PERF;
288 if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; 293 if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0;
289#else 294#else
290 *perf = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF; 295 perf->gflops = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF;
291 if ( bli_obj_is_complex( c ) ) *perf *= 4.0; 296 if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0;
292#endif 297#endif
298 perf->time = time_min;
293 299
294 // Perform checks. 300 // Perform checks.
295#ifdef BLIS_ENABLE_MULTITHREAD_TEST 301#ifdef BLIS_ENABLE_MULTITHREAD_TEST
@@ -432,7 +438,7 @@ void libblis_test_gemm_impl( iface_t iface,
432 cblas_b = (float *) bli_obj_buffer( *b ); 438 cblas_b = (float *) bli_obj_buffer( *b );
433 cblas_c = (float *) bli_obj_buffer( *c ); 439 cblas_c = (float *) bli_obj_buffer( *c );
434 440
435// printf("test_gemm %d %d %d %d %d\n", order, transA, transB, lda, ldb); 441 //printf("test_gemm %d %d %d %d %d, 0x%x, 0x%x, 0x%x\n", order, transA, transB, lda, ldb, (unsigned int)cblas_a,(unsigned int)cblas_b,(unsigned int)cblas_c);
436 cblas_sgemm(order, transA, transB, m, n, k, *cblas_alpha, cblas_a, lda, cblas_b, ldb, *cblas_beta, cblas_c, ldc); 442 cblas_sgemm(order, transA, transB, m, n, k, *cblas_alpha, cblas_a, lda, cblas_b, ldb, *cblas_beta, cblas_c, ldc);
437 443
438 } 444 }
@@ -447,6 +453,7 @@ void libblis_test_gemm_impl( iface_t iface,
447 cblas_b = (double *) bli_obj_buffer( *b ); 453 cblas_b = (double *) bli_obj_buffer( *b );
448 cblas_c = (double *) bli_obj_buffer( *c ); 454 cblas_c = (double *) bli_obj_buffer( *c );
449 455
456 //printf("test_gemm %d %d %d %d %d, 0x%x, 0x%x, 0x%x\n", order, transA, transB, lda, ldb, (unsigned int)cblas_a,(unsigned int)cblas_b,(unsigned int)cblas_c);
450 cblas_dgemm(order, transA, transB, m, n, k, *cblas_alpha, cblas_a, lda, cblas_b, ldb, *cblas_beta, cblas_c, ldc); 457 cblas_dgemm(order, transA, transB, m, n, k, *cblas_alpha, cblas_a, lda, cblas_b, ldb, *cblas_beta, cblas_c, ldc);
451 458
452 } 459 }
diff --git a/blis/testsuite/src/test_gemm_ukr.c b/blis/testsuite/src/test_gemm_ukr.c
index 5506bed..9cf8623 100644
--- a/blis/testsuite/src/test_gemm_ukr.c
+++ b/blis/testsuite/src/test_gemm_ukr.c
@@ -56,7 +56,7 @@ void libblis_test_gemm_ukr_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_gemm_ukr_impl( iface_t iface, 62void libblis_test_gemm_ukr_impl( iface_t iface,
@@ -131,7 +131,7 @@ void libblis_test_gemm_ukr_experiment( test_params_t* params,
131 char* pc_str, 131 char* pc_str,
132 char* sc_str, 132 char* sc_str,
133 unsigned int p_cur, 133 unsigned int p_cur,
134 double* perf, 134 perf_t* perf,
135 double* resid ) 135 double* resid )
136{ 136{
137 unsigned int n_repeats = params->n_repeats; 137 unsigned int n_repeats = params->n_repeats;
@@ -238,8 +238,9 @@ void libblis_test_gemm_ukr_experiment( test_params_t* params,
238 } 238 }
239 239
240 // Estimate the performance of the best experiment repeat. 240 // Estimate the performance of the best experiment repeat.
241 *perf = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF; 241 perf->gflops = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF;
242 if ( bli_obj_is_complex( c ) ) *perf *= 4.0; 242 if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0;
243 perf->time = time_min;
243 244
244 // Perform checks. 245 // Perform checks.
245 libblis_test_gemm_ukr_check( &alpha, &a, &b, &beta, &c, &c_save, resid ); 246 libblis_test_gemm_ukr_check( &alpha, &a, &b, &beta, &c, &c_save, resid );
diff --git a/blis/testsuite/src/test_gemmtrsm_ukr.c b/blis/testsuite/src/test_gemmtrsm_ukr.c
index 87d7f1b..eab4d44 100644
--- a/blis/testsuite/src/test_gemmtrsm_ukr.c
+++ b/blis/testsuite/src/test_gemmtrsm_ukr.c
@@ -56,7 +56,7 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_gemmtrsm_ukr_impl( iface_t iface, 62void libblis_test_gemmtrsm_ukr_impl( iface_t iface,
@@ -143,7 +143,7 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params,
143 char* pc_str, 143 char* pc_str,
144 char* sc_str, 144 char* sc_str,
145 unsigned int p_cur, 145 unsigned int p_cur,
146 double* perf, 146 perf_t* perf,
147 double* resid ) 147 double* resid )
148{ 148{
149 unsigned int n_repeats = params->n_repeats; 149 unsigned int n_repeats = params->n_repeats;
@@ -289,8 +289,9 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params,
289 } 289 }
290 290
291 // Estimate the performance of the best experiment repeat. 291 // Estimate the performance of the best experiment repeat.
292 *perf = ( 2.0 * m * n * k + 1.0 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 292 perf->gflops = ( 2.0 * m * n * k + 1.0 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
293 if ( bli_obj_is_complex( b ) ) *perf *= 4.0; 293 if ( bli_obj_is_complex( b ) ) perf->gflops *= 4.0;
294 perf->time = time_min;
294 295
295 // Perform checks. 296 // Perform checks.
296 libblis_test_gemmtrsm_ukr_check( side, &alpha, 297 libblis_test_gemmtrsm_ukr_check( side, &alpha,
diff --git a/blis/testsuite/src/test_gemv.c b/blis/testsuite/src/test_gemv.c
index 7d61148..f59d1d4 100644
--- a/blis/testsuite/src/test_gemv.c
+++ b/blis/testsuite/src/test_gemv.c
@@ -56,7 +56,7 @@ void libblis_test_gemv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_gemv_impl( iface_t iface, 62void libblis_test_gemv_impl( iface_t iface,
@@ -124,7 +124,7 @@ void libblis_test_gemv_experiment( test_params_t* params,
124 char* pc_str, 124 char* pc_str,
125 char* sc_str, 125 char* sc_str,
126 unsigned int p_cur, 126 unsigned int p_cur,
127 double* perf, 127 perf_t* perf,
128 double* resid ) 128 double* resid )
129{ 129{
130 unsigned int n_repeats = params->n_repeats; 130 unsigned int n_repeats = params->n_repeats;
@@ -260,13 +260,14 @@ void libblis_test_gemv_experiment( test_params_t* params,
260 } 260 }
261#ifdef BLIS_ENABLE_MULTITHREAD_TEST 261#ifdef BLIS_ENABLE_MULTITHREAD_TEST
262 // Estimate the performance of the best experiment repeat. 262 // Estimate the performance of the best experiment repeat.
263 *perf = ( 2.0 * m * n ) * test_way/ time_min / FLOPS_PER_UNIT_PERF; 263 perf->gflops = ( 2.0 * m * n ) * test_way/ time_min / FLOPS_PER_UNIT_PERF;
264 if ( bli_obj_is_complex( y[0] ) ) *perf *= 4.0; 264 if ( bli_obj_is_complex( y[0] ) ) perf->gflops *= 4.0;
265#else 265#else
266 // Estimate the performance of the best experiment repeat. 266 // Estimate the performance of the best experiment repeat.
267 *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 267 perf->gflops = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
268 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 268 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
269#endif 269#endif
270 perf->time = time_min;
270 271
271#ifdef BLIS_ENABLE_MULTITHREAD_TEST 272#ifdef BLIS_ENABLE_MULTITHREAD_TEST
272 // Check output of each thread, and send max residue to main 273 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_ger.c b/blis/testsuite/src/test_ger.c
index d938533..2139260 100644
--- a/blis/testsuite/src/test_ger.c
+++ b/blis/testsuite/src/test_ger.c
@@ -56,7 +56,7 @@ void libblis_test_ger_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_ger_impl( iface_t iface, 62void libblis_test_ger_impl( iface_t iface,
@@ -122,7 +122,7 @@ void libblis_test_ger_experiment( test_params_t* params,
122 char* pc_str, 122 char* pc_str,
123 char* sc_str, 123 char* sc_str,
124 unsigned int p_cur, 124 unsigned int p_cur,
125 double* perf, 125 perf_t* perf,
126 double* resid ) 126 double* resid )
127{ 127{
128 unsigned int n_repeats = params->n_repeats; 128 unsigned int n_repeats = params->n_repeats;
@@ -254,13 +254,14 @@ void libblis_test_ger_experiment( test_params_t* params,
254 } 254 }
255#ifdef BLIS_ENABLE_MULTITHREAD_TEST 255#ifdef BLIS_ENABLE_MULTITHREAD_TEST
256 // Estimate the performance of the best experiment repeat. 256 // Estimate the performance of the best experiment repeat.
257 *perf = ( 2.0 * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 257 perf->gflops = ( 2.0 * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
258 if ( bli_obj_is_complex( a[0] ) ) *perf *= 4.0; 258 if ( bli_obj_is_complex( a[0] ) ) perf->gflops *= 4.0;
259#else 259#else
260 // Estimate the performance of the best experiment repeat. 260 // Estimate the performance of the best experiment repeat.
261 *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 261 perf->gflops = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
262 if ( bli_obj_is_complex( a ) ) *perf *= 4.0; 262 if ( bli_obj_is_complex( a ) ) perf->gflops *= 4.0;
263#endif 263#endif
264 perf->time = time_min;
264 265
265 // Perform checks. 266 // Perform checks.
266#ifdef BLIS_ENABLE_MULTITHREAD_TEST 267#ifdef BLIS_ENABLE_MULTITHREAD_TEST
diff --git a/blis/testsuite/src/test_hemm.c b/blis/testsuite/src/test_hemm.c
index a77cada..1e7a528 100644
--- a/blis/testsuite/src/test_hemm.c
+++ b/blis/testsuite/src/test_hemm.c
@@ -56,7 +56,7 @@ void libblis_test_hemm_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_hemm_impl( iface_t iface, 62void libblis_test_hemm_impl( iface_t iface,
@@ -130,7 +130,7 @@ void libblis_test_hemm_experiment( test_params_t* params,
130 char* pc_str, 130 char* pc_str,
131 char* sc_str, 131 char* sc_str,
132 unsigned int p_cur, 132 unsigned int p_cur,
133 double* perf, 133 perf_t* perf,
134 double* resid ) 134 double* resid )
135{ 135{
136 unsigned int n_repeats = params->n_repeats; 136 unsigned int n_repeats = params->n_repeats;
@@ -280,13 +280,14 @@ void libblis_test_hemm_experiment( test_params_t* params,
280 // Estimate the performance of the best experiment repeat. 280 // Estimate the performance of the best experiment repeat.
281 281
282#ifdef BLIS_ENABLE_MULTITHREAD_TEST 282#ifdef BLIS_ENABLE_MULTITHREAD_TEST
283 *perf = ( 2.0 * mn_side * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 283 perf->gflops = ( 2.0 * mn_side * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
284 if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; 284 if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0;
285#else 285#else
286 *perf = ( 2.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 286 perf->gflops = ( 2.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
287 if ( bli_obj_is_complex( c ) ) *perf *= 4.0; 287 if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0;
288#endif 288#endif
289 289
290 perf->time = time_min;
290 291
291 // Perform checks. 292 // Perform checks.
292#ifdef BLIS_ENABLE_MULTITHREAD_TEST 293#ifdef BLIS_ENABLE_MULTITHREAD_TEST
diff --git a/blis/testsuite/src/test_hemv.c b/blis/testsuite/src/test_hemv.c
index 682761c..7b1a447 100644
--- a/blis/testsuite/src/test_hemv.c
+++ b/blis/testsuite/src/test_hemv.c
@@ -56,7 +56,7 @@ void libblis_test_hemv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_hemv_impl( iface_t iface, 62void libblis_test_hemv_impl( iface_t iface,
@@ -125,7 +125,7 @@ void libblis_test_hemv_experiment( test_params_t* params,
125 char* pc_str, 125 char* pc_str,
126 char* sc_str, 126 char* sc_str,
127 unsigned int p_cur, 127 unsigned int p_cur,
128 double* perf, 128 perf_t* perf,
129 double* resid ) 129 double* resid )
130{ 130{
131 unsigned int n_repeats = params->n_repeats; 131 unsigned int n_repeats = params->n_repeats;
@@ -280,13 +280,14 @@ void libblis_test_hemv_experiment( test_params_t* params,
280 } 280 }
281#ifdef BLIS_ENABLE_MULTITHREAD_TEST 281#ifdef BLIS_ENABLE_MULTITHREAD_TEST
282 // Estimate the performance of the best experiment repeat. 282 // Estimate the performance of the best experiment repeat.
283 *perf = ( 1.0 * m * m ) *test_way / time_min / FLOPS_PER_UNIT_PERF; 283 perf->gflops = ( 1.0 * m * m ) *test_way / time_min / FLOPS_PER_UNIT_PERF;
284 if ( bli_obj_is_complex( y[0] ) ) *perf *= 4.0; 284 if ( bli_obj_is_complex( y[0] ) ) perf->gflops *= 4.0;
285#else 285#else
286 // Estimate the performance of the best experiment repeat. 286 // Estimate the performance of the best experiment repeat.
287 *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; 287 perf->gflops = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
288 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 288 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
289#endif 289#endif
290 perf->time = time_min;
290 291
291#ifdef BLIS_ENABLE_MULTITHREAD_TEST 292#ifdef BLIS_ENABLE_MULTITHREAD_TEST
292 // Check output of each thread, and send max residue to main 293 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_her.c b/blis/testsuite/src/test_her.c
index 592860c..660c31f 100644
--- a/blis/testsuite/src/test_her.c
+++ b/blis/testsuite/src/test_her.c
@@ -56,7 +56,7 @@ void libblis_test_her_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_her_impl( iface_t iface, 62void libblis_test_her_impl( iface_t iface,
@@ -122,7 +122,7 @@ void libblis_test_her_experiment( test_params_t* params,
122 char* pc_str, 122 char* pc_str,
123 char* sc_str, 123 char* sc_str,
124 unsigned int p_cur, 124 unsigned int p_cur,
125 double* perf, 125 perf_t* perf,
126 double* resid ) 126 double* resid )
127{ 127{
128 unsigned int n_repeats = params->n_repeats; 128 unsigned int n_repeats = params->n_repeats;
@@ -258,13 +258,14 @@ void libblis_test_her_experiment( test_params_t* params,
258 } 258 }
259#ifdef BLIS_ENABLE_MULTITHREAD_TEST 259#ifdef BLIS_ENABLE_MULTITHREAD_TEST
260 // Estimate the performance of the best experiment repeat. 260 // Estimate the performance of the best experiment repeat.
261 *perf = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 261 perf->gflops = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
262 if ( bli_obj_is_complex( a[0]) ) *perf *= 4.0; 262 if ( bli_obj_is_complex( a[0]) ) perf->gflops *= 4.0;
263#else 263#else
264 // Estimate the performance of the best experiment repeat. 264 // Estimate the performance of the best experiment repeat.
265 *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; 265 perf->gflops = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
266 if ( bli_obj_is_complex( a ) ) *perf *= 4.0; 266 if ( bli_obj_is_complex( a ) ) perf->gflops *= 4.0;
267#endif 267#endif
268 perf->time = time_min;
268 269
269#ifdef BLIS_ENABLE_MULTITHREAD_TEST 270#ifdef BLIS_ENABLE_MULTITHREAD_TEST
270 // Check output of each thread, and send max residue to main 271 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_her2.c b/blis/testsuite/src/test_her2.c
index 7bae1e1..c77f813 100644
--- a/blis/testsuite/src/test_her2.c
+++ b/blis/testsuite/src/test_her2.c
@@ -56,7 +56,7 @@ void libblis_test_her2_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_her2_impl( iface_t iface, 62void libblis_test_her2_impl( iface_t iface,
@@ -124,7 +124,7 @@ void libblis_test_her2_experiment( test_params_t* params,
124 char* pc_str, 124 char* pc_str,
125 char* sc_str, 125 char* sc_str,
126 unsigned int p_cur, 126 unsigned int p_cur,
127 double* perf, 127 perf_t* perf,
128 double* resid ) 128 double* resid )
129{ 129{
130 unsigned int n_repeats = params->n_repeats; 130 unsigned int n_repeats = params->n_repeats;
@@ -267,13 +267,14 @@ void libblis_test_her2_experiment( test_params_t* params,
267 } 267 }
268#ifdef BLIS_ENABLE_MULTITHREAD_TEST 268#ifdef BLIS_ENABLE_MULTITHREAD_TEST
269 // Estimate the performance of the best experiment repeat. 269 // Estimate the performance of the best experiment repeat.
270 *perf = ( 2.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 270 perf->gflops = ( 2.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
271 if ( bli_obj_is_complex( a[0] ) ) *perf *= 4.0; 271 if ( bli_obj_is_complex( a[0] ) ) perf->gflops *= 4.0;
272#else 272#else
273 // Estimate the performance of the best experiment repeat. 273 // Estimate the performance of the best experiment repeat.
274 *perf = ( 2.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; 274 perf->gflops = ( 2.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
275 if ( bli_obj_is_complex( a ) ) *perf *= 4.0; 275 if ( bli_obj_is_complex( a ) ) perf->gflops *= 4.0;
276#endif 276#endif
277 perf->time = time_min;
277 278
278#ifdef BLIS_ENABLE_MULTITHREAD_TEST 279#ifdef BLIS_ENABLE_MULTITHREAD_TEST
279 // Check output of each thread, and send max residue to main 280 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_her2k.c b/blis/testsuite/src/test_her2k.c
index ff60f64..e921367 100644
--- a/blis/testsuite/src/test_her2k.c
+++ b/blis/testsuite/src/test_her2k.c
@@ -56,7 +56,7 @@ void libblis_test_her2k_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_her2k_impl( iface_t iface, 62void libblis_test_her2k_impl( iface_t iface,
@@ -128,7 +128,7 @@ void libblis_test_her2k_experiment( test_params_t* params,
128 char* pc_str, 128 char* pc_str,
129 char* sc_str, 129 char* sc_str,
130 unsigned int p_cur, 130 unsigned int p_cur,
131 double* perf, 131 perf_t* perf,
132 double* resid ) 132 double* resid )
133{ 133{
134 unsigned int n_repeats = params->n_repeats; 134 unsigned int n_repeats = params->n_repeats;
@@ -295,15 +295,15 @@ void libblis_test_her2k_experiment( test_params_t* params,
295 295
296#ifdef BLIS_ENABLE_MULTITHREAD_TEST 296#ifdef BLIS_ENABLE_MULTITHREAD_TEST
297 // Estimate the performance of the best experiment repeat. 297 // Estimate the performance of the best experiment repeat.
298 *perf = ( 2.0 * m * m * k ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 298 perf->gflops = ( 2.0 * m * m * k ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
299 if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; 299 if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0;
300#else 300#else
301 // Estimate the performance of the best experiment repeat. 301 // Estimate the performance of the best experiment repeat.
302 *perf = ( 2.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; 302 perf->gflops = ( 2.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF;
303 if ( bli_obj_is_complex( c ) ) *perf *= 4.0; 303 if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0;
304#endif 304#endif
305 305
306 306 perf->time = time_min;
307 307
308 // Perform checks. 308 // Perform checks.
309#ifdef BLIS_ENABLE_MULTITHREAD_TEST 309#ifdef BLIS_ENABLE_MULTITHREAD_TEST
diff --git a/blis/testsuite/src/test_herk.c b/blis/testsuite/src/test_herk.c
index a2e4bfc..56517fd 100644
--- a/blis/testsuite/src/test_herk.c
+++ b/blis/testsuite/src/test_herk.c
@@ -56,7 +56,7 @@ void libblis_test_herk_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_herk_impl( iface_t iface, 62void libblis_test_herk_impl( iface_t iface,
@@ -126,7 +126,7 @@ void libblis_test_herk_experiment( test_params_t* params,
126 char* pc_str, 126 char* pc_str,
127 char* sc_str, 127 char* sc_str,
128 unsigned int p_cur, 128 unsigned int p_cur,
129 double* perf, 129 perf_t* perf,
130 double* resid ) 130 double* resid )
131{ 131{
132 unsigned int n_repeats = params->n_repeats; 132 unsigned int n_repeats = params->n_repeats;
@@ -290,13 +290,15 @@ void libblis_test_herk_experiment( test_params_t* params,
290 290
291 // Estimate the performance of the best experiment repeat. 291 // Estimate the performance of the best experiment repeat.
292#ifdef BLIS_ENABLE_MULTITHREAD_TEST 292#ifdef BLIS_ENABLE_MULTITHREAD_TEST
293 *perf = ( 1.0 * m * m * k ) * test_way/ time_min / FLOPS_PER_UNIT_PERF; 293 perf->gflops = ( 1.0 * m * m * k ) * test_way/ time_min / FLOPS_PER_UNIT_PERF;
294 if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; 294 if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0;
295#else 295#else
296 *perf = ( 1.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; 296 perf->gflops = ( 1.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF;
297 if ( bli_obj_is_complex( c ) ) *perf *= 4.0; 297 if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0;
298#endif 298#endif
299 299
300 perf->time = time_min;
301
300 // Perform checks. 302 // Perform checks.
301#ifdef BLIS_ENABLE_MULTITHREAD_TEST 303#ifdef BLIS_ENABLE_MULTITHREAD_TEST
302 // Check output of each thread, and send max residue to main 304 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_libblis.c b/blis/testsuite/src/test_libblis.c
index 00249a5..3281c4b 100644
--- a/blis/testsuite/src/test_libblis.c
+++ b/blis/testsuite/src/test_libblis.c
@@ -1125,7 +1125,7 @@ void libblis_test_op_driver( test_params_t* params,
1125 char*, // pc_str (current param string) 1125 char*, // pc_str (current param string)
1126 char*, // sc_str (current storage string) 1126 char*, // sc_str (current storage string)
1127 unsigned int, // p_cur (current problem size) 1127 unsigned int, // p_cur (current problem size)
1128 double*, // perf 1128 perf_t*, // perf
1129 double* ) ) // residual 1129 double* ) ) // residual
1130{ 1130{
1131 unsigned int n_mstorage = params->n_mstorage; 1131 unsigned int n_mstorage = params->n_mstorage;
@@ -1155,7 +1155,8 @@ void libblis_test_op_driver( test_params_t* params,
1155 unsigned int p_cur, pi; 1155 unsigned int p_cur, pi;
1156 unsigned int dt, pci, sci, i, j, o; 1156 unsigned int dt, pci, sci, i, j, o;
1157 1157
1158 double perf, resid; 1158 perf_t perf;
1159 double resid;
1159 char* pass_str; 1160 char* pass_str;
1160 char blank_str[32]; 1161 char blank_str[32];
1161 char funcname_str[64]; 1162 char funcname_str[64];
@@ -1468,6 +1469,8 @@ void libblis_test_op_driver( test_params_t* params,
1468 n_spaces = MAX_FUNC_STRING_LENGTH - strlen( funcname_str ); 1469 n_spaces = MAX_FUNC_STRING_LENGTH - strlen( funcname_str );
1469 fill_string_with_n_spaces( blank_str, n_spaces ); 1470 fill_string_with_n_spaces( blank_str, n_spaces );
1470 1471
1472 strcat(funcname_str,blank_str);
1473
1471 // Print all dimensions to a single string. 1474 // Print all dimensions to a single string.
1472 libblis_test_build_dims_string( op, p_cur, dims_str ); 1475 libblis_test_build_dims_string( op, p_cur, dims_str );
1473 1476
@@ -1478,35 +1481,35 @@ void libblis_test_op_driver( test_params_t* params,
1478 if ( params->output_matlab_format ) 1481 if ( params->output_matlab_format )
1479 { 1482 {
1480 libblis_test_fprintf( stdout, 1483 libblis_test_fprintf( stdout,
1481 "%s%s( %3u, 1:%u ) = [%s %7.3lf %8.2le ]; %c %s\n", 1484 "%s( %3u, 1:%u ) = [%s %8.2le %7.3lf %8.2le ]; %c %s\n",
1482 funcname_str, blank_str, pi, n_dims_print + 2, 1485 funcname_str, pi, n_dims_print + 2,
1483 dims_str, perf, resid, 1486 dims_str, perf.time, perf.gflops, resid,
1484 OUTPUT_COMMENT_CHAR, 1487 OUTPUT_COMMENT_CHAR,
1485 pass_str ); 1488 pass_str );
1486 1489
1487 // Also output to a file if requested (and successfully opened). 1490 // Also output to a file if requested (and successfully opened).
1488 if ( output_stream ) 1491 if ( output_stream )
1489 libblis_test_fprintf( output_stream, 1492 libblis_test_fprintf( output_stream,
1490 "%s%s( %3u, 1:%u ) = [%s %7.3lf %8.2le ]; %c %s\n", 1493 "%s( %3u, 1:%u ) = [%s %8.2le %7.3lf %8.2le ]; %c %s\n",
1491 funcname_str, blank_str, pi, n_dims_print + 2, 1494 funcname_str, pi, n_dims_print + 2,
1492 dims_str, perf, resid, 1495 dims_str, perf.time, perf.gflops, resid,
1493 OUTPUT_COMMENT_CHAR, 1496 OUTPUT_COMMENT_CHAR,
1494 pass_str ); 1497 pass_str );
1495 } 1498 }
1496 else 1499 else
1497 { 1500 {
1498 libblis_test_fprintf( stdout, 1501 libblis_test_fprintf( stdout,
1499 "%s%s %s %7.3lf %8.2le %s\n", 1502 "%s %s %8.2le %7.3lf %8.2le %s\n",
1500 funcname_str, blank_str, 1503 funcname_str,
1501 dims_str, perf, resid, 1504 dims_str, perf.time, perf.gflops, resid,
1502 pass_str ); 1505 pass_str );
1503 1506
1504 // Also output to a file if requested (and successfully opened). 1507 // Also output to a file if requested (and successfully opened).
1505 if ( output_stream ) 1508 if ( output_stream )
1506 libblis_test_fprintf( output_stream, 1509 libblis_test_fprintf( output_stream,
1507 "%s%s %s %7.3lf %8.2le %s\n", 1510 "%s %s %8.2le %7.3lf %8.2le %s\n",
1508 funcname_str, blank_str, 1511 funcname_str,
1509 dims_str, perf, resid, 1512 dims_str, perf.time, perf.gflops, resid,
1510 pass_str ); 1513 pass_str );
1511 } 1514 }
1512 1515
@@ -1600,7 +1603,7 @@ void libblis_test_build_dims_string( test_op_t* op,
1600 if ( op->dimset == BLIS_TEST_DIMS_MF ) 1603 if ( op->dimset == BLIS_TEST_DIMS_MF )
1601 { 1604 {
1602 //sprintf( &dims_str[strlen(dims_str)], " %5u %5u", 1605 //sprintf( &dims_str[strlen(dims_str)], " %5u %5u",
1603 sprintf( dims_str, " %5u %5u", 1606 sprintf( dims_str, " %5u\t %5u\t",
1604 ( unsigned int ) 1607 ( unsigned int )
1605 libblis_test_get_dim_from_prob_size( op->dim_spec[0], 1608 libblis_test_get_dim_from_prob_size( op->dim_spec[0],
1606 p_cur ), 1609 p_cur ),
@@ -1609,7 +1612,7 @@ void libblis_test_build_dims_string( test_op_t* op,
1609 else if ( op->dimset == BLIS_TEST_DIMS_K ) 1612 else if ( op->dimset == BLIS_TEST_DIMS_K )
1610 { 1613 {
1611 //sprintf( &dims_str[strlen(dims_str)], " %5u %5u %5u", 1614 //sprintf( &dims_str[strlen(dims_str)], " %5u %5u %5u",
1612 sprintf( dims_str, " %5u %5u %5u", 1615 sprintf( dims_str, " %5u\t %5u\t %5u\t",
1613 ( unsigned int ) op->dim_aux[0], 1616 ( unsigned int ) op->dim_aux[0],
1614 ( unsigned int ) op->dim_aux[1], 1617 ( unsigned int ) op->dim_aux[1],
1615 ( unsigned int ) 1618 ( unsigned int )
@@ -1619,7 +1622,7 @@ void libblis_test_build_dims_string( test_op_t* op,
1619 else if ( op->dimset == BLIS_TEST_NO_DIMS ) 1622 else if ( op->dimset == BLIS_TEST_NO_DIMS )
1620 { 1623 {
1621 //sprintf( &dims_str[strlen(dims_str)], " %5u %5u", 1624 //sprintf( &dims_str[strlen(dims_str)], " %5u %5u",
1622 sprintf( dims_str, " %5u %5u", 1625 sprintf( dims_str, " %5u\t %5u\t",
1623 ( unsigned int ) op->dim_aux[0], 1626 ( unsigned int ) op->dim_aux[0],
1624 ( unsigned int ) op->dim_aux[1] ); 1627 ( unsigned int ) op->dim_aux[1] );
1625 } 1628 }
@@ -1630,13 +1633,40 @@ void libblis_test_build_dims_string( test_op_t* op,
1630 sprintf( dims_str, "%s", "" ); 1633 sprintf( dims_str, "%s", "" );
1631 1634
1632 // Print all dimensions to a single string. 1635 // Print all dimensions to a single string.
1633 for ( i = 0; i < op->n_dims; ++i ) 1636 if(op->dimset == BLIS_TEST_DIMS_MN) {
1634 { 1637 sprintf( &dims_str[strlen(dims_str)], " %5u\t",
1635 sprintf( &dims_str[strlen(dims_str)], " %5u", 1638 ( unsigned int )
1636 ( unsigned int ) 1639 libblis_test_get_dim_from_prob_size( op->dim_spec[0],
1637 libblis_test_get_dim_from_prob_size( op->dim_spec[i], 1640 p_cur ) );
1641 sprintf( &dims_str[strlen(dims_str)], " %5u\t",
1642 ( unsigned int )
1643 libblis_test_get_dim_from_prob_size( op->dim_spec[1],
1644 p_cur ) );
1645 sprintf( &dims_str[strlen(dims_str)], " \t");
1646
1647 }
1648 else if(op->dimset == BLIS_TEST_DIMS_MK) {
1649 sprintf( &dims_str[strlen(dims_str)], " %5u\t",
1650 ( unsigned int )
1651 libblis_test_get_dim_from_prob_size( op->dim_spec[0],
1652 p_cur ) );
1653 sprintf( &dims_str[strlen(dims_str)], " \t");
1654 sprintf( &dims_str[strlen(dims_str)], " %5u\t",
1655 ( unsigned int )
1656 libblis_test_get_dim_from_prob_size( op->dim_spec[1],
1657 p_cur ) );
1658 }
1659 else {
1660 for ( i = 0; i < op->n_dims; ++i )
1661 {
1662 sprintf( &dims_str[strlen(dims_str)], " %5u\t",
1663 ( unsigned int )
1664 libblis_test_get_dim_from_prob_size( op->dim_spec[i],
1638 p_cur ) ); 1665 p_cur ) );
1666 }
1639 } 1667 }
1668
1669
1640 } 1670 }
1641} 1671}
1642 1672
@@ -1665,7 +1695,7 @@ void libblis_test_build_col_labels_string( test_op_t* op, char* l_str )
1665 n_spaces = 6; 1695 n_spaces = 6;
1666 fill_string_with_n_spaces( blank_str, n_spaces ); 1696 fill_string_with_n_spaces( blank_str, n_spaces );
1667 1697
1668 sprintf( &l_str[strlen(l_str)], "%s", blank_str ); 1698 sprintf( &l_str[strlen(l_str)], "%s\t", blank_str );
1669 1699
1670 if ( op->dimset == BLIS_TEST_DIMS_MNK || 1700 if ( op->dimset == BLIS_TEST_DIMS_MNK ||
1671 op->dimset == BLIS_TEST_DIMS_MN || 1701 op->dimset == BLIS_TEST_DIMS_MN ||
@@ -1674,21 +1704,27 @@ void libblis_test_build_col_labels_string( test_op_t* op, char* l_str )
1674 op->dimset == BLIS_TEST_DIMS_K || 1704 op->dimset == BLIS_TEST_DIMS_K ||
1675 op->dimset == BLIS_TEST_DIMS_MF || 1705 op->dimset == BLIS_TEST_DIMS_MF ||
1676 op->dimset == BLIS_TEST_NO_DIMS ) 1706 op->dimset == BLIS_TEST_NO_DIMS )
1677 sprintf( &l_str[strlen(l_str)], " %5s", "m" ); 1707 sprintf( &l_str[strlen(l_str)], " %5s", "m\t" );
1708 else
1709 sprintf( &l_str[strlen(l_str)], "\t" );
1678 1710
1679 if ( op->dimset == BLIS_TEST_DIMS_MNK || 1711 if ( op->dimset == BLIS_TEST_DIMS_MNK ||
1680 op->dimset == BLIS_TEST_DIMS_MN || 1712 op->dimset == BLIS_TEST_DIMS_MN ||
1681 op->dimset == BLIS_TEST_DIMS_K || 1713 op->dimset == BLIS_TEST_DIMS_K ||
1682 op->dimset == BLIS_TEST_DIMS_MF || 1714 op->dimset == BLIS_TEST_DIMS_MF ||
1683 op->dimset == BLIS_TEST_NO_DIMS ) 1715 op->dimset == BLIS_TEST_NO_DIMS )
1684 sprintf( &l_str[strlen(l_str)], " %5s", "n" ); 1716 sprintf( &l_str[strlen(l_str)], " %5s", "n\t" );
1717 else
1718 sprintf( &l_str[strlen(l_str)], "\t" );
1685 1719
1686 if ( op->dimset == BLIS_TEST_DIMS_MNK || 1720 if ( op->dimset == BLIS_TEST_DIMS_MNK ||
1687 op->dimset == BLIS_TEST_DIMS_MK || 1721 op->dimset == BLIS_TEST_DIMS_MK ||
1688 op->dimset == BLIS_TEST_DIMS_K ) 1722 op->dimset == BLIS_TEST_DIMS_K )
1689 sprintf( &l_str[strlen(l_str)], " %5s", "k" ); 1723 sprintf( &l_str[strlen(l_str)], " %5s", "k\t" );
1724 else
1725 sprintf( &l_str[strlen(l_str)], "\t" );
1690 1726
1691 sprintf( &l_str[strlen(l_str)], "%s", " gflops resid result" ); 1727 sprintf( &l_str[strlen(l_str)], "%s", " \t seconds\t gflops\t resid\t result" );
1692} 1728}
1693 1729
1694 1730
@@ -2037,7 +2073,8 @@ void libblis_test_parse_message( FILE* output_stream, char* message, va_list arg
2037 2073
2038 // Add the final type specifier, and null-terminate the string. 2074 // Add the final type specifier, and null-terminate the string.
2039 format_spec[cf] = message[c]; 2075 format_spec[cf] = message[c];
2040 format_spec[cf+1] = '\0'; 2076 format_spec[cf+1] = '\t';
2077 format_spec[cf+2] = '\0';
2041 2078
2042 // Switch based on type, since we can't predict what will 2079 // Switch based on type, since we can't predict what will
2043 // va_args() will return. 2080 // va_args() will return.
@@ -2156,11 +2193,11 @@ void libblis_test_parse_command_line( int argc, char** argv )
2156 2193
2157 2194
2158 2195
2159void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ) 2196void libblis_test_check_empty_problem( obj_t* c, perf_t* perf, double* resid )
2160{ 2197{
2161 if ( bli_obj_has_zero_dim( *c ) ) 2198 if ( bli_obj_has_zero_dim( *c ) )
2162 { 2199 {
2163 *perf = 0.0; 2200 perf->gflops = 0.0;
2164 *resid = 0.0; 2201 *resid = 0.0;
2165 } 2202 }
2166} 2203}
diff --git a/blis/testsuite/src/test_libblis.h b/blis/testsuite/src/test_libblis.h
index 0afb19c..054bae4 100644
--- a/blis/testsuite/src/test_libblis.h
+++ b/blis/testsuite/src/test_libblis.h
@@ -71,7 +71,7 @@
71#define INPUT_BUFFER_SIZE 256 71#define INPUT_BUFFER_SIZE 256
72#define MAX_FILENAME_LENGTH 1000 72#define MAX_FILENAME_LENGTH 1000
73#define MAX_BINARY_NAME_LENGTH 256 73#define MAX_BINARY_NAME_LENGTH 256
74#define MAX_FUNC_STRING_LENGTH 26 74#define MAX_FUNC_STRING_LENGTH 37
75#define FLOPS_PER_UNIT_PERF 1e9 75#define FLOPS_PER_UNIT_PERF 1e9
76 76
77#define MAX_NUM_MSTORAGE 4 77#define MAX_NUM_MSTORAGE 4
@@ -281,6 +281,12 @@ typedef struct
281 double warnpass; 281 double warnpass;
282} thresh_t; 282} thresh_t;
283 283
284typedef struct
285{
286 double time;
287 unsigned long cycles;
288 double gflops;
289} perf_t;
284 290
285// 291//
286// --- Prototypes -------------------------------------------------------------- 292// --- Prototypes --------------------------------------------------------------
@@ -350,7 +356,7 @@ void libblis_test_op_driver( test_params_t* params,
350 char*, // pc_str (current param string) 356 char*, // pc_str (current param string)
351 char*, // sc_str (current storage string) 357 char*, // sc_str (current storage string)
352 unsigned int, // p_cur (current problem size) 358 unsigned int, // p_cur (current problem size)
353 double*, // perf 359 perf_t*, // perf
354 double* ) ); // residual 360 double* ) ); // residual
355 361
356// --- Generate experiment string labels --- 362// --- Generate experiment string labels ---
@@ -411,7 +417,7 @@ void libblis_test_parse_command_line( int argc, char** argv );
411 417
412// --- Miscellaneous --- 418// --- Miscellaneous ---
413 419
414void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ); 420void libblis_test_check_empty_problem( obj_t* c, perf_t* perf, double* resid );
415 421
416 422
417// 423//
diff --git a/blis/testsuite/src/test_normfm.c b/blis/testsuite/src/test_normfm.c
index 487735c..1b08a83 100644
--- a/blis/testsuite/src/test_normfm.c
+++ b/blis/testsuite/src/test_normfm.c
@@ -56,7 +56,7 @@ void libblis_test_normfm_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_normfm_impl( iface_t iface, 62void libblis_test_normfm_impl( iface_t iface,
@@ -113,7 +113,7 @@ void libblis_test_normfm_experiment( test_params_t* params,
113 char* pc_str, 113 char* pc_str,
114 char* sc_str, 114 char* sc_str,
115 unsigned int p_cur, 115 unsigned int p_cur,
116 double* perf, 116 perf_t* perf,
117 double* resid ) 117 double* resid )
118{ 118{
119 unsigned int n_repeats = params->n_repeats; 119 unsigned int n_repeats = params->n_repeats;
@@ -162,8 +162,9 @@ void libblis_test_normfm_experiment( test_params_t* params,
162 } 162 }
163 163
164 // Estimate the performance of the best experiment repeat. 164 // Estimate the performance of the best experiment repeat.
165 *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 165 perf->gflops = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
166 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 166 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
167 perf->time = time_min;
167 168
168 // Perform checks. 169 // Perform checks.
169 libblis_test_normfm_check( &beta, &x, &norm, resid ); 170 libblis_test_normfm_check( &beta, &x, &norm, resid );
diff --git a/blis/testsuite/src/test_normfv.c b/blis/testsuite/src/test_normfv.c
index 9b35b9f..49fb8ee 100644
--- a/blis/testsuite/src/test_normfv.c
+++ b/blis/testsuite/src/test_normfv.c
@@ -56,7 +56,7 @@ void libblis_test_normfv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_normfv_impl( iface_t iface, 62void libblis_test_normfv_impl( iface_t iface,
@@ -113,7 +113,7 @@ void libblis_test_normfv_experiment( test_params_t* params,
113 char* pc_str, 113 char* pc_str,
114 char* sc_str, 114 char* sc_str,
115 unsigned int p_cur, 115 unsigned int p_cur,
116 double* perf, 116 perf_t* perf,
117 double* resid ) 117 double* resid )
118{ 118{
119 unsigned int n_repeats = params->n_repeats; 119 unsigned int n_repeats = params->n_repeats;
@@ -191,13 +191,14 @@ void libblis_test_normfv_experiment( test_params_t* params,
191 } 191 }
192#ifdef BLIS_ENABLE_MULTITHREAD_TEST 192#ifdef BLIS_ENABLE_MULTITHREAD_TEST
193 // Estimate the performance of the best experiment repeat. 193 // Estimate the performance of the best experiment repeat.
194 *perf = ( 2.0 * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 194 perf->gflops = ( 2.0 * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
195 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 195 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
196#else 196#else
197 // Estimate the performance of the best experiment repeat. 197 // Estimate the performance of the best experiment repeat.
198 *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 198 perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
199 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 199 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
200#endif 200#endif
201 perf->time = time_min;
201 202
202#ifdef BLIS_ENABLE_MULTITHREAD_TEST 203#ifdef BLIS_ENABLE_MULTITHREAD_TEST
203 // Check output of each thread, and send max residue to main 204 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_randm.c b/blis/testsuite/src/test_randm.c
index cef126d..a5962cb 100644
--- a/blis/testsuite/src/test_randm.c
+++ b/blis/testsuite/src/test_randm.c
@@ -56,7 +56,7 @@ void libblis_test_randm_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_randm_impl( iface_t iface, 62void libblis_test_randm_impl( iface_t iface,
@@ -110,7 +110,7 @@ void libblis_test_randm_experiment( test_params_t* params,
110 char* pc_str, 110 char* pc_str,
111 char* sc_str, 111 char* sc_str,
112 unsigned int p_cur, 112 unsigned int p_cur,
113 double* perf, 113 perf_t* perf,
114 double* resid ) 114 double* resid )
115{ 115{
116 unsigned int n_repeats = params->n_repeats; 116 unsigned int n_repeats = params->n_repeats;
@@ -149,8 +149,9 @@ void libblis_test_randm_experiment( test_params_t* params,
149 } 149 }
150 150
151 // Estimate the performance of the best experiment repeat. 151 // Estimate the performance of the best experiment repeat.
152 *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 152 perf->gflops = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
153 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 153 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
154 perf->time = time_min;
154 155
155 // Perform checks. 156 // Perform checks.
156 // For randm(), we don't return a meaningful residual/diff, since we can't 157 // For randm(), we don't return a meaningful residual/diff, since we can't
diff --git a/blis/testsuite/src/test_randv.c b/blis/testsuite/src/test_randv.c
index b74100f..1990ff2 100644
--- a/blis/testsuite/src/test_randv.c
+++ b/blis/testsuite/src/test_randv.c
@@ -56,7 +56,7 @@ void libblis_test_randv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_randv_impl( iface_t iface, 62void libblis_test_randv_impl( iface_t iface,
@@ -110,7 +110,7 @@ void libblis_test_randv_experiment( test_params_t* params,
110 char* pc_str, 110 char* pc_str,
111 char* sc_str, 111 char* sc_str,
112 unsigned int p_cur, 112 unsigned int p_cur,
113 double* perf, 113 perf_t* perf,
114 double* resid ) 114 double* resid )
115{ 115{
116 unsigned int n_repeats = params->n_repeats; 116 unsigned int n_repeats = params->n_repeats;
@@ -149,8 +149,9 @@ void libblis_test_randv_experiment( test_params_t* params,
149 } 149 }
150 150
151 // Estimate the performance of the best experiment repeat. 151 // Estimate the performance of the best experiment repeat.
152 *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 152 perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
153 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 153 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
154 perf->time = time_min;
154 155
155 // Perform checks. 156 // Perform checks.
156 // For randv(), we don't return a meaningful residual/diff, since we can't 157 // For randv(), we don't return a meaningful residual/diff, since we can't
diff --git a/blis/testsuite/src/test_scal2m.c b/blis/testsuite/src/test_scal2m.c
index 2816c8a..63d830b 100644
--- a/blis/testsuite/src/test_scal2m.c
+++ b/blis/testsuite/src/test_scal2m.c
@@ -56,7 +56,7 @@ void libblis_test_scal2m_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_scal2m_impl( iface_t iface, 62void libblis_test_scal2m_impl( iface_t iface,
@@ -119,7 +119,7 @@ void libblis_test_scal2m_experiment( test_params_t* params,
119 char* pc_str, 119 char* pc_str,
120 char* sc_str, 120 char* sc_str,
121 unsigned int p_cur, 121 unsigned int p_cur,
122 double* perf, 122 perf_t* perf,
123 double* resid ) 123 double* resid )
124{ 124{
125 unsigned int n_repeats = params->n_repeats; 125 unsigned int n_repeats = params->n_repeats;
@@ -181,8 +181,9 @@ void libblis_test_scal2m_experiment( test_params_t* params,
181 } 181 }
182 182
183 // Estimate the performance of the best experiment repeat. 183 // Estimate the performance of the best experiment repeat.
184 *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 184 perf->gflops = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
185 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 185 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
186 perf->time = time_min;
186 187
187 // Perform checks. 188 // Perform checks.
188 libblis_test_scal2m_check( &alpha, &x, &y, &y_save, resid ); 189 libblis_test_scal2m_check( &alpha, &x, &y, &y_save, resid );
diff --git a/blis/testsuite/src/test_scal2v.c b/blis/testsuite/src/test_scal2v.c
index 184b500..06c3a43 100644
--- a/blis/testsuite/src/test_scal2v.c
+++ b/blis/testsuite/src/test_scal2v.c
@@ -56,7 +56,7 @@ void libblis_test_scal2v_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_scal2v_impl( iface_t iface, 62void libblis_test_scal2v_impl( iface_t iface,
@@ -119,7 +119,7 @@ void libblis_test_scal2v_experiment( test_params_t* params,
119 char* pc_str, 119 char* pc_str,
120 char* sc_str, 120 char* sc_str,
121 unsigned int p_cur, 121 unsigned int p_cur,
122 double* perf, 122 perf_t* perf,
123 double* resid ) 123 double* resid )
124{ 124{
125 unsigned int n_repeats = params->n_repeats; 125 unsigned int n_repeats = params->n_repeats;
@@ -179,8 +179,9 @@ void libblis_test_scal2v_experiment( test_params_t* params,
179 } 179 }
180 180
181 // Estimate the performance of the best experiment repeat. 181 // Estimate the performance of the best experiment repeat.
182 *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 182 perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
183 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 183 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
184 perf->time = time_min;
184 185
185 // Perform checks. 186 // Perform checks.
186 libblis_test_scal2v_check( &alpha, &x, &y, &y_save, resid ); 187 libblis_test_scal2v_check( &alpha, &x, &y, &y_save, resid );
diff --git a/blis/testsuite/src/test_scalm.c b/blis/testsuite/src/test_scalm.c
index 1c08b87..d0ce2a1 100644
--- a/blis/testsuite/src/test_scalm.c
+++ b/blis/testsuite/src/test_scalm.c
@@ -56,7 +56,7 @@ void libblis_test_scalm_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_scalm_impl( iface_t iface, 62void libblis_test_scalm_impl( iface_t iface,
@@ -115,7 +115,7 @@ void libblis_test_scalm_experiment( test_params_t* params,
115 char* pc_str, 115 char* pc_str,
116 char* sc_str, 116 char* sc_str,
117 unsigned int p_cur, 117 unsigned int p_cur,
118 double* perf, 118 perf_t* perf,
119 double* resid ) 119 double* resid )
120{ 120{
121 unsigned int n_repeats = params->n_repeats; 121 unsigned int n_repeats = params->n_repeats;
@@ -175,8 +175,9 @@ void libblis_test_scalm_experiment( test_params_t* params,
175 } 175 }
176 176
177 // Estimate the performance of the best experiment repeat. 177 // Estimate the performance of the best experiment repeat.
178 *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 178 perf->gflops = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
179 if ( bli_obj_is_complex( y ) ) *perf *= 6.0; 179 if ( bli_obj_is_complex( y ) ) perf->gflops *= 6.0;
180 perf->time = time_min;
180 181
181 // Perform checks. 182 // Perform checks.
182 libblis_test_scalm_check( &beta, &y, &y_save, resid ); 183 libblis_test_scalm_check( &beta, &y, &y_save, resid );
diff --git a/blis/testsuite/src/test_scalv.c b/blis/testsuite/src/test_scalv.c
index e4559e2..eceae19 100644
--- a/blis/testsuite/src/test_scalv.c
+++ b/blis/testsuite/src/test_scalv.c
@@ -56,7 +56,7 @@ void libblis_test_scalv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_scalv_impl( iface_t iface, 62void libblis_test_scalv_impl( iface_t iface,
@@ -116,7 +116,7 @@ void libblis_test_scalv_experiment( test_params_t* params,
116 char* pc_str, 116 char* pc_str,
117 char* sc_str, 117 char* sc_str,
118 unsigned int p_cur, 118 unsigned int p_cur,
119 double* perf, 119 perf_t* perf,
120 double* resid ) 120 double* resid )
121{ 121{
122 unsigned int n_repeats = params->n_repeats; 122 unsigned int n_repeats = params->n_repeats;
@@ -230,13 +230,14 @@ void libblis_test_scalv_experiment( test_params_t* params,
230 } 230 }
231#ifdef BLIS_ENABLE_MULTITHREAD_TEST 231#ifdef BLIS_ENABLE_MULTITHREAD_TEST
232 // Estimate the performance of the best experiment repeat. 232 // Estimate the performance of the best experiment repeat.
233 *perf = ( 1.0 * m ) *test_way/ time_min / FLOPS_PER_UNIT_PERF; 233 perf->gflops = ( 1.0 * m ) *test_way/ time_min / FLOPS_PER_UNIT_PERF;
234 if ( bli_obj_is_complex( y[0] ) ) *perf *= 6.0; 234 if ( bli_obj_is_complex( y[0] ) ) perf->gflops *= 6.0;
235#else 235#else
236 // Estimate the performance of the best experiment repeat. 236 // Estimate the performance of the best experiment repeat.
237 *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 237 perf->gflops = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
238 if ( bli_obj_is_complex( y ) ) *perf *= 6.0; 238 if ( bli_obj_is_complex( y ) ) perf->gflops *= 6.0;
239#endif 239#endif
240 perf->time = time_min;
240 241
241#ifdef BLIS_ENABLE_MULTITHREAD_TEST 242#ifdef BLIS_ENABLE_MULTITHREAD_TEST
242 // Check output of each thread, and send max residue to main 243 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_setm.c b/blis/testsuite/src/test_setm.c
index 316e8ed..6a71f37 100644
--- a/blis/testsuite/src/test_setm.c
+++ b/blis/testsuite/src/test_setm.c
@@ -56,7 +56,7 @@ void libblis_test_setm_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_setm_impl( iface_t iface, 62void libblis_test_setm_impl( iface_t iface,
@@ -112,7 +112,7 @@ void libblis_test_setm_experiment( test_params_t* params,
112 char* pc_str, 112 char* pc_str,
113 char* sc_str, 113 char* sc_str,
114 unsigned int p_cur, 114 unsigned int p_cur,
115 double* perf, 115 perf_t* perf,
116 double* resid ) 116 double* resid )
117{ 117{
118 unsigned int n_repeats = params->n_repeats; 118 unsigned int n_repeats = params->n_repeats;
@@ -158,8 +158,9 @@ void libblis_test_setm_experiment( test_params_t* params,
158 } 158 }
159 159
160 // Estimate the performance of the best experiment repeat. 160 // Estimate the performance of the best experiment repeat.
161 *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 161 perf->gflops = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
162 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 162 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
163 perf->time = time_min;
163 164
164 // Perform checks. 165 // Perform checks.
165 libblis_test_setm_check( &beta, &x, resid ); 166 libblis_test_setm_check( &beta, &x, resid );
diff --git a/blis/testsuite/src/test_setv.c b/blis/testsuite/src/test_setv.c
index dbf7023..e75d9b2 100644
--- a/blis/testsuite/src/test_setv.c
+++ b/blis/testsuite/src/test_setv.c
@@ -56,7 +56,7 @@ void libblis_test_setv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_setv_impl( iface_t iface, 62void libblis_test_setv_impl( iface_t iface,
@@ -112,7 +112,7 @@ void libblis_test_setv_experiment( test_params_t* params,
112 char* pc_str, 112 char* pc_str,
113 char* sc_str, 113 char* sc_str,
114 unsigned int p_cur, 114 unsigned int p_cur,
115 double* perf, 115 perf_t* perf,
116 double* resid ) 116 double* resid )
117{ 117{
118 unsigned int n_repeats = params->n_repeats; 118 unsigned int n_repeats = params->n_repeats;
@@ -156,8 +156,9 @@ void libblis_test_setv_experiment( test_params_t* params,
156 } 156 }
157 157
158 // Estimate the performance of the best experiment repeat. 158 // Estimate the performance of the best experiment repeat.
159 *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 159 perf->gflops = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
160 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 160 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
161 perf->time = time_min;
161 162
162 // Perform checks. 163 // Perform checks.
163 libblis_test_setv_check( &beta, &x, resid ); 164 libblis_test_setv_check( &beta, &x, resid );
diff --git a/blis/testsuite/src/test_subm.c b/blis/testsuite/src/test_subm.c
index eca95e0..b662bbf 100644
--- a/blis/testsuite/src/test_subm.c
+++ b/blis/testsuite/src/test_subm.c
@@ -56,7 +56,7 @@ void libblis_test_subm_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_subm_impl( iface_t iface, 62void libblis_test_subm_impl( iface_t iface,
@@ -115,7 +115,7 @@ void libblis_test_subm_experiment( test_params_t* params,
115 char* pc_str, 115 char* pc_str,
116 char* sc_str, 116 char* sc_str,
117 unsigned int p_cur, 117 unsigned int p_cur,
118 double* perf, 118 perf_t* perf,
119 double* resid ) 119 double* resid )
120{ 120{
121 double time_min = 1e9; 121 double time_min = 1e9;
@@ -168,8 +168,9 @@ void libblis_test_subm_experiment( test_params_t* params,
168 } 168 }
169 169
170 // Estimate the performance of the best experiment repeat. 170 // Estimate the performance of the best experiment repeat.
171 *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 171 perf->gflops = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
172 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 172 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
173 perf->time = time_min;
173 174
174 // Perform checks. 175 // Perform checks.
175 libblis_test_subm_check( &alpha, &beta, &x, &y, resid ); 176 libblis_test_subm_check( &alpha, &beta, &x, &y, resid );
diff --git a/blis/testsuite/src/test_subv.c b/blis/testsuite/src/test_subv.c
index 2b7f8c3..633e6e7 100644
--- a/blis/testsuite/src/test_subv.c
+++ b/blis/testsuite/src/test_subv.c
@@ -56,7 +56,7 @@ void libblis_test_subv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_subv_impl( iface_t iface, 62void libblis_test_subv_impl( iface_t iface,
@@ -115,7 +115,7 @@ void libblis_test_subv_experiment( test_params_t* params,
115 char* pc_str, 115 char* pc_str,
116 char* sc_str, 116 char* sc_str,
117 unsigned int p_cur, 117 unsigned int p_cur,
118 double* perf, 118 perf_t* perf,
119 double* resid ) 119 double* resid )
120{ 120{
121 double time_min = 1e9; 121 double time_min = 1e9;
@@ -165,8 +165,9 @@ void libblis_test_subv_experiment( test_params_t* params,
165 } 165 }
166 166
167 // Estimate the performance of the best experiment repeat. 167 // Estimate the performance of the best experiment repeat.
168 *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; 168 perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
169 if ( bli_obj_is_complex( x ) ) *perf *= 2.0; 169 if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0;
170 perf->time = time_min;
170 171
171 // Perform checks. 172 // Perform checks.
172 libblis_test_subv_check( &alpha, &beta, &x, &y, resid ); 173 libblis_test_subv_check( &alpha, &beta, &x, &y, resid );
diff --git a/blis/testsuite/src/test_symm.c b/blis/testsuite/src/test_symm.c
index 165e64e..04e6dc7 100644
--- a/blis/testsuite/src/test_symm.c
+++ b/blis/testsuite/src/test_symm.c
@@ -56,7 +56,7 @@ void libblis_test_symm_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_symm_impl( iface_t iface, 62void libblis_test_symm_impl( iface_t iface,
@@ -130,7 +130,7 @@ void libblis_test_symm_experiment( test_params_t* params,
130 char* pc_str, 130 char* pc_str,
131 char* sc_str, 131 char* sc_str,
132 unsigned int p_cur, 132 unsigned int p_cur,
133 double* perf, 133 perf_t* perf,
134 double* resid ) 134 double* resid )
135{ 135{
136 unsigned int n_repeats = params->n_repeats; 136 unsigned int n_repeats = params->n_repeats;
@@ -281,13 +281,15 @@ void libblis_test_symm_experiment( test_params_t* params,
281 // Estimate the performance of the best experiment repeat. 281 // Estimate the performance of the best experiment repeat.
282 282
283#ifdef BLIS_ENABLE_MULTITHREAD_TEST 283#ifdef BLIS_ENABLE_MULTITHREAD_TEST
284 *perf = ( 2.0 * mn_side * m * n ) *test_way / time_min / FLOPS_PER_UNIT_PERF; 284 perf->gflops = ( 2.0 * mn_side * m * n ) *test_way / time_min / FLOPS_PER_UNIT_PERF;
285 if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; 285 if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0;
286#else 286#else
287 *perf = ( 2.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 287 perf->gflops = ( 2.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
288 if ( bli_obj_is_complex( c ) ) *perf *= 4.0; 288 if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0;
289#endif 289#endif
290 290
291 perf->time = time_min;
292
291 // Perform checks. 293 // Perform checks.
292#ifdef BLIS_ENABLE_MULTITHREAD_TEST 294#ifdef BLIS_ENABLE_MULTITHREAD_TEST
293 // Check output of each thread, and send max residue to main 295 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_symv.c b/blis/testsuite/src/test_symv.c
index 89ec05d..b2798a7 100644
--- a/blis/testsuite/src/test_symv.c
+++ b/blis/testsuite/src/test_symv.c
@@ -56,7 +56,7 @@ void libblis_test_symv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_symv_impl( iface_t iface, 62void libblis_test_symv_impl( iface_t iface,
@@ -125,7 +125,7 @@ void libblis_test_symv_experiment( test_params_t* params,
125 char* pc_str, 125 char* pc_str,
126 char* sc_str, 126 char* sc_str,
127 unsigned int p_cur, 127 unsigned int p_cur,
128 double* perf, 128 perf_t* perf,
129 double* resid ) 129 double* resid )
130{ 130{
131 unsigned int n_repeats = params->n_repeats; 131 unsigned int n_repeats = params->n_repeats;
@@ -283,13 +283,14 @@ void libblis_test_symv_experiment( test_params_t* params,
283 } 283 }
284#ifdef BLIS_ENABLE_MULTITHREAD_TEST 284#ifdef BLIS_ENABLE_MULTITHREAD_TEST
285 // Estimate the performance of the best experiment repeat. 285 // Estimate the performance of the best experiment repeat.
286 *perf = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 286 perf->gflops = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
287 if ( bli_obj_is_complex( y[0] ) ) *perf *= 4.0; 287 if ( bli_obj_is_complex( y[0] ) ) perf->gflops *= 4.0;
288#else 288#else
289 // Estimate the performance of the best experiment repeat. 289 // Estimate the performance of the best experiment repeat.
290 *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; 290 perf->gflops = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
291 if ( bli_obj_is_complex( y ) ) *perf *= 4.0; 291 if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0;
292#endif 292#endif
293 perf->time = time_min;
293 294
294#ifdef BLIS_ENABLE_MULTITHREAD_TEST 295#ifdef BLIS_ENABLE_MULTITHREAD_TEST
295 // Check output of each thread, and send max residue to main 296 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_syr.c b/blis/testsuite/src/test_syr.c
index 27b4c09..782dada 100644
--- a/blis/testsuite/src/test_syr.c
+++ b/blis/testsuite/src/test_syr.c
@@ -56,7 +56,7 @@ void libblis_test_syr_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_syr_impl( iface_t iface, 62void libblis_test_syr_impl( iface_t iface,
@@ -122,7 +122,7 @@ void libblis_test_syr_experiment( test_params_t* params,
122 char* pc_str, 122 char* pc_str,
123 char* sc_str, 123 char* sc_str,
124 unsigned int p_cur, 124 unsigned int p_cur,
125 double* perf, 125 perf_t* perf,
126 double* resid ) 126 double* resid )
127{ 127{
128 unsigned int n_repeats = params->n_repeats; 128 unsigned int n_repeats = params->n_repeats;
@@ -258,13 +258,14 @@ void libblis_test_syr_experiment( test_params_t* params,
258 } 258 }
259#ifdef BLIS_ENABLE_MULTITHREAD_TEST 259#ifdef BLIS_ENABLE_MULTITHREAD_TEST
260 // Estimate the performance of the best experiment repeat. 260 // Estimate the performance of the best experiment repeat.
261 *perf = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 261 perf->gflops = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
262 if ( bli_obj_is_complex( a[0] ) ) *perf *= 4.0; 262 if ( bli_obj_is_complex( a[0] ) ) perf->gflops *= 4.0;
263#else 263#else
264 // Estimate the performance of the best experiment repeat. 264 // Estimate the performance of the best experiment repeat.
265 *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; 265 perf->gflops = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
266 if ( bli_obj_is_complex( a ) ) *perf *= 4.0; 266 if ( bli_obj_is_complex( a ) ) perf->gflops *= 4.0;
267#endif 267#endif
268 perf->time = time_min;
268 269
269#ifdef BLIS_ENABLE_MULTITHREAD_TEST 270#ifdef BLIS_ENABLE_MULTITHREAD_TEST
270 // Check output of each thread, and send max residue to main 271 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_syr2.c b/blis/testsuite/src/test_syr2.c
index 5488bb4..45c2c79 100644
--- a/blis/testsuite/src/test_syr2.c
+++ b/blis/testsuite/src/test_syr2.c
@@ -56,7 +56,7 @@ void libblis_test_syr2_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_syr2_impl( iface_t iface, 62void libblis_test_syr2_impl( iface_t iface,
@@ -124,7 +124,7 @@ void libblis_test_syr2_experiment( test_params_t* params,
124 char* pc_str, 124 char* pc_str,
125 char* sc_str, 125 char* sc_str,
126 unsigned int p_cur, 126 unsigned int p_cur,
127 double* perf, 127 perf_t* perf,
128 double* resid ) 128 double* resid )
129{ 129{
130 unsigned int n_repeats = params->n_repeats; 130 unsigned int n_repeats = params->n_repeats;
@@ -266,13 +266,14 @@ void libblis_test_syr2_experiment( test_params_t* params,
266 } 266 }
267#ifdef BLIS_ENABLE_MULTITHREAD_TEST 267#ifdef BLIS_ENABLE_MULTITHREAD_TEST
268 // Estimate the performance of the best experiment repeat. 268 // Estimate the performance of the best experiment repeat.
269 *perf = ( 2.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 269 perf->gflops = ( 2.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
270 if ( bli_obj_is_complex( a[0] ) ) *perf *= 4.0; 270 if ( bli_obj_is_complex( a[0] ) ) perf->gflops *= 4.0;
271#else 271#else
272 // Estimate the performance of the best experiment repeat. 272 // Estimate the performance of the best experiment repeat.
273 *perf = ( 2.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; 273 perf->gflops = ( 2.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
274 if ( bli_obj_is_complex( a ) ) *perf *= 4.0; 274 if ( bli_obj_is_complex( a ) ) perf->gflops *= 4.0;
275#endif 275#endif
276 perf->time = time_min;
276 277
277#ifdef BLIS_ENABLE_MULTITHREAD_TEST 278#ifdef BLIS_ENABLE_MULTITHREAD_TEST
278 // Check output of each thread, and send max residue to main 279 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_syr2k.c b/blis/testsuite/src/test_syr2k.c
index d590031..c0a0201 100644
--- a/blis/testsuite/src/test_syr2k.c
+++ b/blis/testsuite/src/test_syr2k.c
@@ -56,7 +56,7 @@ void libblis_test_syr2k_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_syr2k_impl( iface_t iface, 62void libblis_test_syr2k_impl( iface_t iface,
@@ -128,7 +128,7 @@ void libblis_test_syr2k_experiment( test_params_t* params,
128 char* pc_str, 128 char* pc_str,
129 char* sc_str, 129 char* sc_str,
130 unsigned int p_cur, 130 unsigned int p_cur,
131 double* perf, 131 perf_t* perf,
132 double* resid ) 132 double* resid )
133{ 133{
134 unsigned int n_repeats = params->n_repeats; 134 unsigned int n_repeats = params->n_repeats;
@@ -296,13 +296,15 @@ void libblis_test_syr2k_experiment( test_params_t* params,
296 296
297 // Estimate the performance of the best experiment repeat. 297 // Estimate the performance of the best experiment repeat.
298#ifdef BLIS_ENABLE_MULTITHREAD_TEST 298#ifdef BLIS_ENABLE_MULTITHREAD_TEST
299 *perf = ( 2.0 * m * m * k ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 299 perf->gflops = ( 2.0 * m * m * k ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
300 if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; 300 if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0;
301#else 301#else
302 *perf = ( 2.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; 302 perf->gflops = ( 2.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF;
303 if ( bli_obj_is_complex( c ) ) *perf *= 4.0; 303 if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0;
304#endif 304#endif
305 305
306 perf->time = time_min;
307
306#ifdef BLIS_ENABLE_MULTITHREAD_TEST 308#ifdef BLIS_ENABLE_MULTITHREAD_TEST
307 // Check output of each thread, and send max residue to main 309 // Check output of each thread, and send max residue to main
308 for(i = 0; i < test_way; i++) 310 for(i = 0; i < test_way; i++)
diff --git a/blis/testsuite/src/test_syrk.c b/blis/testsuite/src/test_syrk.c
index 8e072ce..64473fd 100644
--- a/blis/testsuite/src/test_syrk.c
+++ b/blis/testsuite/src/test_syrk.c
@@ -56,7 +56,7 @@ void libblis_test_syrk_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_syrk_impl( iface_t iface, 62void libblis_test_syrk_impl( iface_t iface,
@@ -126,7 +126,7 @@ void libblis_test_syrk_experiment( test_params_t* params,
126 char* pc_str, 126 char* pc_str,
127 char* sc_str, 127 char* sc_str,
128 unsigned int p_cur, 128 unsigned int p_cur,
129 double* perf, 129 perf_t* perf,
130 double* resid ) 130 double* resid )
131{ 131{
132 unsigned int n_repeats = params->n_repeats; 132 unsigned int n_repeats = params->n_repeats;
@@ -287,14 +287,15 @@ void libblis_test_syrk_experiment( test_params_t* params,
287 287
288#ifdef BLIS_ENABLE_MULTITHREAD_TEST 288#ifdef BLIS_ENABLE_MULTITHREAD_TEST
289 // Estimate the performance of the best experiment repeat. 289 // Estimate the performance of the best experiment repeat.
290 *perf = ( 1.0 * m * m * k ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 290 perf->gflops = ( 1.0 * m * m * k ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
291 if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; 291 if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0;
292#else 292#else
293 // Estimate the performance of the best experiment repeat. 293 // Estimate the performance of the best experiment repeat.
294 *perf = ( 1.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; 294 perf->gflops = ( 1.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF;
295 if ( bli_obj_is_complex( c ) ) *perf *= 4.0; 295 if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0;
296#endif 296#endif
297 297
298 perf->time = time_min;
298 299
299 // Perform checks. 300 // Perform checks.
300#ifdef BLIS_ENABLE_MULTITHREAD_TEST 301#ifdef BLIS_ENABLE_MULTITHREAD_TEST
diff --git a/blis/testsuite/src/test_trmm.c b/blis/testsuite/src/test_trmm.c
index 8e59ac8..1c294e1 100644
--- a/blis/testsuite/src/test_trmm.c
+++ b/blis/testsuite/src/test_trmm.c
@@ -56,7 +56,7 @@ void libblis_test_trmm_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_trmm_impl( iface_t iface, 62void libblis_test_trmm_impl( iface_t iface,
@@ -126,7 +126,7 @@ void libblis_test_trmm_experiment( test_params_t* params,
126 char* pc_str, 126 char* pc_str,
127 char* sc_str, 127 char* sc_str,
128 unsigned int p_cur, 128 unsigned int p_cur,
129 double* perf, 129 perf_t* perf,
130 double* resid ) 130 double* resid )
131{ 131{
132 unsigned int n_repeats = params->n_repeats; 132 unsigned int n_repeats = params->n_repeats;
@@ -279,14 +279,16 @@ void libblis_test_trmm_experiment( test_params_t* params,
279 } 279 }
280#ifdef BLIS_ENABLE_MULTITHREAD_TEST 280#ifdef BLIS_ENABLE_MULTITHREAD_TEST
281 // Estimate the performance of the best experiment repeat. 281 // Estimate the performance of the best experiment repeat.
282 *perf = ( 1.0 * mn_side * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 282 perf->gflops = ( 1.0 * mn_side * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
283 if ( bli_obj_is_complex( b[0] ) ) *perf *= 4.0; 283 if ( bli_obj_is_complex( b[0] ) ) perf->gflops *= 4.0;
284#else 284#else
285 // Estimate the performance of the best experiment repeat. 285 // Estimate the performance of the best experiment repeat.
286 *perf = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 286 perf->gflops = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
287 if ( bli_obj_is_complex( b ) ) *perf *= 4.0; 287 if ( bli_obj_is_complex( b ) ) perf->gflops *= 4.0;
288#endif 288#endif
289 289
290 perf->time = time_min;
291
290#ifdef BLIS_ENABLE_MULTITHREAD_TEST 292#ifdef BLIS_ENABLE_MULTITHREAD_TEST
291 // Check output of each thread, and send max residue to main 293 // Check output of each thread, and send max residue to main
292 for(i = 0; i < test_way; i++) 294 for(i = 0; i < test_way; i++)
diff --git a/blis/testsuite/src/test_trmm3.c b/blis/testsuite/src/test_trmm3.c
index a36069b..446ebcf 100644
--- a/blis/testsuite/src/test_trmm3.c
+++ b/blis/testsuite/src/test_trmm3.c
@@ -56,7 +56,7 @@ void libblis_test_trmm3_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_trmm3_impl( iface_t iface, 62void libblis_test_trmm3_impl( iface_t iface,
@@ -130,7 +130,7 @@ void libblis_test_trmm3_experiment( test_params_t* params,
130 char* pc_str, 130 char* pc_str,
131 char* sc_str, 131 char* sc_str,
132 unsigned int p_cur, 132 unsigned int p_cur,
133 double* perf, 133 perf_t* perf,
134 double* resid ) 134 double* resid )
135{ 135{
136 unsigned int n_repeats = params->n_repeats; 136 unsigned int n_repeats = params->n_repeats;
@@ -227,8 +227,9 @@ void libblis_test_trmm3_experiment( test_params_t* params,
227 } 227 }
228 228
229 // Estimate the performance of the best experiment repeat. 229 // Estimate the performance of the best experiment repeat.
230 *perf = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 230 perf->gflops = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
231 if ( bli_obj_is_complex( c ) ) *perf *= 4.0; 231 if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0;
232 perf->time = time_min;
232 233
233 // Perform checks. 234 // Perform checks.
234 libblis_test_trmm3_check( side, &alpha, &a, &b, &beta, &c, &c_save, resid ); 235 libblis_test_trmm3_check( side, &alpha, &a, &b, &beta, &c, &c_save, resid );
diff --git a/blis/testsuite/src/test_trmv.c b/blis/testsuite/src/test_trmv.c
index 4147a16..b8fb0f0 100644
--- a/blis/testsuite/src/test_trmv.c
+++ b/blis/testsuite/src/test_trmv.c
@@ -56,7 +56,7 @@ void libblis_test_trmv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_trmv_impl( iface_t iface, 62void libblis_test_trmv_impl( iface_t iface,
@@ -121,7 +121,7 @@ void libblis_test_trmv_experiment( test_params_t* params,
121 char* pc_str, 121 char* pc_str,
122 char* sc_str, 122 char* sc_str,
123 unsigned int p_cur, 123 unsigned int p_cur,
124 double* perf, 124 perf_t* perf,
125 double* resid ) 125 double* resid )
126{ 126{
127 unsigned int n_repeats = params->n_repeats; 127 unsigned int n_repeats = params->n_repeats;
@@ -269,13 +269,14 @@ void libblis_test_trmv_experiment( test_params_t* params,
269 } 269 }
270#ifdef BLIS_ENABLE_MULTITHREAD_TEST 270#ifdef BLIS_ENABLE_MULTITHREAD_TEST
271 // Estimate the performance of the best experiment repeat. 271 // Estimate the performance of the best experiment repeat.
272 *perf = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 272 perf->gflops = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
273 if ( bli_obj_is_complex( x[0] ) ) *perf *= 4.0; 273 if ( bli_obj_is_complex( x[0] ) ) perf->gflops *= 4.0;
274#else 274#else
275 // Estimate the performance of the best experiment repeat. 275 // Estimate the performance of the best experiment repeat.
276 *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; 276 perf->gflops = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
277 if ( bli_obj_is_complex( x ) ) *perf *= 4.0; 277 if ( bli_obj_is_complex( x ) ) perf->gflops *= 4.0;
278#endif 278#endif
279 perf->time = time_min;
279 280
280#ifdef BLIS_ENABLE_MULTITHREAD_TEST 281#ifdef BLIS_ENABLE_MULTITHREAD_TEST
281 // Check output of each thread, and send max residue to main 282 // Check output of each thread, and send max residue to main
diff --git a/blis/testsuite/src/test_trsm.c b/blis/testsuite/src/test_trsm.c
index 75cb9cc..9658563 100644
--- a/blis/testsuite/src/test_trsm.c
+++ b/blis/testsuite/src/test_trsm.c
@@ -56,7 +56,7 @@ void libblis_test_trsm_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_trsm_impl( iface_t iface, 62void libblis_test_trsm_impl( iface_t iface,
@@ -126,7 +126,7 @@ void libblis_test_trsm_experiment( test_params_t* params,
126 char* pc_str, 126 char* pc_str,
127 char* sc_str, 127 char* sc_str,
128 unsigned int p_cur, 128 unsigned int p_cur,
129 double* perf, 129 perf_t* perf,
130 double* resid ) 130 double* resid )
131{ 131{
132 unsigned int n_repeats = params->n_repeats; 132 unsigned int n_repeats = params->n_repeats;
@@ -277,14 +277,16 @@ void libblis_test_trsm_experiment( test_params_t* params,
277 } 277 }
278#ifdef BLIS_ENABLE_MULTITHREAD_TEST 278#ifdef BLIS_ENABLE_MULTITHREAD_TEST
279 // Estimate the performance of the best experiment repeat. 279 // Estimate the performance of the best experiment repeat.
280 *perf = ( 1.0 * mn_side * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 280 perf->gflops = ( 1.0 * mn_side * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
281 if ( bli_obj_is_complex( b[0] ) ) *perf *= 4.0; 281 if ( bli_obj_is_complex( b[0] ) ) perf->gflops *= 4.0;
282#else 282#else
283 // Estimate the performance of the best experiment repeat. 283 // Estimate the performance of the best experiment repeat.
284 *perf = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 284 perf->gflops = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
285 if ( bli_obj_is_complex( b ) ) *perf *= 4.0; 285 if ( bli_obj_is_complex( b ) ) perf->gflops *= 4.0;
286#endif 286#endif
287 287
288 perf->time = time_min;
289
288#ifdef BLIS_ENABLE_MULTITHREAD_TEST 290#ifdef BLIS_ENABLE_MULTITHREAD_TEST
289 // Check output of each thread, and send max residue to main 291 // Check output of each thread, and send max residue to main
290 for(i = 0; i < test_way; i++) 292 for(i = 0; i < test_way; i++)
diff --git a/blis/testsuite/src/test_trsm_ukr.c b/blis/testsuite/src/test_trsm_ukr.c
index 8d12d44..0740bfc 100644
--- a/blis/testsuite/src/test_trsm_ukr.c
+++ b/blis/testsuite/src/test_trsm_ukr.c
@@ -56,7 +56,7 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_trsm_ukr_impl( iface_t iface, 62void libblis_test_trsm_ukr_impl( iface_t iface,
@@ -129,7 +129,7 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params,
129 char* pc_str, 129 char* pc_str,
130 char* sc_str, 130 char* sc_str,
131 unsigned int p_cur, 131 unsigned int p_cur,
132 double* perf, 132 perf_t* perf,
133 double* resid ) 133 double* resid )
134{ 134{
135 unsigned int n_repeats = params->n_repeats; 135 unsigned int n_repeats = params->n_repeats;
@@ -241,8 +241,9 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params,
241 } 241 }
242 242
243 // Estimate the performance of the best experiment repeat. 243 // Estimate the performance of the best experiment repeat.
244 *perf = ( 1.0 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF; 244 perf->gflops = ( 1.0 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
245 if ( bli_obj_is_complex( b ) ) *perf *= 4.0; 245 if ( bli_obj_is_complex( b ) ) perf->gflops *= 4.0;
246 perf->time = time_min;
246 247
247 // Perform checks. 248 // Perform checks.
248 libblis_test_trsm_ukr_check( side, &a, &c, &b, resid ); 249 libblis_test_trsm_ukr_check( side, &a, &c, &b, resid );
diff --git a/blis/testsuite/src/test_trsv.c b/blis/testsuite/src/test_trsv.c
index ccd1a79..f2e4371 100644
--- a/blis/testsuite/src/test_trsv.c
+++ b/blis/testsuite/src/test_trsv.c
@@ -56,7 +56,7 @@ void libblis_test_trsv_experiment( test_params_t* params,
56 char* pc_str, 56 char* pc_str,
57 char* sc_str, 57 char* sc_str,
58 unsigned int p_cur, 58 unsigned int p_cur,
59 double* perf, 59 perf_t* perf,
60 double* resid ); 60 double* resid );
61 61
62void libblis_test_trsv_impl( iface_t iface, 62void libblis_test_trsv_impl( iface_t iface,
@@ -121,7 +121,7 @@ void libblis_test_trsv_experiment( test_params_t* params,
121 char* pc_str, 121 char* pc_str,
122 char* sc_str, 122 char* sc_str,
123 unsigned int p_cur, 123 unsigned int p_cur,
124 double* perf, 124 perf_t* perf,
125 double* resid ) 125 double* resid )
126{ 126{
127 unsigned int n_repeats = params->n_repeats; 127 unsigned int n_repeats = params->n_repeats;
@@ -264,13 +264,14 @@ void libblis_test_trsv_experiment( test_params_t* params,
264 } 264 }
265#ifdef BLIS_ENABLE_MULTITHREAD_TEST 265#ifdef BLIS_ENABLE_MULTITHREAD_TEST
266 // Estimate the performance of the best experiment repeat. 266 // Estimate the performance of the best experiment repeat.
267 *perf = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; 267 perf->gflops = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
268 if ( bli_obj_is_complex( x[0] ) ) *perf *= 4.0; 268 if ( bli_obj_is_complex( x[0] ) ) perf->gflops *= 4.0;
269#else 269#else
270 // Estimate the performance of the best experiment repeat. 270 // Estimate the performance of the best experiment repeat.
271 *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; 271 perf->gflops = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
272 if ( bli_obj_is_complex( x ) ) *perf *= 4.0; 272 if ( bli_obj_is_complex( x ) ) perf->gflops *= 4.0;
273#endif 273#endif
274 perf->time = time_min;
274 275
275#ifdef BLIS_ENABLE_MULTITHREAD_TEST 276#ifdef BLIS_ENABLE_MULTITHREAD_TEST
276 // Check output of each thread, and send max residue to main 277 // Check output of each thread, and send max residue to main
diff --git a/docs/doxygen/doxycfg.txt b/docs/doxygen/doxycfg.txt
new file mode 100644
index 0000000..74a10ee
--- /dev/null
+++ b/docs/doxygen/doxycfg.txt
@@ -0,0 +1,1781 @@
1# Doxyfile 1.7.6.1
2
3# This file describes the settings to be used by the documentation system
4# doxygen (www.doxygen.org) for a project.
5#
6# All text after a hash (#) is considered a comment and will be ignored.
7# The format is:
8# TAG = value [value, ...]
9# For lists, items can also be appended using:
10# TAG += value [value, ...]
11# Values that contain spaces should be placed between quotes (" ").
12
13#---------------------------------------------------------------------------
14# Project related configuration options
15#---------------------------------------------------------------------------
16
17# This tag specifies the encoding used for all characters in the config file
18# that follow. The default is UTF-8 which is also the encoding used for all
19# text before the first occurrence of this tag. Doxygen uses libiconv (or the
20# iconv built into libc) for the transcoding. See
21# http://www.gnu.org/software/libiconv for the list of possible encodings.
22
23DOXYFILE_ENCODING = UTF-8
24
25# The PROJECT_NAME tag is a single word (or sequence of words) that should
26# identify the project. Note that if you do not use Doxywizard you need
27# to put quotes around the project name if it contains spaces.
28
29PROJECT_NAME = "LINALG "
30
31# The PROJECT_NUMBER tag can be used to enter a project or revision number.
32# This could be handy for archiving the generated documentation or
33# if some version control system is used.
34
35PROJECT_NUMBER =
36
37# Using the PROJECT_BRIEF tag one can provide an optional one line description
38# for a project that appears at the top of each page and should give the viewer
39# a quick idea about the purpose of the project. Keep the description short.
40
41PROJECT_BRIEF = "Linear Algebra Library"
42
43# With the PROJECT_LOGO tag one can specify a logo or icon that is
44# included in the documentation. The maximum height of the logo should not
45# exceed 55 pixels and the maximum width should not exceed 200 pixels.
46# Doxygen will copy the logo to the output directory.
47
48PROJECT_LOGO =
49
50# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
51# base path where the generated documentation will be put.
52# If a relative path is entered, it will be relative to the location
53# where doxygen was started. If left blank the current directory will be used.
54
55OUTPUT_DIRECTORY = ./docs/doxygen
56
57# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
58# 4096 sub-directories (in 2 levels) under the output directory of each output
59# format and will distribute the generated files over these directories.
60# Enabling this option can be useful when feeding doxygen a huge amount of
61# source files, where putting all generated files in the same directory would
62# otherwise cause performance problems for the file system.
63
64CREATE_SUBDIRS = YES
65
66# The OUTPUT_LANGUAGE tag is used to specify the language in which all
67# documentation generated by doxygen is written. Doxygen will use this
68# information to generate all constant output in the proper language.
69# The default language is English, other supported languages are:
70# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
71# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
72# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
73# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
74# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak,
75# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
76
77OUTPUT_LANGUAGE = English
78
79# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
80# include brief member descriptions after the members that are listed in
81# the file and class documentation (similar to JavaDoc).
82# Set to NO to disable this.
83
84BRIEF_MEMBER_DESC = YES
85
86# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
87# the brief description of a member or function before the detailed description.
88# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
89# brief descriptions will be completely suppressed.
90
91REPEAT_BRIEF = YES
92
93# This tag implements a quasi-intelligent brief description abbreviator
94# that is used to form the text in various listings. Each string
95# in this list, if found as the leading text of the brief description, will be
96# stripped from the text and the result after processing the whole list, is
97# used as the annotated text. Otherwise, the brief description is used as-is.
98# If left blank, the following values are used ("$name" is automatically
99# replaced with the name of the entity): "The $name class" "The $name widget"
100# "The $name file" "is" "provides" "specifies" "contains"
101# "represents" "a" "an" "the"
102
103ABBREVIATE_BRIEF =
104
105# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
106# Doxygen will generate a detailed section even if there is only a brief
107# description.
108
109ALWAYS_DETAILED_SEC = NO
110
111# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
112# inherited members of a class in the documentation of that class as if those
113# members were ordinary class members. Constructors, destructors and assignment
114# operators of the base classes will not be shown.
115
116INLINE_INHERITED_MEMB = NO
117
118# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
119# path before each file name in the file list and in the header files. If set
120# to NO the shortest path that makes the file name unique will be used.
121
122FULL_PATH_NAMES = YES
123
124# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
125# can be used to strip a user-defined part of the path. Stripping is
126# only done if one of the specified strings matches the left-hand part of
127# the path. The tag can be used to show relative paths in the file list.
128# If left blank the directory from which doxygen is run is used as the
129# path to strip.
130
131STRIP_FROM_PATH =
132
133# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
134# the path mentioned in the documentation of a class, which tells
135# the reader which header file to include in order to use a class.
136# If left blank only the name of the header file containing the class
137# definition is used. Otherwise one should specify the include paths that
138# are normally passed to the compiler using the -I flag.
139
140STRIP_FROM_INC_PATH =
141
142# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
143# (but less readable) file names. This can be useful if your file system
144# doesn't support long names like on DOS, Mac, or CD-ROM.
145
146SHORT_NAMES = NO
147
148# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
149# will interpret the first line (until the first dot) of a JavaDoc-style
150# comment as the brief description. If set to NO, the JavaDoc
151# comments will behave just like regular Qt-style comments
152# (thus requiring an explicit @brief command for a brief description.)
153
154JAVADOC_AUTOBRIEF = NO
155
156# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
157# interpret the first line (until the first dot) of a Qt-style
158# comment as the brief description. If set to NO, the comments
159# will behave just like regular Qt-style comments (thus requiring
160# an explicit \brief command for a brief description.)
161
162QT_AUTOBRIEF = NO
163
164# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
165# treat a multi-line C++ special comment block (i.e. a block of //! or ///
166# comments) as a brief description. This used to be the default behaviour.
167# The new default is to treat a multi-line C++ comment block as a detailed
168# description. Set this tag to YES if you prefer the old behaviour instead.
169
170MULTILINE_CPP_IS_BRIEF = NO
171
172# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
173# member inherits the documentation from any documented member that it
174# re-implements.
175
176INHERIT_DOCS = YES
177
178# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
179# a new page for each member. If set to NO, the documentation of a member will
180# be part of the file/class/namespace that contains it.
181
182SEPARATE_MEMBER_PAGES = NO
183
184# The TAB_SIZE tag can be used to set the number of spaces in a tab.
185# Doxygen uses this value to replace tabs by spaces in code fragments.
186
187TAB_SIZE = 8
188
189# This tag can be used to specify a number of aliases that act
190# as commands in the documentation. An alias has the form "name=value".
191# For example adding "sideeffect=\par Side Effects:\n" will allow you to
192# put the command \sideeffect (or @sideeffect) in the documentation, which
193# will result in a user-defined paragraph with heading "Side Effects:".
194# You can put \n's in the value part of an alias to insert newlines.
195
196ALIASES =
197
198# This tag can be used to specify a number of word-keyword mappings (TCL only).
199# A mapping has the form "name=value". For example adding
200# "class=itcl::class" will allow you to use the command class in the
201# itcl::class meaning.
202
203TCL_SUBST =
204
205# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
206# sources only. Doxygen will then generate output that is more tailored for C.
207# For instance, some of the names that are used will be different. The list
208# of all members will be omitted, etc.
209
210OPTIMIZE_OUTPUT_FOR_C = YES
211
212# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
213# sources only. Doxygen will then generate output that is more tailored for
214# Java. For instance, namespaces will be presented as packages, qualified
215# scopes will look different, etc.
216
217OPTIMIZE_OUTPUT_JAVA = NO
218
219# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
220# sources only. Doxygen will then generate output that is more tailored for
221# Fortran.
222
223OPTIMIZE_FOR_FORTRAN = NO
224
225# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
226# sources. Doxygen will then generate output that is tailored for
227# VHDL.
228
229OPTIMIZE_OUTPUT_VHDL = NO
230
231# Doxygen selects the parser to use depending on the extension of the files it
232# parses. With this tag you can assign which parser to use for a given extension.
233# Doxygen has a built-in mapping, but you can override or extend it using this
234# tag. The format is ext=language, where ext is a file extension, and language
235# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C,
236# C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance, to make
237# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C
238# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions
239# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
240
241EXTENSION_MAPPING =
242
243# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
244# to include (a tag file for) the STL sources as input, then you should
245# set this tag to YES in order to let doxygen match function declarations and
246# definitions whose arguments contain STL classes (e.g. func(std::string); vs.
247# func(std::string) {}). This also makes the inheritance and collaboration
248# diagrams that involve STL classes more complete and accurate.
249
250BUILTIN_STL_SUPPORT = NO
251
252# If you use Microsoft's C++/CLI language, you should set this option to YES to
253# enable parsing support.
254
255CPP_CLI_SUPPORT = NO
256
257# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
258# Doxygen will parse them like normal C++ but will assume all classes use public
259# instead of private inheritance when no explicit protection keyword is present.
260
261SIP_SUPPORT = NO
262
263# For Microsoft's IDL there are propget and propput attributes to indicate getter
264# and setter methods for a property. Setting this option to YES (the default)
265# will make doxygen replace the get and set methods by a property in the
266# documentation. This will only work if the methods are indeed getting or
267# setting a simple type. If this is not the case, or you want to show the
268# methods anyway, you should set this option to NO.
269
270IDL_PROPERTY_SUPPORT = YES
271
272# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
273# tag is set to YES, then doxygen will reuse the documentation of the first
274# member in the group (if any) for the other members of the group. By default
275# all members of a group must be documented explicitly.
276
277DISTRIBUTE_GROUP_DOC = NO
278
279# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
280# the same type (for instance a group of public functions) to be put as a
281# subgroup of that type (e.g. under the Public Functions section). Set it to
282# NO to prevent subgrouping. Alternatively, this can be done per class using
283# the \nosubgrouping command.
284
285SUBGROUPING = YES
286
287# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and
288# unions are shown inside the group in which they are included (e.g. using
289# @ingroup) instead of on a separate page (for HTML and Man pages) or
290# section (for LaTeX and RTF).
291
292INLINE_GROUPED_CLASSES = NO
293
294# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and
295# unions with only public data fields will be shown inline in the documentation
296# of the scope in which they are defined (i.e. file, namespace, or group
297# documentation), provided this scope is documented. If set to NO (the default),
298# structs, classes, and unions are shown on a separate page (for HTML and Man
299# pages) or section (for LaTeX and RTF).
300
301INLINE_SIMPLE_STRUCTS = NO
302
303# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
304# is documented as struct, union, or enum with the name of the typedef. So
305# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
306# with name TypeT. When disabled the typedef will appear as a member of a file,
307# namespace, or class. And the struct will be named TypeS. This can typically
308# be useful for C code in case the coding convention dictates that all compound
309# types are typedef'ed and only the typedef is referenced, never the tag name.
310
311TYPEDEF_HIDES_STRUCT = NO
312
313# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to
314# determine which symbols to keep in memory and which to flush to disk.
315# When the cache is full, less often used symbols will be written to disk.
316# For small to medium size projects (<1000 input files) the default value is
317# probably good enough. For larger projects a too small cache size can cause
318# doxygen to be busy swapping symbols to and from disk most of the time
319# causing a significant performance penalty.
320# If the system has enough physical memory increasing the cache will improve the
321# performance by keeping more symbols in memory. Note that the value works on
322# a logarithmic scale so increasing the size by one will roughly double the
323# memory usage. The cache size is given by this formula:
324# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
325# corresponding to a cache size of 2^16 = 65536 symbols.
326
327SYMBOL_CACHE_SIZE = 0
328
329# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be
330# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given
331# their name and scope. Since this can be an expensive process and often the
332# same symbol appears multiple times in the code, doxygen keeps a cache of
333# pre-resolved symbols. If the cache is too small doxygen will become slower.
334# If the cache is too large, memory is wasted. The cache size is given by this
335# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0,
336# corresponding to a cache size of 2^16 = 65536 symbols.
337
338LOOKUP_CACHE_SIZE = 0
339
340#---------------------------------------------------------------------------
341# Build related configuration options
342#---------------------------------------------------------------------------
343
344# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
345# documentation are documented, even if no documentation was available.
346# Private class members and static file members will be hidden unless
347# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
348
349EXTRACT_ALL = YES
350
351# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
352# will be included in the documentation.
353
354EXTRACT_PRIVATE = NO
355
356# If the EXTRACT_STATIC tag is set to YES all static members of a file
357# will be included in the documentation.
358
359EXTRACT_STATIC = YES
360
361# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
362# defined locally in source files will be included in the documentation.
363# If set to NO only classes defined in header files are included.
364
365EXTRACT_LOCAL_CLASSES = YES
366
367# This flag is only useful for Objective-C code. When set to YES local
368# methods, which are defined in the implementation section but not in
369# the interface are included in the documentation.
370# If set to NO (the default) only methods in the interface are included.
371
372EXTRACT_LOCAL_METHODS = NO
373
374# If this flag is set to YES, the members of anonymous namespaces will be
375# extracted and appear in the documentation as a namespace called
376# 'anonymous_namespace{file}', where file will be replaced with the base
377# name of the file that contains the anonymous namespace. By default
378# anonymous namespaces are hidden.
379
380EXTRACT_ANON_NSPACES = NO
381
382# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
383# undocumented members of documented classes, files or namespaces.
384# If set to NO (the default) these members will be included in the
385# various overviews, but no documentation section is generated.
386# This option has no effect if EXTRACT_ALL is enabled.
387
388HIDE_UNDOC_MEMBERS = NO
389
390# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
391# undocumented classes that are normally visible in the class hierarchy.
392# If set to NO (the default) these classes will be included in the various
393# overviews. This option has no effect if EXTRACT_ALL is enabled.
394
395HIDE_UNDOC_CLASSES = NO
396
397# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
398# friend (class|struct|union) declarations.
399# If set to NO (the default) these declarations will be included in the
400# documentation.
401
402HIDE_FRIEND_COMPOUNDS = NO
403
404# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
405# documentation blocks found inside the body of a function.
406# If set to NO (the default) these blocks will be appended to the
407# function's detailed documentation block.
408
409HIDE_IN_BODY_DOCS = NO
410
411# The INTERNAL_DOCS tag determines if documentation
412# that is typed after a \internal command is included. If the tag is set
413# to NO (the default) then the documentation will be excluded.
414# Set it to YES to include the internal documentation.
415
416INTERNAL_DOCS = NO
417
418# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
419# file names in lower-case letters. If set to YES upper-case letters are also
420# allowed. This is useful if you have classes or files whose names only differ
421# in case and if your file system supports case sensitive file names. Windows
422# and Mac users are advised to set this option to NO.
423
424CASE_SENSE_NAMES = YES
425
426# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
427# will show members with their full class and namespace scopes in the
428# documentation. If set to YES the scope will be hidden.
429
430HIDE_SCOPE_NAMES = NO
431
432# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
433# will put a list of the files that are included by a file in the documentation
434# of that file.
435
436SHOW_INCLUDE_FILES = YES
437
438# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
439# will list include files with double quotes in the documentation
440# rather than with sharp brackets.
441
442FORCE_LOCAL_INCLUDES = NO
443
444# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
445# is inserted in the documentation for inline members.
446
447INLINE_INFO = YES
448
449# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
450# will sort the (detailed) documentation of file and class members
451# alphabetically by member name. If set to NO the members will appear in
452# declaration order.
453
454SORT_MEMBER_DOCS = YES
455
456# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
457# brief documentation of file, namespace and class members alphabetically
458# by member name. If set to NO (the default) the members will appear in
459# declaration order.
460
461SORT_BRIEF_DOCS = NO
462
463# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen
464# will sort the (brief and detailed) documentation of class members so that
465# constructors and destructors are listed first. If set to NO (the default)
466# the constructors will appear in the respective orders defined by
467# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS.
468# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO
469# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
470
471SORT_MEMBERS_CTORS_1ST = NO
472
473# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
474# hierarchy of group names into alphabetical order. If set to NO (the default)
475# the group names will appear in their defined order.
476
477SORT_GROUP_NAMES = NO
478
479# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
480# sorted by fully-qualified names, including namespaces. If set to
481# NO (the default), the class list will be sorted only by class name,
482# not including the namespace part.
483# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
484# Note: This option applies only to the class list, not to the
485# alphabetical list.
486
487SORT_BY_SCOPE_NAME = NO
488
489# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to
490# do proper type resolution of all parameters of a function it will reject a
491# match between the prototype and the implementation of a member function even
492# if there is only one candidate or it is obvious which candidate to choose
493# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen
494# will still accept a match between prototype and implementation in such cases.
495
496STRICT_PROTO_MATCHING = NO
497
498# The GENERATE_TODOLIST tag can be used to enable (YES) or
499# disable (NO) the todo list. This list is created by putting \todo
500# commands in the documentation.
501
502GENERATE_TODOLIST = YES
503
504# The GENERATE_TESTLIST tag can be used to enable (YES) or
505# disable (NO) the test list. This list is created by putting \test
506# commands in the documentation.
507
508GENERATE_TESTLIST = YES
509
510# The GENERATE_BUGLIST tag can be used to enable (YES) or
511# disable (NO) the bug list. This list is created by putting \bug
512# commands in the documentation.
513
514GENERATE_BUGLIST = YES
515
516# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
517# disable (NO) the deprecated list. This list is created by putting
518# \deprecated commands in the documentation.
519
520GENERATE_DEPRECATEDLIST= YES
521
522# The ENABLED_SECTIONS tag can be used to enable conditional
523# documentation sections, marked by \if sectionname ... \endif.
524
525ENABLED_SECTIONS =
526
527# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
528# the initial value of a variable or macro consists of for it to appear in
529# the documentation. If the initializer consists of more lines than specified
530# here it will be hidden. Use a value of 0 to hide initializers completely.
531# The appearance of the initializer of individual variables and macros in the
532# documentation can be controlled using \showinitializer or \hideinitializer
533# command in the documentation regardless of this setting.
534
535MAX_INITIALIZER_LINES = 30
536
537# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
538# at the bottom of the documentation of classes and structs. If set to YES the
539# list will mention the files that were used to generate the documentation.
540
541SHOW_USED_FILES = YES
542
543# If the sources in your project are distributed over multiple directories
544# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
545# in the documentation. The default is NO.
546
547SHOW_DIRECTORIES = NO
548
549# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
550# This will remove the Files entry from the Quick Index and from the
551# Folder Tree View (if specified). The default is YES.
552
553SHOW_FILES = YES
554
555# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
556# Namespaces page.
557# This will remove the Namespaces entry from the Quick Index
558# and from the Folder Tree View (if specified). The default is YES.
559
560SHOW_NAMESPACES = YES
561
562# The FILE_VERSION_FILTER tag can be used to specify a program or script that
563# doxygen should invoke to get the current version for each file (typically from
564# the version control system). Doxygen will invoke the program by executing (via
565# popen()) the command <command> <input-file>, where <command> is the value of
566# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
567# provided by doxygen. Whatever the program writes to standard output
568# is used as the file version. See the manual for examples.
569
570FILE_VERSION_FILTER =
571
572# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
573# by doxygen. The layout file controls the global structure of the generated
574# output files in an output format independent way. To create the layout file
575# that represents doxygen's defaults, run doxygen with the -l option.
576# You can optionally specify a file name after the option, if omitted
577# DoxygenLayout.xml will be used as the name of the layout file.
578
579LAYOUT_FILE =
580
581# The CITE_BIB_FILES tag can be used to specify one or more bib files
582# containing the references data. This must be a list of .bib files. The
583# .bib extension is automatically appended if omitted. Using this command
584# requires the bibtex tool to be installed. See also
585# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style
586# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this
587# feature you need bibtex and perl available in the search path.
588
589CITE_BIB_FILES =
590
591#---------------------------------------------------------------------------
592# configuration options related to warning and progress messages
593#---------------------------------------------------------------------------
594
595# The QUIET tag can be used to turn on/off the messages that are generated
596# by doxygen. Possible values are YES and NO. If left blank NO is used.
597
598QUIET = NO
599
600# The WARNINGS tag can be used to turn on/off the warning messages that are
601# generated by doxygen. Possible values are YES and NO. If left blank
602# NO is used.
603
604WARNINGS = YES
605
606# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
607# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
608# automatically be disabled.
609
610WARN_IF_UNDOCUMENTED = YES
611
612# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
613# potential errors in the documentation, such as not documenting some
614# parameters in a documented function, or documenting parameters that
615# don't exist or using markup commands wrongly.
616
617WARN_IF_DOC_ERROR = YES
618
619# The WARN_NO_PARAMDOC option can be enabled to get warnings for
620# functions that are documented, but have no documentation for their parameters
621# or return value. If set to NO (the default) doxygen will only warn about
622# wrong or incomplete parameter documentation, but not about the absence of
623# documentation.
624
625WARN_NO_PARAMDOC = NO
626
627# The WARN_FORMAT tag determines the format of the warning messages that
628# doxygen can produce. The string should contain the $file, $line, and $text
629# tags, which will be replaced by the file and line number from which the
630# warning originated and the warning text. Optionally the format may contain
631# $version, which will be replaced by the version of the file (if it could
632# be obtained via FILE_VERSION_FILTER)
633
634WARN_FORMAT = "$file:$line: $text"
635
636# The WARN_LOGFILE tag can be used to specify a file to which warning
637# and error messages should be written. If left blank the output is written
638# to stderr.
639
640WARN_LOGFILE =
641
642#---------------------------------------------------------------------------
643# configuration options related to the input files
644#---------------------------------------------------------------------------
645
646# The INPUT tag can be used to specify the files and/or directories that contain
647# documented source files. You may enter file names like "myfile.cpp" or
648# directories like "/usr/src/myproject". Separate the files or directories
649# with spaces.
650
651INPUT = ./ticblas ./docs/doxygen
652
653# This tag can be used to specify the character encoding of the source files
654# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
655# also the default input encoding. Doxygen uses libiconv (or the iconv built
656# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
657# the list of possible encodings.
658
659INPUT_ENCODING = UTF-8
660
661# If the value of the INPUT tag contains directories, you can use the
662# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
663# and *.h) to filter out the source-files in the directories. If left
664# blank the following patterns are tested:
665# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh
666# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py
667# *.f90 *.f *.for *.vhd *.vhdl
668
669FILE_PATTERNS = *.c *.h *.dox
670
671# The RECURSIVE tag can be used to specify whether or not subdirectories
672# should be searched for input files as well. Possible values are YES and NO.
673# If left blank NO is used.
674
675RECURSIVE = NO
676
677# The EXCLUDE tag can be used to specify files and/or directories that should be
678# excluded from the INPUT source files. This way you can easily exclude a
679# subdirectory from a directory tree whose root is specified with the INPUT tag.
680# Note that relative paths are relative to the directory from which doxygen is
681# run.
682
683EXCLUDE =
684
685# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
686# directories that are symbolic links (a Unix file system feature) are excluded
687# from the input.
688
689EXCLUDE_SYMLINKS = NO
690
691# If the value of the INPUT tag contains directories, you can use the
692# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
693# certain files from those directories. Note that the wildcards are matched
694# against the file with absolute path, so to exclude all test directories
695# for example use the pattern */test/*
696
697EXCLUDE_PATTERNS =
698
699# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
700# (namespaces, classes, functions, etc.) that should be excluded from the
701# output. The symbol name can be a fully qualified name, a word, or if the
702# wildcard * is used, a substring. Examples: ANamespace, AClass,
703# AClass::ANamespace, ANamespace::*Test
704
705EXCLUDE_SYMBOLS =
706
707# The EXAMPLE_PATH tag can be used to specify one or more files or
708# directories that contain example code fragments that are included (see
709# the \include command).
710
711EXAMPLE_PATH =
712
713# If the value of the EXAMPLE_PATH tag contains directories, you can use the
714# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
715# and *.h) to filter out the source-files in the directories. If left
716# blank all files are included.
717
718EXAMPLE_PATTERNS =
719
720# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
721# searched for input files to be used with the \include or \dontinclude
722# commands irrespective of the value of the RECURSIVE tag.
723# Possible values are YES and NO. If left blank NO is used.
724
725EXAMPLE_RECURSIVE = NO
726
727# The IMAGE_PATH tag can be used to specify one or more files or
728# directories that contain images that are included in the documentation (see
729# the \image command).
730
731IMAGE_PATH = ./docs/doxygen/images
732
733# The INPUT_FILTER tag can be used to specify a program that doxygen should
734# invoke to filter for each input file. Doxygen will invoke the filter program
735# by executing (via popen()) the command <filter> <input-file>, where <filter>
736# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
737# input file. Doxygen will then use the output that the filter program writes
738# to standard output.
739# If FILTER_PATTERNS is specified, this tag will be
740# ignored.
741
742INPUT_FILTER =
743
744# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
745# basis.
746# Doxygen will compare the file name with each pattern and apply the
747# filter if there is a match.
748# The filters are a list of the form:
749# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
750# info on how filters are used. If FILTER_PATTERNS is empty or if
751# none of the patterns match the file name, INPUT_FILTER is applied.
752
753FILTER_PATTERNS =
754
755# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
756# INPUT_FILTER) will be used to filter the input files when producing source
757# files to browse (i.e. when SOURCE_BROWSER is set to YES).
758
759FILTER_SOURCE_FILES = NO
760
761# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
762# pattern. A pattern will override the setting for FILTER_PATTERNS (if any)
763# and it is also possible to disable source filtering for a specific pattern
764# using *.ext= (so without naming a filter). This option only has effect when
765# FILTER_SOURCE_FILES is enabled.
766
767FILTER_SOURCE_PATTERNS =
768
769#---------------------------------------------------------------------------
770# configuration options related to source browsing
771#---------------------------------------------------------------------------
772
773# If the SOURCE_BROWSER tag is set to YES then a list of source files will
774# be generated. Documented entities will be cross-referenced with these sources.
775# Note: To get rid of all source code in the generated output, make sure also
776# VERBATIM_HEADERS is set to NO.
777
778SOURCE_BROWSER = NO
779
780# Setting the INLINE_SOURCES tag to YES will include the body
781# of functions and classes directly in the documentation.
782
783INLINE_SOURCES = NO
784
785# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
786# doxygen to hide any special comment blocks from generated source code
787# fragments. Normal C and C++ comments will always remain visible.
788
789STRIP_CODE_COMMENTS = YES
790
791# If the REFERENCED_BY_RELATION tag is set to YES
792# then for each documented function all documented
793# functions referencing it will be listed.
794
795REFERENCED_BY_RELATION = NO
796
797# If the REFERENCES_RELATION tag is set to YES
798# then for each documented function all documented entities
799# called/used by that function will be listed.
800
801REFERENCES_RELATION = NO
802
803# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
804# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
805# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
806# link to the source code.
807# Otherwise they will link to the documentation.
808
809REFERENCES_LINK_SOURCE = YES
810
811# If the USE_HTAGS tag is set to YES then the references to source code
812# will point to the HTML generated by the htags(1) tool instead of doxygen
813# built-in source browser. The htags tool is part of GNU's global source
814# tagging system (see http://www.gnu.org/software/global/global.html). You
815# will need version 4.8.6 or higher.
816
817USE_HTAGS = NO
818
819# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
820# will generate a verbatim copy of the header file for each class for
821# which an include is specified. Set to NO to disable this.
822
823VERBATIM_HEADERS = YES
824
825#---------------------------------------------------------------------------
826# configuration options related to the alphabetical class index
827#---------------------------------------------------------------------------
828
829# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
830# of all compounds will be generated. Enable this if the project
831# contains a lot of classes, structs, unions or interfaces.
832
833ALPHABETICAL_INDEX = YES
834
835# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
836# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
837# in which this list will be split (can be a number in the range [1..20])
838
839COLS_IN_ALPHA_INDEX = 5
840
841# In case all classes in a project start with a common prefix, all
842# classes will be put under the same header in the alphabetical index.
843# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
844# should be ignored while generating the index headers.
845
846IGNORE_PREFIX =
847
848#---------------------------------------------------------------------------
849# configuration options related to the HTML output
850#---------------------------------------------------------------------------
851
852# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
853# generate HTML output.
854
855GENERATE_HTML = YES
856
857# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
858# If a relative path is entered the value of OUTPUT_DIRECTORY will be
859# put in front of it. If left blank `html' will be used as the default path.
860
861HTML_OUTPUT = html
862
863# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
864# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
865# doxygen will generate files with .html extension.
866
867HTML_FILE_EXTENSION = .html
868
869# The HTML_HEADER tag can be used to specify a personal HTML header for
870# each generated HTML page. If it is left blank doxygen will generate a
871# standard header. Note that when using a custom header you are responsible
872# for the proper inclusion of any scripts and style sheets that doxygen
873# needs, which is dependent on the configuration options used.
874# It is advised to generate a default header using "doxygen -w html
875# header.html footer.html stylesheet.css YourConfigFile" and then modify
876# that header. Note that the header is subject to change so you typically
877# have to redo this when upgrading to a newer version of doxygen or when
878# changing the value of configuration settings such as GENERATE_TREEVIEW!
879
880HTML_HEADER =
881
882# The HTML_FOOTER tag can be used to specify a personal HTML footer for
883# each generated HTML page. If it is left blank doxygen will generate a
884# standard footer.
885
886HTML_FOOTER =
887
888# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
889# style sheet that is used by each HTML page. It can be used to
890# fine-tune the look of the HTML output. If the tag is left blank doxygen
891# will generate a default style sheet. Note that doxygen will try to copy
892# the style sheet file to the HTML output directory, so don't put your own
893# style sheet in the HTML output directory as well, or it will be erased!
894
895HTML_STYLESHEET =
896
897# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
898# other source files which should be copied to the HTML output directory. Note
899# that these files will be copied to the base HTML output directory. Use the
900# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
901# files. In the HTML_STYLESHEET file, use the file name only. Also note that
902# the files will be copied as-is; there are no commands or markers available.
903
904HTML_EXTRA_FILES =
905
906# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output.
907# Doxygen will adjust the colors in the style sheet and background images
908# according to this color. Hue is specified as an angle on a colorwheel,
909# see http://en.wikipedia.org/wiki/Hue for more information.
910# For instance the value 0 represents red, 60 is yellow, 120 is green,
911# 180 is cyan, 240 is blue, 300 purple, and 360 is red again.
912# The allowed range is 0 to 359.
913
914HTML_COLORSTYLE_HUE = 220
915
916# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of
917# the colors in the HTML output. For a value of 0 the output will use
918# grayscales only. A value of 255 will produce the most vivid colors.
919
920HTML_COLORSTYLE_SAT = 100
921
922# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to
923# the luminance component of the colors in the HTML output. Values below
924# 100 gradually make the output lighter, whereas values above 100 make
925# the output darker. The value divided by 100 is the actual gamma applied,
926# so 80 represents a gamma of 0.8, the value 220 represents a gamma of 2.2,
927# and 100 does not change the gamma.
928
929HTML_COLORSTYLE_GAMMA = 80
930
931# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
932# page will contain the date and time when the page was generated. Setting
933# this to NO can help when comparing the output of multiple runs.
934
935HTML_TIMESTAMP = YES
936
937# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
938# files or namespaces will be aligned in HTML using tables. If set to
939# NO a bullet list will be used.
940
941HTML_ALIGN_MEMBERS = YES
942
943# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
944# documentation will contain sections that can be hidden and shown after the
945# page has loaded. For this to work a browser that supports
946# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
947# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
948
949HTML_DYNAMIC_SECTIONS = NO
950
951# If the GENERATE_DOCSET tag is set to YES, additional index files
952# will be generated that can be used as input for Apple's Xcode 3
953# integrated development environment, introduced with OSX 10.5 (Leopard).
954# To create a documentation set, doxygen will generate a Makefile in the
955# HTML output directory. Running make will produce the docset in that
956# directory and running "make install" will install the docset in
957# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
958# it at startup.
959# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html
960# for more information.
961
962GENERATE_DOCSET = NO
963
964# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
965# feed. A documentation feed provides an umbrella under which multiple
966# documentation sets from a single provider (such as a company or product suite)
967# can be grouped.
968
969DOCSET_FEEDNAME = "Doxygen generated docs"
970
971# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
972# should uniquely identify the documentation set bundle. This should be a
973# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
974# will append .docset to the name.
975
976DOCSET_BUNDLE_ID = org.doxygen.Project
977
978# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
979# the documentation publisher. This should be a reverse domain-name style
980# string, e.g. com.mycompany.MyDocSet.documentation.
981
982DOCSET_PUBLISHER_ID = org.doxygen.Publisher
983
984# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
985
986DOCSET_PUBLISHER_NAME = Publisher
987
988# If the GENERATE_HTMLHELP tag is set to YES, additional index files
989# will be generated that can be used as input for tools like the
990# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
991# of the generated HTML documentation.
992
993GENERATE_HTMLHELP = NO
994
995# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
996# be used to specify the file name of the resulting .chm file. You
997# can add a path in front of the file if the result should not be
998# written to the html output directory.
999
1000CHM_FILE =
1001
1002# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
1003# be used to specify the location (absolute path including file name) of
1004# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
1005# the HTML help compiler on the generated index.hhp.
1006
1007HHC_LOCATION =
1008
1009# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
1010# controls if a separate .chi index file is generated (YES) or that
1011# it should be included in the master .chm file (NO).
1012
1013GENERATE_CHI = NO
1014
1015# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
1016# is used to encode HtmlHelp index (hhk), content (hhc) and project file
1017# content.
1018
1019CHM_INDEX_ENCODING =
1020
1021# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
1022# controls whether a binary table of contents is generated (YES) or a
1023# normal table of contents (NO) in the .chm file.
1024
1025BINARY_TOC = NO
1026
1027# The TOC_EXPAND flag can be set to YES to add extra items for group members
1028# to the contents of the HTML help documentation and to the tree view.
1029
1030TOC_EXPAND = NO
1031
1032# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
1033# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated
1034# that can be used as input for Qt's qhelpgenerator to generate a
1035# Qt Compressed Help (.qch) of the generated HTML documentation.
1036
1037GENERATE_QHP = NO
1038
1039# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
1040# be used to specify the file name of the resulting .qch file.
1041# The path specified is relative to the HTML output folder.
1042
1043QCH_FILE =
1044
1045# The QHP_NAMESPACE tag specifies the namespace to use when generating
1046# Qt Help Project output. For more information please see
1047# http://doc.trolltech.com/qthelpproject.html#namespace
1048
1049QHP_NAMESPACE = org.doxygen.Project
1050
1051# The QHP_VIRTUAL_FOLDER tag specifies the virtual folder to use when generating
1052# Qt Help Project output. For more information please see
1053# http://doc.trolltech.com/qthelpproject.html#virtual-folders
1054
1055QHP_VIRTUAL_FOLDER = doc
1056
1057# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to
1058# add. For more information please see
1059# http://doc.trolltech.com/qthelpproject.html#custom-filters
1060
1061QHP_CUST_FILTER_NAME =
1062
1063# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
1064# custom filter to add. For more information please see
1065# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">
1066# Qt Help Project / Custom Filters</a>.
1067
1068QHP_CUST_FILTER_ATTRS =
1069
1070# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
1071# project's
1072# filter section matches.
1073# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">
1074# Qt Help Project / Filter Attributes</a>.
1075
1076QHP_SECT_FILTER_ATTRS =
1077
1078# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
1079# be used to specify the location of Qt's qhelpgenerator.
1080# If non-empty doxygen will try to run qhelpgenerator on the generated
1081# .qhp file.
1082
1083QHG_LOCATION =
1084
1085# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
1086# will be generated, which together with the HTML files, form an Eclipse help
1087# plugin. To install this plugin and make it available under the help contents
1088# menu in Eclipse, the contents of the directory containing the HTML and XML
1089# files needs to be copied into the plugins directory of eclipse. The name of
1090# the directory within the plugins directory should be the same as
1091# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before
1092# the help appears.
1093
1094GENERATE_ECLIPSEHELP = NO
1095
1096# A unique identifier for the eclipse help plugin. When installing the plugin
1097# the directory name containing the HTML and XML files should also have
1098# this name.
1099
1100ECLIPSE_DOC_ID = org.doxygen.Project
1101
1102# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs)
1103# at top of each HTML page. The value NO (the default) enables the index and
1104# the value YES disables it. Since the tabs have the same information as the
1105# navigation tree you can set this option to YES if you already set
1106# GENERATE_TREEVIEW to YES.
1107
1108DISABLE_INDEX = NO
1109
1110# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
1111# structure should be generated to display hierarchical information.
1112# If the tag value is set to YES, a side panel will be generated
1113# containing a tree-like index structure (just like the one that
1114# is generated for HTML Help). For this to work a browser that supports
1115# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
1116# Windows users are probably better off using the HTML help feature.
1117# Since the tree basically has the same information as the tab index you
1118# could consider setting DISABLE_INDEX to YES when enabling this option.
1119
1120GENERATE_TREEVIEW = NO
1121
1122# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values
1123# (range [0,1..20]) that doxygen will group on one line in the generated HTML
1124# documentation. Note that a value of 0 will completely suppress the enum
1125# values from appearing in the overview section.
1126
1127ENUM_VALUES_PER_LINE = 4
1128
1129# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
1130# and Class Hierarchy pages using a tree view instead of an ordered list.
1131
1132USE_INLINE_TREES = NO
1133
1134# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
1135# used to set the initial width (in pixels) of the frame in which the tree
1136# is shown.
1137
1138TREEVIEW_WIDTH = 250
1139
1140# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open
1141# links to external symbols imported via tag files in a separate window.
1142
1143EXT_LINKS_IN_WINDOW = NO
1144
1145# Use this tag to change the font size of Latex formulas included
1146# as images in the HTML documentation. The default is 10. Note that
1147# when you change the font size after a successful doxygen run you need
1148# to manually remove any form_*.png images from the HTML output directory
1149# to force them to be regenerated.
1150
1151FORMULA_FONTSIZE = 10
1152
1153# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
1154# generated for formulas are transparent PNGs. Transparent PNGs are
1155# not supported properly for IE 6.0, but are supported on all modern browsers.
1156# Note that when changing this option you need to delete any form_*.png files
1157# in the HTML output before the changes have effect.
1158
1159FORMULA_TRANSPARENT = YES
1160
1161# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax
1162# (see http://www.mathjax.org) which uses client side Javascript for the
1163# rendering instead of using prerendered bitmaps. Use this if you do not
1164# have LaTeX installed or if you want formulas to look prettier in the HTML
1165# output. When enabled you also need to install MathJax separately and
1166# configure the path to it using the MATHJAX_RELPATH option.
1167
1168USE_MATHJAX = NO
1169
1170# When MathJax is enabled you need to specify the location relative to the
1171# HTML output directory using the MATHJAX_RELPATH option. The destination
1172# directory should contain the MathJax.js script. For instance, if the mathjax
1173# directory is located at the same level as the HTML output directory, then
1174# MATHJAX_RELPATH should be ../mathjax. The default value points to the
1175# mathjax.org site, so you can quickly see the result without installing
1176# MathJax, but it is strongly recommended to install a local copy of MathJax
1177# before deployment.
1178
1179MATHJAX_RELPATH = http://www.mathjax.org/mathjax
1180
1181# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension
1182# names that should be enabled during MathJax rendering.
1183
1184MATHJAX_EXTENSIONS =
1185
1186# When the SEARCHENGINE tag is enabled doxygen will generate a search box
1187# for the HTML output. The underlying search engine uses javascript
1188# and DHTML and should work on any modern browser. Note that when using
1189# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets
1190# (GENERATE_DOCSET) there is already a search function so this one should
1191# typically be disabled. For large projects the javascript based search engine
1192# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
1193
1194SEARCHENGINE = YES
1195
1196# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
1197# implemented using a PHP enabled web server instead of at the web client
1198# using Javascript. Doxygen will generate the search PHP script and index
1199# file to put on the web server. The advantage of the server
1200# based approach is that it scales better to large projects and allows
1201# full text search. The disadvantages are that it is more difficult to setup
1202# and does not have live searching capabilities.
1203
1204SERVER_BASED_SEARCH = NO
1205
1206#---------------------------------------------------------------------------
1207# configuration options related to the LaTeX output
1208#---------------------------------------------------------------------------
1209
1210# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
1211# generate Latex output.
1212
1213GENERATE_LATEX = YES
1214
1215# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
1216# If a relative path is entered the value of OUTPUT_DIRECTORY will be
1217# put in front of it. If left blank `latex' will be used as the default path.
1218
1219LATEX_OUTPUT = latex
1220
1221# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
1222# invoked. If left blank `latex' will be used as the default command name.
1223# Note that when enabling USE_PDFLATEX this option is only used for
1224# generating bitmaps for formulas in the HTML output, but not in the
1225# Makefile that is written to the output directory.
1226
1227LATEX_CMD_NAME = latex
1228
1229# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
1230# generate index for LaTeX. If left blank `makeindex' will be used as the
1231# default command name.
1232
1233MAKEINDEX_CMD_NAME = makeindex
1234
1235# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
1236# LaTeX documents. This may be useful for small projects and may help to
1237# save some trees in general.
1238
1239COMPACT_LATEX = NO
1240
1241# The PAPER_TYPE tag can be used to set the paper type that is used
1242# by the printer. Possible values are: a4, letter, legal and
1243# executive. If left blank a4wide will be used.
1244
1245PAPER_TYPE = a4
1246
1247# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
1248# packages that should be included in the LaTeX output.
1249
1250EXTRA_PACKAGES =
1251
1252# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
1253# the generated latex document. The header should contain everything until
1254# the first chapter. If it is left blank doxygen will generate a
1255# standard header. Notice: only use this tag if you know what you are doing!
1256
1257LATEX_HEADER =
1258
1259# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for
1260# the generated latex document. The footer should contain everything after
1261# the last chapter. If it is left blank doxygen will generate a
1262# standard footer. Notice: only use this tag if you know what you are doing!
1263
1264LATEX_FOOTER =
1265
1266# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
1267# is prepared for conversion to pdf (using ps2pdf). The pdf file will
1268# contain links (just like the HTML output) instead of page references
1269# This makes the output suitable for online browsing using a pdf viewer.
1270
1271PDF_HYPERLINKS = YES
1272
1273# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
1274# plain latex in the generated Makefile. Set this option to YES to get a
1275# higher quality PDF documentation.
1276
1277USE_PDFLATEX = YES
1278
1279# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
1280# command to the generated LaTeX files. This will instruct LaTeX to keep
1281# running if errors occur, instead of asking the user for help.
1282# This option is also used when generating formulas in HTML.
1283
1284LATEX_BATCHMODE = NO
1285
1286# If LATEX_HIDE_INDICES is set to YES then doxygen will not
1287# include the index chapters (such as File Index, Compound Index, etc.)
1288# in the output.
1289
1290LATEX_HIDE_INDICES = NO
1291
1292# If LATEX_SOURCE_CODE is set to YES then doxygen will include
1293# source code with syntax highlighting in the LaTeX output.
1294# Note that which sources are shown also depends on other settings
1295# such as SOURCE_BROWSER.
1296
1297LATEX_SOURCE_CODE = NO
1298
1299# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
1300# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See
1301# http://en.wikipedia.org/wiki/BibTeX for more info.
1302
1303LATEX_BIB_STYLE = plain
1304
1305#---------------------------------------------------------------------------
1306# configuration options related to the RTF output
1307#---------------------------------------------------------------------------
1308
1309# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
1310# The RTF output is optimized for Word 97 and may not look very pretty with
1311# other RTF readers or editors.
1312
1313GENERATE_RTF = NO
1314
1315# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
1316# If a relative path is entered the value of OUTPUT_DIRECTORY will be
1317# put in front of it. If left blank `rtf' will be used as the default path.
1318
1319RTF_OUTPUT = rtf
1320
1321# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
1322# RTF documents. This may be useful for small projects and may help to
1323# save some trees in general.
1324
1325COMPACT_RTF = NO
1326
1327# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
1328# will contain hyperlink fields. The RTF file will
1329# contain links (just like the HTML output) instead of page references.
1330# This makes the output suitable for online browsing using WORD or other
1331# programs which support those fields.
1332# Note: wordpad (write) and others do not support links.
1333
1334RTF_HYPERLINKS = NO
1335
1336# Load style sheet definitions from file. Syntax is similar to doxygen's
1337# config file, i.e. a series of assignments. You only have to provide
1338# replacements, missing definitions are set to their default value.
1339
1340RTF_STYLESHEET_FILE =
1341
1342# Set optional variables used in the generation of an rtf document.
1343# Syntax is similar to doxygen's config file.
1344
1345RTF_EXTENSIONS_FILE =
1346
1347#---------------------------------------------------------------------------
1348# configuration options related to the man page output
1349#---------------------------------------------------------------------------
1350
1351# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
1352# generate man pages
1353
1354GENERATE_MAN = NO
1355
1356# The MAN_OUTPUT tag is used to specify where the man pages will be put.
1357# If a relative path is entered the value of OUTPUT_DIRECTORY will be
1358# put in front of it. If left blank `man' will be used as the default path.
1359
1360MAN_OUTPUT = man
1361
1362# The MAN_EXTENSION tag determines the extension that is added to
1363# the generated man pages (default is the subroutine's section .3)
1364
1365MAN_EXTENSION = .3
1366
1367# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
1368# then it will generate one additional man file for each entity
1369# documented in the real man page(s). These additional files
1370# only source the real man page, but without them the man command
1371# would be unable to find the correct page. The default is NO.
1372
1373MAN_LINKS = NO
1374
1375#---------------------------------------------------------------------------
1376# configuration options related to the XML output
1377#---------------------------------------------------------------------------
1378
1379# If the GENERATE_XML tag is set to YES Doxygen will
1380# generate an XML file that captures the structure of
1381# the code including all documentation.
1382
1383GENERATE_XML = NO
1384
1385# The XML_OUTPUT tag is used to specify where the XML pages will be put.
1386# If a relative path is entered the value of OUTPUT_DIRECTORY will be
1387# put in front of it. If left blank `xml' will be used as the default path.
1388
1389XML_OUTPUT = xml
1390
1391# The XML_SCHEMA tag can be used to specify an XML schema,
1392# which can be used by a validating XML parser to check the
1393# syntax of the XML files.
1394
1395XML_SCHEMA =
1396
1397# The XML_DTD tag can be used to specify an XML DTD,
1398# which can be used by a validating XML parser to check the
1399# syntax of the XML files.
1400
1401XML_DTD =
1402
1403# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
1404# dump the program listings (including syntax highlighting
1405# and cross-referencing information) to the XML output. Note that
1406# enabling this will significantly increase the size of the XML output.
1407
1408XML_PROGRAMLISTING = YES
1409
1410#---------------------------------------------------------------------------
1411# configuration options for the AutoGen Definitions output
1412#---------------------------------------------------------------------------
1413
1414# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
1415# generate an AutoGen Definitions (see autogen.sf.net) file
1416# that captures the structure of the code including all
1417# documentation. Note that this feature is still experimental
1418# and incomplete at the moment.
1419
1420GENERATE_AUTOGEN_DEF = NO
1421
1422#---------------------------------------------------------------------------
1423# configuration options related to the Perl module output
1424#---------------------------------------------------------------------------
1425
1426# If the GENERATE_PERLMOD tag is set to YES Doxygen will
1427# generate a Perl module file that captures the structure of
1428# the code including all documentation. Note that this
1429# feature is still experimental and incomplete at the
1430# moment.
1431
1432GENERATE_PERLMOD = NO
1433
1434# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
1435# the necessary Makefile rules, Perl scripts and LaTeX code to be able
1436# to generate PDF and DVI output from the Perl module output.
1437
1438PERLMOD_LATEX = NO
1439
1440# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
1441# nicely formatted so it can be parsed by a human reader.
1442# This is useful
1443# if you want to understand what is going on.
1444# On the other hand, if this
1445# tag is set to NO the size of the Perl module output will be much smaller
1446# and Perl will parse it just the same.
1447
1448PERLMOD_PRETTY = YES
1449
1450# The names of the make variables in the generated doxyrules.make file
1451# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
1452# This is useful so different doxyrules.make files included by the same
1453# Makefile don't overwrite each other's variables.
1454
1455PERLMOD_MAKEVAR_PREFIX =
1456
1457#---------------------------------------------------------------------------
1458# Configuration options related to the preprocessor
1459#---------------------------------------------------------------------------
1460
1461# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
1462# evaluate all C-preprocessor directives found in the sources and include
1463# files.
1464
1465ENABLE_PREPROCESSING = YES
1466
1467# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
1468# names in the source code. If set to NO (the default) only conditional
1469# compilation will be performed. Macro expansion can be done in a controlled
1470# way by setting EXPAND_ONLY_PREDEF to YES.
1471
1472MACRO_EXPANSION = NO
1473
1474# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
1475# then the macro expansion is limited to the macros specified with the
1476# PREDEFINED and EXPAND_AS_DEFINED tags.
1477
1478EXPAND_ONLY_PREDEF = NO
1479
1480# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
1481# pointed to by INCLUDE_PATH will be searched when a #include is found.
1482
1483SEARCH_INCLUDES = YES
1484
1485# The INCLUDE_PATH tag can be used to specify one or more directories that
1486# contain include files that are not input files but should be processed by
1487# the preprocessor.
1488
1489INCLUDE_PATH =
1490
1491# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
1492# patterns (like *.h and *.hpp) to filter out the header-files in the
1493# directories. If left blank, the patterns specified with FILE_PATTERNS will
1494# be used.
1495
1496INCLUDE_FILE_PATTERNS =
1497
1498# The PREDEFINED tag can be used to specify one or more macro names that
1499# are defined before the preprocessor is started (similar to the -D option of
1500# gcc). The argument of the tag is a list of macros of the form: name
1501# or name=definition (no spaces). If the definition and the = are
1502# omitted =1 is assumed. To prevent a macro definition from being
1503# undefined via #undef or recursively expanded use the := operator
1504# instead of the = operator.
1505
1506PREDEFINED =
1507
1508# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
1509# this tag can be used to specify a list of macro names that should be expanded.
1510# The macro definition that is found in the sources will be used.
1511# Use the PREDEFINED tag if you want to use a different macro definition that
1512# overrules the definition found in the source code.
1513
1514EXPAND_AS_DEFINED =
1515
1516# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
1517# doxygen's preprocessor will remove all references to function-like macros
1518# that are alone on a line, have an all uppercase name, and do not end with a
1519# semicolon, because these will confuse the parser if not removed.
1520
1521SKIP_FUNCTION_MACROS = YES
1522
1523#---------------------------------------------------------------------------
1524# Configuration::additions related to external references
1525#---------------------------------------------------------------------------
1526
1527# The TAGFILES option can be used to specify one or more tagfiles.
1528# Optionally an initial location of the external documentation
1529# can be added for each tagfile. The format of a tag file without
1530# this location is as follows:
1531#
1532# TAGFILES = file1 file2 ...
1533# Adding location for the tag files is done as follows:
1534#
1535# TAGFILES = file1=loc1 "file2 = loc2" ...
1536# where "loc1" and "loc2" can be relative or absolute paths or
1537# URLs. If a location is present for each tag, the installdox tool
1538# does not have to be run to correct the links.
1539# Note that each tag file must have a unique name
1540# (where the name does NOT include the path)
1541# If a tag file is not located in the directory in which doxygen
1542# is run, you must also specify the path to the tagfile here.
1543
1544TAGFILES =
1545
1546# When a file name is specified after GENERATE_TAGFILE, doxygen will create
1547# a tag file that is based on the input files it reads.
1548
1549GENERATE_TAGFILE =
1550
1551# If the ALLEXTERNALS tag is set to YES all external classes will be listed
1552# in the class index. If set to NO only the inherited external classes
1553# will be listed.
1554
1555ALLEXTERNALS = NO
1556
1557# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
1558# in the modules index. If set to NO, only the current project's groups will
1559# be listed.
1560
1561EXTERNAL_GROUPS = YES
1562
1563# The PERL_PATH should be the absolute path and name of the perl script
1564# interpreter (i.e. the result of `which perl').
1565
1566PERL_PATH = /usr/bin/perl
1567
1568#---------------------------------------------------------------------------
1569# Configuration options related to the dot tool
1570#---------------------------------------------------------------------------
1571
1572# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
1573# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
1574# or super classes. Setting the tag to NO turns the diagrams off. Note that
1575# this option also works with HAVE_DOT disabled, but it is recommended to
1576# install and use dot, since it yields more powerful graphs.
1577
1578CLASS_DIAGRAMS = YES
1579
1580# You can define message sequence charts within doxygen comments using the \msc
1581# command. Doxygen will then run the mscgen tool (see
1582# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
1583# documentation. The MSCGEN_PATH tag allows you to specify the directory where
1584# the mscgen tool resides. If left empty the tool is assumed to be found in the
1585# default search path.
1586
1587MSCGEN_PATH =
1588
1589# If set to YES, the inheritance and collaboration graphs will hide
1590# inheritance and usage relations if the target is undocumented
1591# or is not a class.
1592
1593HIDE_UNDOC_RELATIONS = YES
1594
1595# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
1596# available from the path. This tool is part of Graphviz, a graph visualization
1597# toolkit from AT&T and Lucent Bell Labs. The other options in this section
1598# have no effect if this option is set to NO (the default)
1599
1600HAVE_DOT = NO
1601
1602# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is
1603# allowed to run in parallel. When set to 0 (the default) doxygen will
1604# base this on the number of processors available in the system. You can set it
1605# explicitly to a value larger than 0 to get control over the balance
1606# between CPU load and processing speed.
1607
1608DOT_NUM_THREADS = 0
1609
1610# By default doxygen will use the Helvetica font for all dot files that
1611# doxygen generates. When you want a differently looking font you can specify
1612# the font name using DOT_FONTNAME. You need to make sure dot is able to find
1613# the font, which can be done by putting it in a standard location or by setting
1614# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the
1615# directory containing the font.
1616
1617DOT_FONTNAME = Helvetica
1618
1619# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
1620# The default size is 10pt.
1621
1622DOT_FONTSIZE = 10
1623
1624# By default doxygen will tell dot to use the Helvetica font.
1625# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to
1626# set the path where dot can find it.
1627
1628DOT_FONTPATH =
1629
1630# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
1631# will generate a graph for each documented class showing the direct and
1632# indirect inheritance relations. Setting this tag to YES will force the
1633# CLASS_DIAGRAMS tag to NO.
1634
1635CLASS_GRAPH = YES
1636
1637# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
1638# will generate a graph for each documented class showing the direct and
1639# indirect implementation dependencies (inheritance, containment, and
1640# class references variables) of the class with other documented classes.
1641
1642COLLABORATION_GRAPH = YES
1643
1644# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
1645# will generate a graph for groups, showing the direct groups dependencies
1646
1647GROUP_GRAPHS = YES
1648
1649# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
1650# collaboration diagrams in a style similar to the OMG's Unified Modeling
1651# Language.
1652
1653UML_LOOK = NO
1654
1655# If set to YES, the inheritance and collaboration graphs will show the
1656# relations between templates and their instances.
1657
1658TEMPLATE_RELATIONS = NO
1659
1660# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
1661# tags are set to YES then doxygen will generate a graph for each documented
1662# file showing the direct and indirect include dependencies of the file with
1663# other documented files.
1664
1665INCLUDE_GRAPH = YES
1666
1667# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
1668# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
1669# documented header file showing the documented files that directly or
1670# indirectly include this file.
1671
1672INCLUDED_BY_GRAPH = YES
1673
1674# If the CALL_GRAPH and HAVE_DOT options are set to YES then
1675# doxygen will generate a call dependency graph for every global function
1676# or class method. Note that enabling this option will significantly increase
1677# the time of a run. So in most cases it will be better to enable call graphs
1678# for selected functions only using the \callgraph command.
1679
1680CALL_GRAPH = NO
1681
1682# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
1683# doxygen will generate a caller dependency graph for every global function
1684# or class method. Note that enabling this option will significantly increase
1685# the time of a run. So in most cases it will be better to enable caller
1686# graphs for selected functions only using the \callergraph command.
1687
1688CALLER_GRAPH = NO
1689
1690# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
1691# will generate a graphical hierarchy of all classes instead of a textual one.
1692
1693GRAPHICAL_HIERARCHY = YES
1694
1695# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
1696# then doxygen will show the dependencies a directory has on other directories
1697# in a graphical way. The dependency relations are determined by the #include
1698# relations between the files in the directories.
1699
1700DIRECTORY_GRAPH = YES
1701
1702# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
1703# generated by dot. Possible values are svg, png, jpg, or gif.
1704# If left blank png will be used. If you choose svg you need to set
1705# HTML_FILE_EXTENSION to xhtml in order to make the SVG files
1706# visible in IE 9+ (other browsers do not have this requirement).
1707
1708DOT_IMAGE_FORMAT = png
1709
1710# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
1711# enable generation of interactive SVG images that allow zooming and panning.
1712# Note that this requires a modern browser other than Internet Explorer.
1713# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you
1714# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files
1715# visible. Older versions of IE do not have SVG support.
1716
1717INTERACTIVE_SVG = NO
1718
1719# The tag DOT_PATH can be used to specify the path where the dot tool can be
1720# found. If left blank, it is assumed the dot tool can be found in the path.
1721
1722DOT_PATH =
1723
1724# The DOTFILE_DIRS tag can be used to specify one or more directories that
1725# contain dot files that are included in the documentation (see the
1726# \dotfile command).
1727
1728DOTFILE_DIRS =
1729
1730# The MSCFILE_DIRS tag can be used to specify one or more directories that
1731# contain msc files that are included in the documentation (see the
1732# \mscfile command).
1733
1734MSCFILE_DIRS =
1735
1736# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
1737# nodes that will be shown in the graph. If the number of nodes in a graph
1738# becomes larger than this value, doxygen will truncate the graph, which is
1739# visualized by representing a node as a red box. Note that if the
1740# number of direct children of the root node in a graph is already larger than
1741# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
1742# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
1743
1744DOT_GRAPH_MAX_NODES = 50
1745
1746# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
1747# graphs generated by dot. A depth value of 3 means that only nodes reachable
1748# from the root by following a path via at most 3 edges will be shown. Nodes
1749# that lie further from the root node will be omitted. Note that setting this
1750# option to 1 or 2 may greatly reduce the computation time needed for large
1751# code bases. Also note that the size of a graph can be further restricted by
1752# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
1753
1754MAX_DOT_GRAPH_DEPTH = 0
1755
1756# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
1757# background. This is disabled by default, because dot on Windows does not
1758# seem to support this out of the box. Warning: Depending on the platform used,
1759# enabling this option may lead to badly anti-aliased labels on the edges of
1760# a graph (i.e. they become hard to read).
1761
1762DOT_TRANSPARENT = NO
1763
1764# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
1765# files in one run (i.e. multiple -o and -T options on the command line). This
1766# makes dot run faster, but since only newer versions of dot (>1.8.10)
1767# support this, this feature is disabled by default.
1768
1769DOT_MULTI_TARGETS = YES
1770
1771# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
1772# generate a legend page explaining the meaning of the various boxes and
1773# arrows in the dot generated graphs.
1774
1775GENERATE_LEGEND = YES
1776
1777# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
1778# remove the intermediate dot files that are used to generate
1779# the various graphs.
1780
1781DOT_CLEANUP = YES
diff --git a/docs/doxygen/mainpage.dox b/docs/doxygen/mainpage.dox
new file mode 100644
index 0000000..638242e
--- /dev/null
+++ b/docs/doxygen/mainpage.dox
@@ -0,0 +1,25 @@
1/*
2 * This is a little header file which doxygen parses to generate the main
3 * documentation page
4 */
5
6/*! \mainpage LINALG User's Guide
7 *
8 * @section introduction Introduction
9 * LINALG is TI's Linear Algebra Library, supporting two types of TI platforms:
10 * - ARM+DSP platforms such as AM57xx and Keystone II devices
11 * - DSP-only platforms such as Keystone I devices
12 *
13 * LINALG includes BLAS and LAPACK:
14 * - BLAS is based on BLIS (https://github.com/flame/blis) and available on both ARM+DSP and DSP-only platforms.
15 * - LAPACK is based on CLAPACK 3.2.1 (http://www.netlib.org/clapack/) and available only on ARM+DSP platforms.
16 *
17 * @section linalgapi LINALG API
18 * For the ARM+DSP library, the LINALG API is the standard CBLAS and CLAPACK API, accessible through the ARM (host).
19 * For the DSP-only library, the LINALG API includes the standard CBLAS API as well as the TI CBLAS API extension.
20 *
21 * - CBLAS API: http://www.netlib.org/blas/#_cblas
22 * - CLAPACK API: http://www.netlib.org/clapack/
23 * - @ref ti_cblas_api
24 *
25 */
diff --git a/docs/linalg_user_guide.html b/docs/linalg_user_guide.html
new file mode 120000
index 0000000..2fbd5ff
--- /dev/null
+++ b/docs/linalg_user_guide.html
@@ -0,0 +1 @@
doxygen/html/index.html \ No newline at end of file
diff --git a/examples/make.inc b/examples/make.inc
index d1a70af..e4d7634 100644
--- a/examples/make.inc
+++ b/examples/make.inc
@@ -20,7 +20,8 @@ CFLAGS = -g -O2 -I$(TARGET_ROOTDIR)/usr/include -I$(LINALG_DIR)/include
20 20
21LIB_DIR = $(LINALG_DIR)/lib/ 21LIB_DIR = $(LINALG_DIR)/lib/
22LD_FLAGS=-L$(TARGET_ROOTDIR)/lib -L$(TARGET_ROOTDIR)/usr/lib -Wl,-rpath-link,$(TARGET_ROOTDIR)/lib -Wl,-rpath-link,$(TARGET_ROOTDIR)/usr/lib 22LD_FLAGS=-L$(TARGET_ROOTDIR)/lib -L$(TARGET_ROOTDIR)/usr/lib -Wl,-rpath-link,$(TARGET_ROOTDIR)/lib -Wl,-rpath-link,$(TARGET_ROOTDIR)/usr/lib
23BLASLIB = $(LIB_DIR)libcblas_armplusdsp.a $(LIB_DIR)libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread 23#BLASLIB = $(LIB_DIR)libcblas_armplusdsp.a $(LIB_DIR)libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread
24BLASLIB = $(LIB_DIR)libcblas_armplusdsp.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread
24LAPACKLIB = $(LIB_DIR)libcblaswr.a $(LIB_DIR)liblapack.a $(LIB_DIR)libf2c.a $(LIB_DIR)libcblas_armplusdsp.a $(LIB_DIR)libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread 25LAPACKLIB = $(LIB_DIR)libcblaswr.a $(LIB_DIR)liblapack.a $(LIB_DIR)libf2c.a $(LIB_DIR)libcblas_armplusdsp.a $(LIB_DIR)libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread
25 26
26%.o: %.c 27%.o: %.c
diff --git a/examples/matmpy/main.c b/examples/matmpy/main.c
index 94f6558..3063ec8 100644
--- a/examples/matmpy/main.c
+++ b/examples/matmpy/main.c
@@ -84,7 +84,7 @@ int main()
84 int t; 84 int t;
85 double checksum; 85 double checksum;
86 char *ti_cblas_offload_env; 86 char *ti_cblas_offload_env;
87 int numtests = 1; 87 int numtests = 10;
88 88
89 /* configuration */ 89 /* configuration */
90 m = k = n = 1000; 90 m = k = n = 1000;
diff --git a/ticblas/src/ticblas.c b/ticblas/src/ticblas.c
index 5543557..55dfe2e 100644
--- a/ticblas/src/ticblas.c
+++ b/ticblas/src/ticblas.c
@@ -121,14 +121,14 @@ int tiCblasInit(void * mem_vfast_base, size_t mem_vfast_size,
121 ||((mem_medium_base == NULL) || (mem_medium_size < BLAS_MEM_SIZE_MEDIUM) ) 121 ||((mem_medium_base == NULL) || (mem_medium_size < BLAS_MEM_SIZE_MEDIUM) )
122 ||((mem_slow_base == NULL) || (mem_slow_size < BLAS_MEM_SIZE_SLOW) ) 122 ||((mem_slow_base == NULL) || (mem_slow_size < BLAS_MEM_SIZE_SLOW) )
123 ) { 123 ) {
124 return(TICBLAS_INIT_ERROR); 124 return(TICBLAS_ERROR);
125 } 125 }
126 else { 126 else {
127 lib_smem_vinit(blas_mem_handle, mem_vfast_base, mem_vfast_size); 127 lib_smem_vinit(blas_mem_handle, mem_vfast_base, mem_vfast_size);
128 lib_smem_finit(blas_mem_handle, mem_fast_base, mem_fast_size); 128 lib_smem_finit(blas_mem_handle, mem_fast_base, mem_fast_size);
129 lib_smem_minit(blas_mem_handle, mem_medium_base, mem_medium_size); 129 lib_smem_minit(blas_mem_handle, mem_medium_base, mem_medium_size);
130 lib_smem_sinit(blas_mem_handle, mem_slow_base, mem_slow_size); 130 lib_smem_sinit(blas_mem_handle, mem_slow_base, mem_slow_size);
131 131
132 pool_mk_mem_L1 = lib_smem_valloc(blas_mem_handle, BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE); 132 pool_mk_mem_L1 = lib_smem_valloc(blas_mem_handle, BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
133 pool_kn_mem_L1 = lib_smem_valloc(blas_mem_handle, BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE); 133 pool_kn_mem_L1 = lib_smem_valloc(blas_mem_handle, BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
134 pool_mn_mem_L1 = lib_smem_valloc(blas_mem_handle, BLIS_MN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE); 134 pool_mn_mem_L1 = lib_smem_valloc(blas_mem_handle, BLIS_MN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
@@ -141,36 +141,42 @@ int tiCblasInit(void * mem_vfast_base, size_t mem_vfast_size,
141 pool_kn_mem_L3 = lib_smem_malloc(blas_mem_handle, BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE); 141 pool_kn_mem_L3 = lib_smem_malloc(blas_mem_handle, BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
142 pool_mn_mem_L3 = lib_smem_malloc(blas_mem_handle, BLIS_MN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE); 142 pool_mn_mem_L3 = lib_smem_malloc(blas_mem_handle, BLIS_MN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
143 143
144 if( (pool_mk_mem_L1 == NULL) 144 if( (pool_mk_mem_L1 == NULL)
145 ||(pool_kn_mem_L1 == NULL) 145 ||(pool_kn_mem_L1 == NULL)
146 ||(pool_mn_mem_L1 == NULL) 146 ||(pool_mn_mem_L1 == NULL)
147 ||(pool_mk_mem_L2 == NULL) 147 ||(pool_mk_mem_L2 == NULL)
148 ||(pool_kn_mem_L2 == NULL) 148 ||(pool_kn_mem_L2 == NULL)
149 ||(pool_mn_mem_L2 == NULL) 149 ||(pool_mn_mem_L2 == NULL)
150 ||(pool_mk_mem_L3 == NULL) 150 ||(pool_mk_mem_L3 == NULL)
151 ||(pool_kn_mem_L3 == NULL) 151 ||(pool_kn_mem_L3 == NULL)
152 ||(pool_mn_mem_L3 == NULL) ) { 152 ||(pool_mn_mem_L3 == NULL) ) {
153 return(TICBLAS_INIT_ERROR); 153 return(TICBLAS_ERROR);
154 } 154 }
155 else { 155 else {
156 bli_mem_init(); 156 bli_mem_init();
157 return(TICBLAS_SUCCESS); 157 return(TICBLAS_SUCCESS);
158 } 158 }
159 } 159 }
160} /* tiCblasInit */ 160} /* tiCblasInit */
161 161
162int tiCblasNew() 162int tiCblasNew()
163{ 163{
164 bli_init(); 164 if(bli_init() == BLIS_SUCCESS) {
165 165 return(TICBLAS_SUCCESS);
166 return(TICBLAS_SUCCESS); 166 }
167 else {
168 return(TICBLAS_ERROR);
169 }
167} 170}
168 171
169int tiCblasDelete() 172int tiCblasDelete()
170{ 173{
171 bli_finalize(); 174 if(bli_finalize() == BLIS_SUCCESS) {
172 175 return(TICBLAS_SUCCESS);
173 return(TICBLAS_SUCCESS); 176 }
177 else {
178 return(TICBLAS_ERROR);
179 }
174} 180}
175 181
176/* Nothing after this line */ 182/* Nothing after this line */
diff --git a/ticblas/ticblas.h b/ticblas/ticblas.h
index 2dff96d..611118d 100644
--- a/ticblas/ticblas.h
+++ b/ticblas/ticblas.h
@@ -25,20 +25,84 @@
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE. 26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/ 27 *****************************************************************************/
28#ifndef TICBLAS_H 28#ifndef _TICBLAS_H
29#define TICBLAS_H 29#define _TICBLAS_H
30 30
31#include <stddef.h> 31#include <stddef.h>
32 32
33#define TICBLAS_SUCCESS (0) 33/** @defgroup ti_cblas_api CBLAS API Extension for TI-DSP
34#define TICBLAS_INIT_ERROR (-1) 34 * @{
35 */
36/** @} */
35 37
38/** @addtogroup ti_cblas_api
39 * @{
40 * @name Error Return Codes
41 */
42/*@{*/
43#define TICBLAS_SUCCESS (0) /**< Success. No error. */
44#define TICBLAS_ERROR (-1) /**< Failure. */
45/*@}*/
46/** @} */
47
48/**
49 * @ingroup ti_cblas_api
50 * @brief Function tiCblasGetSizes() returns the required size of each of the
51 * memory types defined by the Library Architecture and Framework
52 * (LibArch)
53 *
54 * @param[out] smem_size_vfast size of very fast shared memory
55 * @param[out] smem_size_fast size of fast shared memory
56 * @param[out] smem_size_medium size of medium speed shared memory
57 * @param[out] smem_size_slow size of slow shared memory
58 *
59 */
36void tiCblasGetSizes(size_t *smem_size_vfast, size_t *smem_size_fast, 60void tiCblasGetSizes(size_t *smem_size_vfast, size_t *smem_size_fast,
37 size_t *smem_size_medium, size_t *smem_size_slow); 61 size_t *smem_size_medium, size_t *smem_size_slow);
38 62
63/**
64 * @ingroup ti_cblas_api
65 * @brief Function tiCblasNew() creates an instance for CBLAS.
66 *
67 * @remarks tiCblasNew() MUST be called before tiCblasInit().
68 *
69 * @retval TICBLAS_SUCCESS @copydoc TICBLAS_SUCCESS
70 * @retval TICBLAS_ERROR @copydoc TICBLAS_ERROR
71 */
72int tiCblasNew();
73
74/**
75 * @ingroup ti_cblas_api
76 * @brief Function tiCblasInit() performs heap initialization for CBLAS
77 * to do memory allocations.
78 *
79 * @remarks tiCblasInit() must NOT be called before tiCblasNew().
80 *
81 * @param[in] mem_vfast_base base of very fast shared memory
82 * @param[in] mem_vfast_size size of very fast shared memory
83 * @param[in] mem_fast_base base of fast shared memory
84 * @param[in] mem_fast_size size of fast shared memory
85 * @param[in] mem_medium_base base of medium speed shared memory
86 * @param[in] mem_medium_size size of medium speed shared memory
87 * @param[in] mem_slow_base base of slow shared memory
88 * @param[in] mem_slow_size size of slow shared memory
89 *
90 * @retval TICBLAS_SUCCESS @copydoc TICBLAS_SUCCESS
91 * @retval TICBLAS_ERROR @copydoc TICBLAS_ERROR
92 */
39int tiCblasInit(void * mem_vfast_base, size_t mem_vfast_size, 93int tiCblasInit(void * mem_vfast_base, size_t mem_vfast_size,
40 void * mem_fast_base, size_t mem_fast_size, 94 void * mem_fast_base, size_t mem_fast_size,
41 void * mem_medium_base, size_t mem_medium_size, 95 void * mem_medium_base, size_t mem_medium_size,
42 void * mem_slow_base, size_t mem_slow_size); 96 void * mem_slow_base, size_t mem_slow_size);
43 97
44#endif /* TICBLAS_H */ 98/**
99 * @ingroup ti_cblas_api
100 * @brief Function tiCblasDelete() deletes the instance of CBLAS created by
101 * tiCblasNew().
102 *
103 * @retval TICBLAS_SUCCESS @copydoc TICBLAS_SUCCESS
104 * @retval TICBLAS_ERROR @copydoc TICBLAS_ERROR
105 */
106int tiCblasDelete();
107
108#endif /* _TICBLAS_H */