From: Jianzhong Xu Date: Fri, 5 Feb 2016 22:22:07 +0000 (+0000) Subject: 1. Added time(latency) to BLIS benchmarking raw data. 2. Combined libblis.a and libcb... X-Git-Tag: DEV.LINALG.1.2.0.0~15 X-Git-Url: https://git.ti.com/gitweb?p=dense-linear-algebra-libraries%2Flinalg.git;a=commitdiff_plain;h=7ac1b6ebdf9323256e7b65c617b0589043f70c61 1. Added time(latency) to BLIS benchmarking raw data. 2. Combined libblis.a and libcblas_armplusdsp.a. 3. Added doxygen documentation. --- diff --git a/Makefile b/Makefile index b03b973..c8d56f4 100644 --- a/Makefile +++ b/Makefile @@ -27,9 +27,9 @@ DSPlibs: cd ../$(LINALG_BLIS_DIR); ./configure -p install/$(BLIS_CFG) c66x; make -j8 MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS); make install; \ cd ../$(LINALG_TICBLAS_DIR)/src; make MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS); cd ../lib; \ echo "combining BLIS, CBLAS, and TICBLAS libraries into one: libcblas.ae66"; \ - mkdir -p objs; cd objs; rm *; ar x ../../../blis/install/$(BLIS_CFG)/lib/libblis.ae66; mmv 'cblas*.o' 'blis_cblas#1.o'; \ + mkdir -p objs; cd objs; rm -f *; ar x ../../../blis/install/$(BLIS_CFG)/lib/libblis.ae66; mmv 'cblas*.o' 'blis_cblas#1.o'; \ ar -x ../../../cblas/lib/C66/libcblas.ae66; ar -x ../libticblas.ae66; chmod +rw *;cd ../../..; \ - mkdir -p lib; cd lib; rm *; ar -cr libcblas.ae66 ../ticblas/lib/objs/*; cd .. + mkdir -p lib; cd lib; rm -f *; ar -cr libcblas.ae66 ../ticblas/lib/objs/*; cd .. 
ARMlibs: cd $(LINALG_CBLAS_DIR); make arch=ARM alllib; \ @@ -38,8 +38,11 @@ ARMlibs: ARMplusDSP: DSPlibs ARMlibs cd $(LINALG_BLASACC_DIR)/src; make MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET); cd ../..; \ - cp $(LINALG_BLASACC_DIR)/lib/libcblas_armplusdsp.a ./lib; \ - cp $(LINALG_BLIS_DIR)/install/arm/lib/libblis.a ./lib; \ + cd lib; rm -f *; \ + cp ../$(LINALG_BLASACC_DIR)/lib/libcblas_armplusdsp.a .; \ + cp ../$(LINALG_BLIS_DIR)/install/arm/lib/libblis.a .; \ + ar -x libblis.a; mmv "cblas_*.o" "blis_cblas_#1.o"; ar -x libcblas_armplusdsp.a; rm *.a; \ + ar -cr libcblas_armplusdsp.a *.o; rm *.o; cd ..; \ cp $(LINALG_CLAPACK_DIR)/lapack_ARM.a ./lib/liblapack.a; \ cp $(LINALG_CLAPACK_DIR)/libcblaswr_ARM.a ./lib/libcblaswr.a; \ cp $(LINALG_CLAPACK_DIR)/F2CLIBS/libf2c_ARM.a ./lib/libf2c.a @@ -86,7 +89,6 @@ installARMplusDSPlib: install -m 755 -d ${DESTDIR}/lib cp $(CBLAS_HEADERS) ${DESTDIR}/include cp $(CLAPACK_HEADERS) ${DESTDIR}/include - cp ./lib/libblis.a ${DESTDIR}/lib cp ./lib/libcblas_armplusdsp.a ${DESTDIR}/lib cp ./lib/liblapack.a ${DESTDIR}/lib cp ./lib/libcblaswr.a ${DESTDIR}/lib diff --git a/blasblisacc/src/ti_cblas_mem_config.c b/blasblisacc/src/ti_cblas_mem_config.c index 7b9c5ab..2c72d94 100644 --- a/blasblisacc/src/ti_cblas_mem_config.c +++ b/blasblisacc/src/ti_cblas_mem_config.c @@ -61,7 +61,7 @@ int bli_l3_mem_config(void *msmc_buf, size_t msmc_buf_size, void *ddr_buf, size_ ||(smem_size_med > msmc_buf_size) /* provided MSMC memory */ ||(smem_size_slow > ddr_buf_size) /* provided DDR memory */ ) { - return(TICBLAS_INIT_ERROR); + return(TICBLAS_ERROR); } /* Configure L1D if necessary */ @@ -105,7 +105,7 @@ int bli_l3_mem_config(void *msmc_buf, size_t msmc_buf_size, void *ddr_buf, size_ } if(l1d_cfg_err || l2_cfg_err) { - return(TICBLAS_INIT_ERROR); + return(TICBLAS_ERROR); } #ifdef TI_CBLAS_DEBUG @@ -179,7 +179,9 @@ int bli_l3_mem_reconfig(size_t l1D_SRAM_size_orig, size_t l2_SRAM_size_orig) return(TICBLAS_SUCCESS); } /* bli_l3_mem_reconfig */ -/* This 
function will be removed. Function tiCblasNew() will be used instead. */ +/*============================================================================== + * This function initializes BLIS before first CBLAS call is made. + *============================================================================*/ void ti_bli_init_dsp(char *l3_buf, char *l2_buf) { #ifdef TI_CBLAS_DEBUG @@ -189,15 +191,19 @@ void ti_bli_init_dsp(char *l3_buf, char *l2_buf) printf("Before calling bli_init, malloc_size is %d.\n", malloc_size); #endif - bli_init(); + tiCblasNew(); #ifdef TI_CBLAS_DEBUG printf("After calling bli_init, malloc_size is %d.\n", malloc_size); #endif } -/* This function will be removed. Function tiCblasDelete() will be used instead. */ +/*============================================================================== + * This function frees all memories allocated by ti_bli_init_dsp. + *============================================================================*/ void ti_bli_finalize_dsp(void) { - bli_finalize(); + tiCblasDelete(); } + +/* Nothing after this line */ diff --git a/blis/testsuite/parselog.pl b/blis/testsuite/parselog.pl new file mode 100755 index 0000000..e52e2a9 --- /dev/null +++ b/blis/testsuite/parselog.pl @@ -0,0 +1,15 @@ +#!/usr/bin/perl -sw + +my $input_file = $ARGV[0]; +my $output_file = $ARGV[1]; +open( my $fh_in, '<', $input_file ) or die "Can't open $input_file: $!"; +open( my $fh_out, '>', $output_file); + +while ( my $line = <$fh_in> ) { + if ( $line =~ /blis_/ ) { + print $fh_out $line; + } +} + +close $fh_in; +close $fh_out \ No newline at end of file diff --git a/blis/testsuite/src/test_addm.c b/blis/testsuite/src/test_addm.c index da27a3a..cd6bd16 100644 --- a/blis/testsuite/src/test_addm.c +++ b/blis/testsuite/src/test_addm.c @@ -56,7 +56,7 @@ void libblis_test_addm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_addm_impl( iface_t iface, 
@@ -115,7 +115,7 @@ void libblis_test_addm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { double time_min = 1e9; @@ -168,8 +168,9 @@ void libblis_test_addm_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; + perf->time = time_min; // Perform checks. libblis_test_addm_check( &alpha, &beta, &x, &y, resid ); diff --git a/blis/testsuite/src/test_addv.c b/blis/testsuite/src/test_addv.c index b890994..afa54dc 100644 --- a/blis/testsuite/src/test_addv.c +++ b/blis/testsuite/src/test_addv.c @@ -56,7 +56,7 @@ void libblis_test_addv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_addv_impl( iface_t iface, @@ -114,7 +114,7 @@ void libblis_test_addv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { double time_min = 1e9; @@ -164,9 +164,10 @@ void libblis_test_addv_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; - + perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; + perf->time = time_min; + // Perform checks. 
libblis_test_addv_check( &alpha, &beta, &x, &y, resid ); diff --git a/blis/testsuite/src/test_axpy2v.c b/blis/testsuite/src/test_axpy2v.c index a622a1b..155e2d4 100644 --- a/blis/testsuite/src/test_axpy2v.c +++ b/blis/testsuite/src/test_axpy2v.c @@ -56,7 +56,7 @@ void libblis_test_axpy2v_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_axpy2v_impl( iface_t iface, @@ -124,7 +124,7 @@ void libblis_test_axpy2v_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -193,8 +193,9 @@ void libblis_test_axpy2v_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m + 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( z ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m + 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( z ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. libblis_test_axpy2v_check( &alpha1, &alpha2, &x, &y, &z, &z_save, resid ); diff --git a/blis/testsuite/src/test_axpyf.c b/blis/testsuite/src/test_axpyf.c index e85defc..6968708 100644 --- a/blis/testsuite/src/test_axpyf.c +++ b/blis/testsuite/src/test_axpyf.c @@ -56,7 +56,7 @@ void libblis_test_axpyf_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_axpyf_impl( iface_t iface, @@ -122,7 +122,7 @@ void libblis_test_axpyf_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -196,8 +196,9 @@ void libblis_test_axpyf_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. 
- *perf = ( 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. libblis_test_axpyf_check( &alpha, &a, &x, &y, &y_save, resid ); diff --git a/blis/testsuite/src/test_axpym.c b/blis/testsuite/src/test_axpym.c index da5124b..c1d2c04 100644 --- a/blis/testsuite/src/test_axpym.c +++ b/blis/testsuite/src/test_axpym.c @@ -56,7 +56,7 @@ void libblis_test_axpym_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_axpym_impl( iface_t iface, @@ -120,7 +120,7 @@ void libblis_test_axpym_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -182,8 +182,9 @@ void libblis_test_axpym_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. 
libblis_test_axpym_check( &alpha, &x, &y, &y_save, resid ); diff --git a/blis/testsuite/src/test_axpyv.c b/blis/testsuite/src/test_axpyv.c index ee237cf..76ff570 100644 --- a/blis/testsuite/src/test_axpyv.c +++ b/blis/testsuite/src/test_axpyv.c @@ -56,7 +56,7 @@ void libblis_test_axpyv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_axpyv_impl( iface_t iface, @@ -120,7 +120,7 @@ void libblis_test_axpyv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -233,13 +233,14 @@ void libblis_test_axpyv_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m )*test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y[0] ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m )*test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_copym.c b/blis/testsuite/src/test_copym.c index 66a7bbd..3a01242 100644 --- a/blis/testsuite/src/test_copym.c +++ b/blis/testsuite/src/test_copym.c @@ -56,7 +56,7 @@ void libblis_test_copym_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_copym_impl( iface_t iface, @@ -114,7 +114,7 @@ void libblis_test_copym_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { double time_min = 1e9; @@ -158,8 +158,9 @@ void libblis_test_copym_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; + perf->time = time_min; // Perform checks. 
libblis_test_copym_check( &x, &y, resid ); diff --git a/blis/testsuite/src/test_copyv.c b/blis/testsuite/src/test_copyv.c index e854d5d..fb6e2af 100644 --- a/blis/testsuite/src/test_copyv.c +++ b/blis/testsuite/src/test_copyv.c @@ -56,7 +56,7 @@ void libblis_test_copyv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_copyv_impl( iface_t iface, @@ -114,7 +114,7 @@ void libblis_test_copyv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { double time_min = 1e9; @@ -187,13 +187,14 @@ void libblis_test_copyv_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 1.0 * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_dotaxpyv.c b/blis/testsuite/src/test_dotaxpyv.c index 4fa0fbb..dbce8de 100644 --- a/blis/testsuite/src/test_dotaxpyv.c +++ b/blis/testsuite/src/test_dotaxpyv.c @@ -56,7 +56,7 @@ void libblis_test_dotaxpyv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_dotaxpyv_impl( iface_t iface, @@ -126,7 +126,7 @@ void libblis_test_dotaxpyv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -213,8 +213,9 @@ void libblis_test_dotaxpyv_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m + 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( z ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m + 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( z ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. 
libblis_test_dotaxpyv_check( &alpha, &xt, &x, &y, &rho, &z, &z_save, resid ); diff --git a/blis/testsuite/src/test_dotv.c b/blis/testsuite/src/test_dotv.c index 0fac9b9..f6b7ea9 100644 --- a/blis/testsuite/src/test_dotv.c +++ b/blis/testsuite/src/test_dotv.c @@ -56,7 +56,7 @@ void libblis_test_dotv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_dotv_impl( iface_t iface, @@ -116,7 +116,7 @@ void libblis_test_dotv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -210,13 +210,14 @@ void libblis_test_dotv_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m ) *test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m ) *test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_dotxaxpyf.c b/blis/testsuite/src/test_dotxaxpyf.c index b436147..6a7a55e 100644 --- a/blis/testsuite/src/test_dotxaxpyf.c +++ b/blis/testsuite/src/test_dotxaxpyf.c @@ -56,7 +56,7 @@ void libblis_test_dotxaxpyf_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_dotxaxpyf_impl( iface_t iface, @@ -132,7 +132,7 @@ void libblis_test_dotxaxpyf_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -225,8 +225,9 @@ void libblis_test_dotxaxpyf_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * b_n + 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * b_n + 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. 
libblis_test_dotxaxpyf_check( &alpha, &at, &a, &w, &x, &beta, &y, &z, &y_save, &z_save, resid ); diff --git a/blis/testsuite/src/test_dotxf.c b/blis/testsuite/src/test_dotxf.c index d9a21c4..6167182 100644 --- a/blis/testsuite/src/test_dotxf.c +++ b/blis/testsuite/src/test_dotxf.c @@ -56,7 +56,7 @@ void libblis_test_dotxf_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_dotxf_impl( iface_t iface, @@ -124,7 +124,7 @@ void libblis_test_dotxf_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -201,8 +201,9 @@ void libblis_test_dotxf_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. libblis_test_dotxf_check( &alpha, &a, &x, &beta, &y, &y_save, resid ); diff --git a/blis/testsuite/src/test_dotxv.c b/blis/testsuite/src/test_dotxv.c index fc1aa0a..95f6c5e 100644 --- a/blis/testsuite/src/test_dotxv.c +++ b/blis/testsuite/src/test_dotxv.c @@ -56,7 +56,7 @@ void libblis_test_dotxv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_dotxv_impl( iface_t iface, @@ -121,7 +121,7 @@ void libblis_test_dotxv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -202,8 +202,9 @@ void libblis_test_dotxv_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. 
- *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. libblis_test_dotxv_check( &alpha, &x, &y, &beta, &rho, &rho_save, resid ); diff --git a/blis/testsuite/src/test_gemm.c b/blis/testsuite/src/test_gemm.c index f384490..33ef0c8 100644 --- a/blis/testsuite/src/test_gemm.c +++ b/blis/testsuite/src/test_gemm.c @@ -55,7 +55,7 @@ void libblis_test_gemm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_gemm_impl( iface_t iface, @@ -126,7 +126,7 @@ void libblis_test_gemm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -170,8 +170,11 @@ void libblis_test_gemm_experiment( test_params_t* params, // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transa, sc_str[0], m, k, &a ); + //printf("Created object a, buffer address is 0x%x.\n", (unsigned int)bli_obj_buffer(a)); + libblis_test_mobj_create( params, datatype, transb, sc_str[1], k, n, &b ); + //printf("Created object b, buffer address is 0x%x.\n", (unsigned int)bli_obj_buffer(b)); #ifdef BLIS_ENABLE_MULTITHREAD_TEST for(i = 0; i < test_way; i++) @@ -187,6 +190,8 @@ void libblis_test_gemm_experiment( test_params_t* params, libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[2], m, n, &c_save ); #endif + //printf("Created object c, buffer address is 0x%x.\n", (unsigned int)bli_obj_buffer(c[0])); + //printf("Created object c_save, buffer address is 0x%x.\n", (unsigned int)bli_obj_buffer(c_save[0])); // Set alpha and beta. 
#ifdef BLIS_ENABLE_MULTITHREAD_TEST @@ -284,12 +289,13 @@ void libblis_test_gemm_experiment( test_params_t* params, // Estimate the performance of the best experiment repeat. #ifdef BLIS_ENABLE_MULTITHREAD_TEST - *perf = ( 2.0 * m * n * k ) / time_min * test_way / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * n * k ) / time_min * test_way / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0; #else - *perf = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; // Perform checks. #ifdef BLIS_ENABLE_MULTITHREAD_TEST @@ -432,7 +438,7 @@ void libblis_test_gemm_impl( iface_t iface, cblas_b = (float *) bli_obj_buffer( *b ); cblas_c = (float *) bli_obj_buffer( *c ); -// printf("test_gemm %d %d %d %d %d\n", order, transA, transB, lda, ldb); + //printf("test_gemm %d %d %d %d %d, 0x%x, 0x%x, 0x%x\n", order, transA, transB, lda, ldb, (unsigned int)cblas_a,(unsigned int)cblas_b,(unsigned int)cblas_c); cblas_sgemm(order, transA, transB, m, n, k, *cblas_alpha, cblas_a, lda, cblas_b, ldb, *cblas_beta, cblas_c, ldc); } @@ -447,6 +453,7 @@ void libblis_test_gemm_impl( iface_t iface, cblas_b = (double *) bli_obj_buffer( *b ); cblas_c = (double *) bli_obj_buffer( *c ); + //printf("test_gemm %d %d %d %d %d, 0x%x, 0x%x, 0x%x\n", order, transA, transB, lda, ldb, (unsigned int)cblas_a,(unsigned int)cblas_b,(unsigned int)cblas_c); cblas_dgemm(order, transA, transB, m, n, k, *cblas_alpha, cblas_a, lda, cblas_b, ldb, *cblas_beta, cblas_c, ldc); } diff --git a/blis/testsuite/src/test_gemm_ukr.c b/blis/testsuite/src/test_gemm_ukr.c index 5506bed..9cf8623 100644 --- a/blis/testsuite/src/test_gemm_ukr.c +++ b/blis/testsuite/src/test_gemm_ukr.c @@ -56,7 +56,7 @@ void libblis_test_gemm_ukr_experiment( test_params_t* 
params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_gemm_ukr_impl( iface_t iface, @@ -131,7 +131,7 @@ void libblis_test_gemm_ukr_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -238,8 +238,9 @@ void libblis_test_gemm_ukr_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * n * k ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. libblis_test_gemm_ukr_check( &alpha, &a, &b, &beta, &c, &c_save, resid ); diff --git a/blis/testsuite/src/test_gemmtrsm_ukr.c b/blis/testsuite/src/test_gemmtrsm_ukr.c index 87d7f1b..eab4d44 100644 --- a/blis/testsuite/src/test_gemmtrsm_ukr.c +++ b/blis/testsuite/src/test_gemmtrsm_ukr.c @@ -56,7 +56,7 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_gemmtrsm_ukr_impl( iface_t iface, @@ -143,7 +143,7 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -289,8 +289,9 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. 
- *perf = ( 2.0 * m * n * k + 1.0 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( b ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * n * k + 1.0 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( b ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. libblis_test_gemmtrsm_ukr_check( side, &alpha, diff --git a/blis/testsuite/src/test_gemv.c b/blis/testsuite/src/test_gemv.c index 7d61148..f59d1d4 100644 --- a/blis/testsuite/src/test_gemv.c +++ b/blis/testsuite/src/test_gemv.c @@ -56,7 +56,7 @@ void libblis_test_gemv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_gemv_impl( iface_t iface, @@ -124,7 +124,7 @@ void libblis_test_gemv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -260,13 +260,14 @@ void libblis_test_gemv_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * n ) * test_way/ time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y[0] ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * n ) * test_way/ time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_ger.c b/blis/testsuite/src/test_ger.c index d938533..2139260 100644 --- a/blis/testsuite/src/test_ger.c +++ b/blis/testsuite/src/test_ger.c @@ -56,7 +56,7 @@ void libblis_test_ger_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_ger_impl( iface_t iface, @@ -122,7 +122,7 @@ void libblis_test_ger_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -254,13 +254,14 @@ void libblis_test_ger_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( a[0] ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( a[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( a ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( a ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; // Perform checks. 
#ifdef BLIS_ENABLE_MULTITHREAD_TEST diff --git a/blis/testsuite/src/test_hemm.c b/blis/testsuite/src/test_hemm.c index a77cada..1e7a528 100644 --- a/blis/testsuite/src/test_hemm.c +++ b/blis/testsuite/src/test_hemm.c @@ -56,7 +56,7 @@ void libblis_test_hemm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_hemm_impl( iface_t iface, @@ -130,7 +130,7 @@ void libblis_test_hemm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -280,13 +280,14 @@ void libblis_test_hemm_experiment( test_params_t* params, // Estimate the performance of the best experiment repeat. #ifdef BLIS_ENABLE_MULTITHREAD_TEST - *perf = ( 2.0 * mn_side * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * mn_side * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0; #else - *perf = ( 2.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; // Perform checks. 
#ifdef BLIS_ENABLE_MULTITHREAD_TEST diff --git a/blis/testsuite/src/test_hemv.c b/blis/testsuite/src/test_hemv.c index 682761c..7b1a447 100644 --- a/blis/testsuite/src/test_hemv.c +++ b/blis/testsuite/src/test_hemv.c @@ -56,7 +56,7 @@ void libblis_test_hemv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_hemv_impl( iface_t iface, @@ -125,7 +125,7 @@ void libblis_test_hemv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -280,13 +280,14 @@ void libblis_test_hemv_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * m ) *test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y[0] ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m ) *test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_her.c b/blis/testsuite/src/test_her.c index 592860c..660c31f 100644 --- a/blis/testsuite/src/test_her.c +++ b/blis/testsuite/src/test_her.c @@ -56,7 +56,7 @@ void libblis_test_her_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_her_impl( iface_t iface, @@ -122,7 +122,7 @@ void libblis_test_her_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -258,13 +258,14 @@ void libblis_test_her_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( a[0]) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( a[0]) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( a ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( a ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_her2.c b/blis/testsuite/src/test_her2.c index 7bae1e1..c77f813 100644 --- a/blis/testsuite/src/test_her2.c +++ b/blis/testsuite/src/test_her2.c @@ -56,7 +56,7 @@ void libblis_test_her2_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_her2_impl( iface_t iface, @@ -124,7 +124,7 @@ void libblis_test_her2_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -267,13 +267,14 @@ void libblis_test_her2_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( a[0] ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( a[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 2.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( a ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( a ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_her2k.c b/blis/testsuite/src/test_her2k.c index ff60f64..e921367 100644 --- a/blis/testsuite/src/test_her2k.c +++ b/blis/testsuite/src/test_her2k.c @@ -56,7 +56,7 @@ void libblis_test_her2k_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_her2k_impl( iface_t iface, @@ -128,7 +128,7 @@ void libblis_test_her2k_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -295,15 +295,15 @@ void libblis_test_her2k_experiment( test_params_t* params, #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * m * k ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * m * k ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0; #endif - + perf->time = time_min; // Perform checks. 
#ifdef BLIS_ENABLE_MULTITHREAD_TEST diff --git a/blis/testsuite/src/test_herk.c b/blis/testsuite/src/test_herk.c index a2e4bfc..56517fd 100644 --- a/blis/testsuite/src/test_herk.c +++ b/blis/testsuite/src/test_herk.c @@ -56,7 +56,7 @@ void libblis_test_herk_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_herk_impl( iface_t iface, @@ -126,7 +126,7 @@ void libblis_test_herk_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -290,13 +290,15 @@ void libblis_test_herk_experiment( test_params_t* params, // Estimate the performance of the best experiment repeat. #ifdef BLIS_ENABLE_MULTITHREAD_TEST - *perf = ( 1.0 * m * m * k ) * test_way/ time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m * k ) * test_way/ time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0; #else - *perf = ( 1.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; + // Perform checks. 
#ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_libblis.c b/blis/testsuite/src/test_libblis.c index 00249a5..3281c4b 100644 --- a/blis/testsuite/src/test_libblis.c +++ b/blis/testsuite/src/test_libblis.c @@ -1125,7 +1125,7 @@ void libblis_test_op_driver( test_params_t* params, char*, // pc_str (current param string) char*, // sc_str (current storage string) unsigned int, // p_cur (current problem size) - double*, // perf + perf_t*, // perf double* ) ) // residual { unsigned int n_mstorage = params->n_mstorage; @@ -1155,7 +1155,8 @@ void libblis_test_op_driver( test_params_t* params, unsigned int p_cur, pi; unsigned int dt, pci, sci, i, j, o; - double perf, resid; + perf_t perf; + double resid; char* pass_str; char blank_str[32]; char funcname_str[64]; @@ -1468,6 +1469,8 @@ void libblis_test_op_driver( test_params_t* params, n_spaces = MAX_FUNC_STRING_LENGTH - strlen( funcname_str ); fill_string_with_n_spaces( blank_str, n_spaces ); + strcat(funcname_str,blank_str); + // Print all dimensions to a single string. libblis_test_build_dims_string( op, p_cur, dims_str ); @@ -1478,35 +1481,35 @@ void libblis_test_op_driver( test_params_t* params, if ( params->output_matlab_format ) { libblis_test_fprintf( stdout, - "%s%s( %3u, 1:%u ) = [%s %7.3lf %8.2le ]; %c %s\n", - funcname_str, blank_str, pi, n_dims_print + 2, - dims_str, perf, resid, + "%s( %3u, 1:%u ) = [%s %8.2le %7.3lf %8.2le ]; %c %s\n", + funcname_str, pi, n_dims_print + 2, + dims_str, perf.time, perf.gflops, resid, OUTPUT_COMMENT_CHAR, pass_str ); // Also output to a file if requested (and successfully opened). 
if ( output_stream ) libblis_test_fprintf( output_stream, - "%s%s( %3u, 1:%u ) = [%s %7.3lf %8.2le ]; %c %s\n", - funcname_str, blank_str, pi, n_dims_print + 2, - dims_str, perf, resid, + "%s( %3u, 1:%u ) = [%s %8.2le %7.3lf %8.2le ]; %c %s\n", + funcname_str, pi, n_dims_print + 2, + dims_str, perf.time, perf.gflops, resid, OUTPUT_COMMENT_CHAR, pass_str ); } else { libblis_test_fprintf( stdout, - "%s%s %s %7.3lf %8.2le %s\n", - funcname_str, blank_str, - dims_str, perf, resid, + "%s %s %8.2le %7.3lf %8.2le %s\n", + funcname_str, + dims_str, perf.time, perf.gflops, resid, pass_str ); // Also output to a file if requested (and successfully opened). if ( output_stream ) libblis_test_fprintf( output_stream, - "%s%s %s %7.3lf %8.2le %s\n", - funcname_str, blank_str, - dims_str, perf, resid, + "%s %s %8.2le %7.3lf %8.2le %s\n", + funcname_str, + dims_str, perf.time, perf.gflops, resid, pass_str ); } @@ -1600,7 +1603,7 @@ void libblis_test_build_dims_string( test_op_t* op, if ( op->dimset == BLIS_TEST_DIMS_MF ) { //sprintf( &dims_str[strlen(dims_str)], " %5u %5u", - sprintf( dims_str, " %5u %5u", + sprintf( dims_str, " %5u\t %5u\t", ( unsigned int ) libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ), @@ -1609,7 +1612,7 @@ void libblis_test_build_dims_string( test_op_t* op, else if ( op->dimset == BLIS_TEST_DIMS_K ) { //sprintf( &dims_str[strlen(dims_str)], " %5u %5u %5u", - sprintf( dims_str, " %5u %5u %5u", + sprintf( dims_str, " %5u\t %5u\t %5u\t", ( unsigned int ) op->dim_aux[0], ( unsigned int ) op->dim_aux[1], ( unsigned int ) @@ -1619,7 +1622,7 @@ void libblis_test_build_dims_string( test_op_t* op, else if ( op->dimset == BLIS_TEST_NO_DIMS ) { //sprintf( &dims_str[strlen(dims_str)], " %5u %5u", - sprintf( dims_str, " %5u %5u", + sprintf( dims_str, " %5u\t %5u\t", ( unsigned int ) op->dim_aux[0], ( unsigned int ) op->dim_aux[1] ); } @@ -1630,13 +1633,40 @@ void libblis_test_build_dims_string( test_op_t* op, sprintf( dims_str, "%s", "" ); // Print all 
dimensions to a single string. - for ( i = 0; i < op->n_dims; ++i ) - { - sprintf( &dims_str[strlen(dims_str)], " %5u", - ( unsigned int ) - libblis_test_get_dim_from_prob_size( op->dim_spec[i], + if(op->dimset == BLIS_TEST_DIMS_MN) { + sprintf( &dims_str[strlen(dims_str)], " %5u\t", + ( unsigned int ) + libblis_test_get_dim_from_prob_size( op->dim_spec[0], + p_cur ) ); + sprintf( &dims_str[strlen(dims_str)], " %5u\t", + ( unsigned int ) + libblis_test_get_dim_from_prob_size( op->dim_spec[1], + p_cur ) ); + sprintf( &dims_str[strlen(dims_str)], " \t"); + + } + else if(op->dimset == BLIS_TEST_DIMS_MK) { + sprintf( &dims_str[strlen(dims_str)], " %5u\t", + ( unsigned int ) + libblis_test_get_dim_from_prob_size( op->dim_spec[0], + p_cur ) ); + sprintf( &dims_str[strlen(dims_str)], " \t"); + sprintf( &dims_str[strlen(dims_str)], " %5u\t", + ( unsigned int ) + libblis_test_get_dim_from_prob_size( op->dim_spec[1], + p_cur ) ); + } + else { + for ( i = 0; i < op->n_dims; ++i ) + { + sprintf( &dims_str[strlen(dims_str)], " %5u\t", + ( unsigned int ) + libblis_test_get_dim_from_prob_size( op->dim_spec[i], p_cur ) ); + } } + + } } @@ -1665,7 +1695,7 @@ void libblis_test_build_col_labels_string( test_op_t* op, char* l_str ) n_spaces = 6; fill_string_with_n_spaces( blank_str, n_spaces ); - sprintf( &l_str[strlen(l_str)], "%s", blank_str ); + sprintf( &l_str[strlen(l_str)], "%s\t", blank_str ); if ( op->dimset == BLIS_TEST_DIMS_MNK || op->dimset == BLIS_TEST_DIMS_MN || @@ -1674,21 +1704,27 @@ void libblis_test_build_col_labels_string( test_op_t* op, char* l_str ) op->dimset == BLIS_TEST_DIMS_K || op->dimset == BLIS_TEST_DIMS_MF || op->dimset == BLIS_TEST_NO_DIMS ) - sprintf( &l_str[strlen(l_str)], " %5s", "m" ); + sprintf( &l_str[strlen(l_str)], " %5s", "m\t" ); + else + sprintf( &l_str[strlen(l_str)], "\t" ); if ( op->dimset == BLIS_TEST_DIMS_MNK || op->dimset == BLIS_TEST_DIMS_MN || op->dimset == BLIS_TEST_DIMS_K || op->dimset == BLIS_TEST_DIMS_MF || op->dimset == 
BLIS_TEST_NO_DIMS ) - sprintf( &l_str[strlen(l_str)], " %5s", "n" ); + sprintf( &l_str[strlen(l_str)], " %5s", "n\t" ); + else + sprintf( &l_str[strlen(l_str)], "\t" ); if ( op->dimset == BLIS_TEST_DIMS_MNK || op->dimset == BLIS_TEST_DIMS_MK || op->dimset == BLIS_TEST_DIMS_K ) - sprintf( &l_str[strlen(l_str)], " %5s", "k" ); + sprintf( &l_str[strlen(l_str)], " %5s", "k\t" ); + else + sprintf( &l_str[strlen(l_str)], "\t" ); - sprintf( &l_str[strlen(l_str)], "%s", " gflops resid result" ); + sprintf( &l_str[strlen(l_str)], "%s", " \t seconds\t gflops\t resid\t result" ); } @@ -2037,7 +2073,8 @@ void libblis_test_parse_message( FILE* output_stream, char* message, va_list arg // Add the final type specifier, and null-terminate the string. format_spec[cf] = message[c]; - format_spec[cf+1] = '\0'; + format_spec[cf+1] = '\t'; + format_spec[cf+2] = '\0'; // Switch based on type, since we can't predict what will // va_args() will return. @@ -2156,11 +2193,11 @@ void libblis_test_parse_command_line( int argc, char** argv ) -void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ) +void libblis_test_check_empty_problem( obj_t* c, perf_t* perf, double* resid ) { if ( bli_obj_has_zero_dim( *c ) ) { - *perf = 0.0; + perf->gflops = 0.0; *resid = 0.0; } } diff --git a/blis/testsuite/src/test_libblis.h b/blis/testsuite/src/test_libblis.h index 0afb19c..054bae4 100644 --- a/blis/testsuite/src/test_libblis.h +++ b/blis/testsuite/src/test_libblis.h @@ -71,7 +71,7 @@ #define INPUT_BUFFER_SIZE 256 #define MAX_FILENAME_LENGTH 1000 #define MAX_BINARY_NAME_LENGTH 256 -#define MAX_FUNC_STRING_LENGTH 26 +#define MAX_FUNC_STRING_LENGTH 37 #define FLOPS_PER_UNIT_PERF 1e9 #define MAX_NUM_MSTORAGE 4 @@ -281,6 +281,12 @@ typedef struct double warnpass; } thresh_t; +typedef struct +{ + double time; + unsigned long cycles; + double gflops; +} perf_t; // // --- Prototypes -------------------------------------------------------------- @@ -350,7 +356,7 @@ void 
libblis_test_op_driver( test_params_t* params, char*, // pc_str (current param string) char*, // sc_str (current storage string) unsigned int, // p_cur (current problem size) - double*, // perf + perf_t*, // perf double* ) ); // residual // --- Generate experiment string labels --- @@ -411,7 +417,7 @@ void libblis_test_parse_command_line( int argc, char** argv ); // --- Miscellaneous --- -void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ); +void libblis_test_check_empty_problem( obj_t* c, perf_t* perf, double* resid ); // diff --git a/blis/testsuite/src/test_normfm.c b/blis/testsuite/src/test_normfm.c index 487735c..1b08a83 100644 --- a/blis/testsuite/src/test_normfm.c +++ b/blis/testsuite/src/test_normfm.c @@ -56,7 +56,7 @@ void libblis_test_normfm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_normfm_impl( iface_t iface, @@ -113,7 +113,7 @@ void libblis_test_normfm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -162,8 +162,9 @@ void libblis_test_normfm_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; + perf->time = time_min; // Perform checks. 
libblis_test_normfm_check( &beta, &x, &norm, resid ); diff --git a/blis/testsuite/src/test_normfv.c b/blis/testsuite/src/test_normfv.c index 9b35b9f..49fb8ee 100644 --- a/blis/testsuite/src/test_normfv.c +++ b/blis/testsuite/src/test_normfv.c @@ -56,7 +56,7 @@ void libblis_test_normfv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_normfv_impl( iface_t iface, @@ -113,7 +113,7 @@ void libblis_test_normfv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -191,13 +191,14 @@ void libblis_test_normfv_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 2.0 * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_randm.c b/blis/testsuite/src/test_randm.c index cef126d..a5962cb 100644 --- a/blis/testsuite/src/test_randm.c +++ b/blis/testsuite/src/test_randm.c @@ -56,7 +56,7 @@ void libblis_test_randm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_randm_impl( iface_t iface, @@ -110,7 +110,7 @@ void libblis_test_randm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -149,8 +149,9 @@ void libblis_test_randm_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; + perf->time = time_min; // Perform checks. 
// For randm(), we don't return a meaningful residual/diff, since we can't diff --git a/blis/testsuite/src/test_randv.c b/blis/testsuite/src/test_randv.c index b74100f..1990ff2 100644 --- a/blis/testsuite/src/test_randv.c +++ b/blis/testsuite/src/test_randv.c @@ -56,7 +56,7 @@ void libblis_test_randv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_randv_impl( iface_t iface, @@ -110,7 +110,7 @@ void libblis_test_randv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -149,8 +149,9 @@ void libblis_test_randv_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; + perf->time = time_min; // Perform checks. // For randv(), we don't return a meaningful residual/diff, since we can't diff --git a/blis/testsuite/src/test_scal2m.c b/blis/testsuite/src/test_scal2m.c index 2816c8a..63d830b 100644 --- a/blis/testsuite/src/test_scal2m.c +++ b/blis/testsuite/src/test_scal2m.c @@ -56,7 +56,7 @@ void libblis_test_scal2m_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_scal2m_impl( iface_t iface, @@ -119,7 +119,7 @@ void libblis_test_scal2m_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -181,8 +181,9 @@ void libblis_test_scal2m_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. 
- *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. libblis_test_scal2m_check( &alpha, &x, &y, &y_save, resid ); diff --git a/blis/testsuite/src/test_scal2v.c b/blis/testsuite/src/test_scal2v.c index 184b500..06c3a43 100644 --- a/blis/testsuite/src/test_scal2v.c +++ b/blis/testsuite/src/test_scal2v.c @@ -56,7 +56,7 @@ void libblis_test_scal2v_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_scal2v_impl( iface_t iface, @@ -119,7 +119,7 @@ void libblis_test_scal2v_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -179,8 +179,9 @@ void libblis_test_scal2v_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. 
libblis_test_scal2v_check( &alpha, &x, &y, &y_save, resid ); diff --git a/blis/testsuite/src/test_scalm.c b/blis/testsuite/src/test_scalm.c index 1c08b87..d0ce2a1 100644 --- a/blis/testsuite/src/test_scalm.c +++ b/blis/testsuite/src/test_scalm.c @@ -56,7 +56,7 @@ void libblis_test_scalm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_scalm_impl( iface_t iface, @@ -115,7 +115,7 @@ void libblis_test_scalm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -175,8 +175,9 @@ void libblis_test_scalm_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 6.0; + perf->gflops = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 6.0; + perf->time = time_min; // Perform checks. libblis_test_scalm_check( &beta, &y, &y_save, resid ); diff --git a/blis/testsuite/src/test_scalv.c b/blis/testsuite/src/test_scalv.c index e4559e2..eceae19 100644 --- a/blis/testsuite/src/test_scalv.c +++ b/blis/testsuite/src/test_scalv.c @@ -56,7 +56,7 @@ void libblis_test_scalv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_scalv_impl( iface_t iface, @@ -116,7 +116,7 @@ void libblis_test_scalv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -230,13 +230,14 @@ void libblis_test_scalv_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. 
- *perf = ( 1.0 * m ) *test_way/ time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y[0] ) ) *perf *= 6.0; + perf->gflops = ( 1.0 * m ) *test_way/ time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y[0] ) ) perf->gflops *= 6.0; #else // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 6.0; + perf->gflops = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 6.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_setm.c b/blis/testsuite/src/test_setm.c index 316e8ed..6a71f37 100644 --- a/blis/testsuite/src/test_setm.c +++ b/blis/testsuite/src/test_setm.c @@ -56,7 +56,7 @@ void libblis_test_setm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_setm_impl( iface_t iface, @@ -112,7 +112,7 @@ void libblis_test_setm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -158,8 +158,9 @@ void libblis_test_setm_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; + perf->time = time_min; // Perform checks. 
libblis_test_setm_check( &beta, &x, resid ); diff --git a/blis/testsuite/src/test_setv.c b/blis/testsuite/src/test_setv.c index dbf7023..e75d9b2 100644 --- a/blis/testsuite/src/test_setv.c +++ b/blis/testsuite/src/test_setv.c @@ -56,7 +56,7 @@ void libblis_test_setv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_setv_impl( iface_t iface, @@ -112,7 +112,7 @@ void libblis_test_setv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -156,8 +156,9 @@ void libblis_test_setv_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; + perf->time = time_min; // Perform checks. libblis_test_setv_check( &beta, &x, resid ); diff --git a/blis/testsuite/src/test_subm.c b/blis/testsuite/src/test_subm.c index eca95e0..b662bbf 100644 --- a/blis/testsuite/src/test_subm.c +++ b/blis/testsuite/src/test_subm.c @@ -56,7 +56,7 @@ void libblis_test_subm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_subm_impl( iface_t iface, @@ -115,7 +115,7 @@ void libblis_test_subm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { double time_min = 1e9; @@ -168,8 +168,9 @@ void libblis_test_subm_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. 
- *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; + perf->time = time_min; // Perform checks. libblis_test_subm_check( &alpha, &beta, &x, &y, resid ); diff --git a/blis/testsuite/src/test_subv.c b/blis/testsuite/src/test_subv.c index 2b7f8c3..633e6e7 100644 --- a/blis/testsuite/src/test_subv.c +++ b/blis/testsuite/src/test_subv.c @@ -56,7 +56,7 @@ void libblis_test_subv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_subv_impl( iface_t iface, @@ -115,7 +115,7 @@ void libblis_test_subv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { double time_min = 1e9; @@ -165,8 +165,9 @@ void libblis_test_subv_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 2.0; + perf->gflops = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 2.0; + perf->time = time_min; // Perform checks. 
libblis_test_subv_check( &alpha, &beta, &x, &y, resid ); diff --git a/blis/testsuite/src/test_symm.c b/blis/testsuite/src/test_symm.c index 165e64e..04e6dc7 100644 --- a/blis/testsuite/src/test_symm.c +++ b/blis/testsuite/src/test_symm.c @@ -56,7 +56,7 @@ void libblis_test_symm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_symm_impl( iface_t iface, @@ -130,7 +130,7 @@ void libblis_test_symm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -281,13 +281,15 @@ void libblis_test_symm_experiment( test_params_t* params, // Estimate the performance of the best experiment repeat. #ifdef BLIS_ENABLE_MULTITHREAD_TEST - *perf = ( 2.0 * mn_side * m * n ) *test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * mn_side * m * n ) *test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0; #else - *perf = ( 2.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; + // Perform checks. 
#ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_symv.c b/blis/testsuite/src/test_symv.c index 89ec05d..b2798a7 100644 --- a/blis/testsuite/src/test_symv.c +++ b/blis/testsuite/src/test_symv.c @@ -56,7 +56,7 @@ void libblis_test_symv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_symv_impl( iface_t iface, @@ -125,7 +125,7 @@ void libblis_test_symv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -283,13 +283,14 @@ void libblis_test_symv_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y[0] ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_syr.c b/blis/testsuite/src/test_syr.c index 27b4c09..782dada 100644 --- a/blis/testsuite/src/test_syr.c +++ b/blis/testsuite/src/test_syr.c @@ -56,7 +56,7 @@ void libblis_test_syr_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_syr_impl( iface_t iface, @@ -122,7 +122,7 @@ void libblis_test_syr_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -258,13 +258,14 @@ void libblis_test_syr_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( a[0] ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( a[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( a ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( a ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_syr2.c b/blis/testsuite/src/test_syr2.c index 5488bb4..45c2c79 100644 --- a/blis/testsuite/src/test_syr2.c +++ b/blis/testsuite/src/test_syr2.c @@ -56,7 +56,7 @@ void libblis_test_syr2_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_syr2_impl( iface_t iface, @@ -124,7 +124,7 @@ void libblis_test_syr2_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -266,13 +266,14 @@ void libblis_test_syr2_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 2.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( a[0] ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( a[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 2.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( a ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( a ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_syr2k.c b/blis/testsuite/src/test_syr2k.c index d590031..c0a0201 100644 --- a/blis/testsuite/src/test_syr2k.c +++ b/blis/testsuite/src/test_syr2k.c @@ -56,7 +56,7 @@ void libblis_test_syr2k_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_syr2k_impl( iface_t iface, @@ -128,7 +128,7 @@ void libblis_test_syr2k_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -296,13 +296,15 @@ void libblis_test_syr2k_experiment( test_params_t* params, // Estimate the performance of the best experiment repeat. 
#ifdef BLIS_ENABLE_MULTITHREAD_TEST - *perf = ( 2.0 * m * m * k ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * m * k ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0; #else - *perf = ( 2.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c ) ) *perf *= 4.0; + perf->gflops = ( 2.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; + #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main for(i = 0; i < test_way; i++) diff --git a/blis/testsuite/src/test_syrk.c b/blis/testsuite/src/test_syrk.c index 8e072ce..64473fd 100644 --- a/blis/testsuite/src/test_syrk.c +++ b/blis/testsuite/src/test_syrk.c @@ -56,7 +56,7 @@ void libblis_test_syrk_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_syrk_impl( iface_t iface, @@ -126,7 +126,7 @@ void libblis_test_syrk_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -287,14 +287,15 @@ void libblis_test_syrk_experiment( test_params_t* params, #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * m * k ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c[0] ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m * k ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 1.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; // Perform checks. #ifdef BLIS_ENABLE_MULTITHREAD_TEST diff --git a/blis/testsuite/src/test_trmm.c b/blis/testsuite/src/test_trmm.c index 8e59ac8..1c294e1 100644 --- a/blis/testsuite/src/test_trmm.c +++ b/blis/testsuite/src/test_trmm.c @@ -56,7 +56,7 @@ void libblis_test_trmm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_trmm_impl( iface_t iface, @@ -126,7 +126,7 @@ void libblis_test_trmm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -279,14 +279,16 @@ void libblis_test_trmm_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * mn_side * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( b[0] ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * mn_side * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( b[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( b ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( b ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; + #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main for(i = 0; i < test_way; i++) diff --git a/blis/testsuite/src/test_trmm3.c b/blis/testsuite/src/test_trmm3.c index a36069b..446ebcf 100644 --- a/blis/testsuite/src/test_trmm3.c +++ b/blis/testsuite/src/test_trmm3.c @@ -56,7 +56,7 @@ void libblis_test_trmm3_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_trmm3_impl( iface_t iface, @@ -130,7 +130,7 @@ void libblis_test_trmm3_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -227,8 +227,9 @@ void libblis_test_trmm3_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( c ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. 
libblis_test_trmm3_check( side, &alpha, &a, &b, &beta, &c, &c_save, resid ); diff --git a/blis/testsuite/src/test_trmv.c b/blis/testsuite/src/test_trmv.c index 4147a16..b8fb0f0 100644 --- a/blis/testsuite/src/test_trmv.c +++ b/blis/testsuite/src/test_trmv.c @@ -56,7 +56,7 @@ void libblis_test_trmv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_trmv_impl( iface_t iface, @@ -121,7 +121,7 @@ void libblis_test_trmv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -269,13 +269,14 @@ void libblis_test_trmv_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x[0] ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/blis/testsuite/src/test_trsm.c b/blis/testsuite/src/test_trsm.c index 75cb9cc..9658563 100644 --- a/blis/testsuite/src/test_trsm.c +++ b/blis/testsuite/src/test_trsm.c @@ -56,7 +56,7 @@ void libblis_test_trsm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_trsm_impl( iface_t iface, @@ -126,7 +126,7 @@ void libblis_test_trsm_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -277,14 +277,16 @@ void libblis_test_trsm_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * mn_side * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( b[0] ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * mn_side * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( b[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. 
- *perf = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( b ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * mn_side * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( b ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; + #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main for(i = 0; i < test_way; i++) diff --git a/blis/testsuite/src/test_trsm_ukr.c b/blis/testsuite/src/test_trsm_ukr.c index 8d12d44..0740bfc 100644 --- a/blis/testsuite/src/test_trsm_ukr.c +++ b/blis/testsuite/src/test_trsm_ukr.c @@ -56,7 +56,7 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_trsm_ukr_impl( iface_t iface, @@ -129,7 +129,7 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -241,8 +241,9 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params, } // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( b ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( b ) ) perf->gflops *= 4.0; + perf->time = time_min; // Perform checks. 
libblis_test_trsm_ukr_check( side, &a, &c, &b, resid ); diff --git a/blis/testsuite/src/test_trsv.c b/blis/testsuite/src/test_trsv.c index ccd1a79..f2e4371 100644 --- a/blis/testsuite/src/test_trsv.c +++ b/blis/testsuite/src/test_trsv.c @@ -56,7 +56,7 @@ void libblis_test_trsv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ); void libblis_test_trsv_impl( iface_t iface, @@ -121,7 +121,7 @@ void libblis_test_trsv_experiment( test_params_t* params, char* pc_str, char* sc_str, unsigned int p_cur, - double* perf, + perf_t* perf, double* resid ) { unsigned int n_repeats = params->n_repeats; @@ -264,13 +264,14 @@ void libblis_test_trsv_experiment( test_params_t* params, } #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x[0] ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x[0] ) ) perf->gflops *= 4.0; #else // Estimate the performance of the best experiment repeat. - *perf = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; - if ( bli_obj_is_complex( x ) ) *perf *= 4.0; + perf->gflops = ( 1.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( x ) ) perf->gflops *= 4.0; #endif + perf->time = time_min; #ifdef BLIS_ENABLE_MULTITHREAD_TEST // Check output of each thread, and send max residue to main diff --git a/docs/doxygen/doxycfg.txt b/docs/doxygen/doxycfg.txt new file mode 100644 index 0000000..74a10ee --- /dev/null +++ b/docs/doxygen/doxycfg.txt @@ -0,0 +1,1781 @@ +# Doxyfile 1.7.6.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project. +# +# All text after a hash (#) is considered a comment and will be ignored. +# The format is: +# TAG = value [value, ...] 
+# For lists, items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" "). + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or sequence of words) that should +# identify the project. Note that if you do not use Doxywizard you need +# to put quotes around the project name if it contains spaces. + +PROJECT_NAME = "LINALG " + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give the viewer +# a quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = "Linear Algebra Library" + +# With the PROJECT_LOGO tag one can specify a logo or icon that is +# included in the documentation. The maximum height of the logo should not +# exceed 55 pixels and the maximum width should not exceed 200 pixels. +# Doxygen will copy the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. 
If left blank the current directory will be used. + +OUTPUT_DIRECTORY = ./docs/doxygen + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = YES + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. 
+ +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. 
+ +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. 
+ +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding +# "class=itcl::class" will allow you to use the command class in the +# itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. 
+ +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL. For instance, to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match function declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); vs. +# func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. 
Setting this option to YES (the default) +# will make doxygen replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and +# unions are shown inside the group in which they are included (e.g. using +# @ingroup) instead of on a separate page (for HTML and Man pages) or +# section (for LaTeX and RTF). + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and +# unions with only public data fields will be shown inline in the documentation +# of the scope in which they are defined (i.e. file, namespace, or group +# documentation), provided this scope is documented. If set to NO (the default), +# structs, classes, and unions are shown on a separate page (for HTML and Man +# pages) or section (for LaTeX and RTF). + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. 
So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache used to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +SYMBOL_CACHE_SIZE = 0 + +# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be +# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given +# their name and scope. Since this can be an expensive process and often the +# same symbol appears multiple times in the code, doxygen keeps a cache of +# pre-resolved symbols. If the cache is too small doxygen will become slower. +# If the cache is too large, memory is wasted. The cache size is given by this +# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. 
+ +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespaces are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. 
+# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. 
+ +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. + +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. 
+# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to +# do proper type resolution of all parameters of a function it will reject a +# match between the prototype and the implementation of a member function even +# if there is only one candidate or it is obvious which candidate to choose +# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen +# will still accept a match between prototype and implementation in such cases. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or macro consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. 
+# The appearance of the initializer of individual variables and macros in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way.
To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files +# containing the references data. This must be a list of .bib files. The +# .bib extension is automatically appended if omitted. Using this command +# requires the bibtex tool to be installed. See also +# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style +# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this +# feature you need bibtex and perl available in the search path. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled. + +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly.
+ +WARN_IF_DOC_ERROR = YES + +# The WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = ./ticblas ./docs/doxygen + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. 
+ +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh +# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py +# *.f90 *.f *.for *.vhd *.vhdl + +FILE_PATTERNS = *.c *.h *.dox + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# Note that relative paths are relative to the directory from which doxygen is +# run. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring.
Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = ./docs/doxygen/images + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty or if +# none of the patterns match the file name, INPUT_FILTER is applied.
+ +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) +# and it is also possible to disable source filtering for a specific pattern +# using *.ext= (so without naming a filter). This option only has effect when +# FILTER_SOURCE_FILES is enabled. + +FILTER_SOURCE_PATTERNS = + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. 
+ +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. 
+ +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. + +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. Note that when using a custom header you are responsible +# for the proper inclusion of any scripts and style sheets that doxygen +# needs, which is dependent on the configuration options used. +# It is advised to generate a default header using "doxygen -w html +# header.html footer.html stylesheet.css YourConfigFile" and then modify +# that header. Note that the header is subject to change so you typically +# have to redo this when upgrading to a newer version of doxygen or when +# changing the value of configuration settings such as GENERATE_TREEVIEW! + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. 
If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# style sheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that +# the files will be copied as-is; there are no commands or markers available. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the style sheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. 
+ +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). + +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. 
This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file.
+ +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters"> +# Qt Help Project / Custom Filters</a>. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. +# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes"> +# Qt Help Project / Filter Attributes</a>. + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file.
+ +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) +# at top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. Since the tabs have the same information as the +# navigation tree you can set this option to YES if you already set +# GENERATE_TREEVIEW to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. +# Since the tree basically has the same information as the tab index you +# could consider setting DISABLE_INDEX to YES when enabling this option.
+ +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values +# (range [0,1..20]) that doxygen will group on one line in the generated HTML +# documentation. Note that a value of 0 will completely suppress the enum +# values from appearing in the overview section. + +ENUM_VALUES_PER_LINE = 4 + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANSPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax +# (see http://www.mathjax.org) which uses client side Javascript for the +# rendering instead of using prerendered bitmaps. Use this if you do not +# have LaTeX installed or if you want formulas to look prettier in the HTML +# output.
When enabled you also need to install MathJax separately and +# configure the path to it using the MATHJAX_RELPATH option. + +USE_MATHJAX = NO + +# When MathJax is enabled you need to specify the location relative to the +# HTML output directory using the MATHJAX_RELPATH option. The destination +# directory should contain the MathJax.js script. For instance, if the mathjax +# directory is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to the +# mathjax.org site, so you can quickly see the result without installing +# MathJax, but it is strongly recommended to install a local copy of MathJax +# before deployment. + +MATHJAX_RELPATH = http://www.mathjax.org/mathjax + +# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax extension +# names that should be enabled during MathJax rendering. + +MATHJAX_EXTENSIONS = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvantages are that it is more difficult to setup +# and does not have live searching capabilities.
+ +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header.
Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for +# the generated latex document. The footer should contain everything after +# the last chapter. If it is left blank doxygen will generate a +# standard footer. Notice: only use this tag if you know what you are doing! + +LATEX_FOOTER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +# The LATEX_BIB_STYLE tag can be used to specify the style to use for the +# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See +# http://en.wikipedia.org/wiki/BibTeX for more info. 
+ +LATEX_BIB_STYLE = plain + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load style sheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. 
+ +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. 
+ +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. 
+# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# pointed to by INCLUDE_PATH will be searched when a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. 
+ +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition that +# overrules the definition found in the source code. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all references to function-like macros +# that are alone on a line, have an all uppercase name, and do not end with a +# semicolon, because these will confuse the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. 
+# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option also works with HAVE_DOT disabled, but it is recommended to +# install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path.
+ +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. + +DOT_NUM_THREADS = 0 + +# By default doxygen will use the Helvetica font for all dot files that +# doxygen generates. When you want a differently looking font you can specify +# the font name using DOT_FONTNAME. You need to make sure dot is able to find +# the font, which can be done by putting it in a standard location or by setting +# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. + +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the Helvetica font. +# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to +# set the path where dot can find it. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. 
+ +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. 
Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are svg, png, jpg, or gif. +# If left blank png will be used. If you choose svg you need to set +# HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible in IE 9+ (other browsers do not have this requirement). + +DOT_IMAGE_FORMAT = png + +# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to +# enable generation of interactive SVG images that allow zooming and panning. +# Note that this requires a modern browser other than Internet Explorer. +# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you +# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible. Older versions of IE do not have SVG support. + +INTERACTIVE_SVG = NO + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). 
+ +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the +# \mscfile command). + +MSCFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line).
This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = YES + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/docs/doxygen/mainpage.dox b/docs/doxygen/mainpage.dox new file mode 100644 index 0000000..638242e --- /dev/null +++ b/docs/doxygen/mainpage.dox @@ -0,0 +1,25 @@ +/* + * This is a little header file which doxygen parses to generate the main + * documentation page + */ + +/*! \mainpage LINALG User's Guide + * + * @section introduction Introduction + * LINALG is TI's Linear Algebra Library, supporting two types of TI platforms: + * - ARM+DSP platforms such as AM57xx and Keystone II devices + * - DSP-only platforms such as Keystone I devices + * + * LINALG includes BLAS and LAPACK: + * - BLAS is based on BLIS (https://github.com/flame/blis) and available on both ARM+DSP and DSP-only platforms. + * - LAPACK is based on CLAPACK 3.2.1 (http://www.netlib.org/clapack/) and available only on ARM+DSP platforms. + * + * @section linalgapi LINALG API + * For ARM+DSP library, LINALG API is the standard CBLAS and CLAPACK API, accessible through ARM (host). + * For DSP-only library, LINALG API includes the standard CBLAS API as well as TI CBLAS API extension.
+ * + * - CBLAS API: http://www.netlib.org/blas/#_cblas + * - CLAPACK API: http://www.netlib.org/clapack/ + * - @ref ti_cblas_api + * + */ diff --git a/docs/linalg_user_guide.html b/docs/linalg_user_guide.html new file mode 120000 index 0000000..2fbd5ff --- /dev/null +++ b/docs/linalg_user_guide.html @@ -0,0 +1 @@ +doxygen/html/index.html \ No newline at end of file diff --git a/examples/make.inc b/examples/make.inc index d1a70af..e4d7634 100644 --- a/examples/make.inc +++ b/examples/make.inc @@ -20,7 +20,8 @@ CFLAGS = -g -O2 -I$(TARGET_ROOTDIR)/usr/include -I$(LINALG_DIR)/include LIB_DIR = $(LINALG_DIR)/lib/ LD_FLAGS=-L$(TARGET_ROOTDIR)/lib -L$(TARGET_ROOTDIR)/usr/lib -Wl,-rpath-link,$(TARGET_ROOTDIR)/lib -Wl,-rpath-link,$(TARGET_ROOTDIR)/usr/lib -BLASLIB = $(LIB_DIR)libcblas_armplusdsp.a $(LIB_DIR)libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread +#BLASLIB = $(LIB_DIR)libcblas_armplusdsp.a $(LIB_DIR)libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread +BLASLIB = $(LIB_DIR)libcblas_armplusdsp.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread LAPACKLIB = $(LIB_DIR)libcblaswr.a $(LIB_DIR)liblapack.a $(LIB_DIR)libf2c.a $(LIB_DIR)libcblas_armplusdsp.a $(LIB_DIR)libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread %.o: %.c diff --git a/examples/matmpy/main.c b/examples/matmpy/main.c index 94f6558..3063ec8 100644 --- a/examples/matmpy/main.c +++ b/examples/matmpy/main.c @@ -84,7 +84,7 @@ int main() int t; double checksum; char *ti_cblas_offload_env; - int numtests = 1; + int numtests = 10; /* configuration */ m = k = n = 1000; diff --git a/ticblas/src/ticblas.c b/ticblas/src/ticblas.c index 5543557..55dfe2e 100644 --- a/ticblas/src/ticblas.c +++ b/ticblas/src/ticblas.c @@ -121,14 +121,14 @@ int tiCblasInit(void * mem_vfast_base, size_t mem_vfast_size, ||((mem_medium_base == NULL) || (mem_medium_size < BLAS_MEM_SIZE_MEDIUM) ) ||((mem_slow_base == NULL) || (mem_slow_size < BLAS_MEM_SIZE_SLOW) ) ) { - 
return(TICBLAS_INIT_ERROR); - } - else { + return(TICBLAS_ERROR); + } + else { lib_smem_vinit(blas_mem_handle, mem_vfast_base, mem_vfast_size); lib_smem_finit(blas_mem_handle, mem_fast_base, mem_fast_size); lib_smem_minit(blas_mem_handle, mem_medium_base, mem_medium_size); lib_smem_sinit(blas_mem_handle, mem_slow_base, mem_slow_size); - + pool_mk_mem_L1 = lib_smem_valloc(blas_mem_handle, BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE); pool_kn_mem_L1 = lib_smem_valloc(blas_mem_handle, BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE); pool_mn_mem_L1 = lib_smem_valloc(blas_mem_handle, BLIS_MN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE); @@ -141,36 +141,42 @@ int tiCblasInit(void * mem_vfast_base, size_t mem_vfast_size, pool_kn_mem_L3 = lib_smem_malloc(blas_mem_handle, BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE); pool_mn_mem_L3 = lib_smem_malloc(blas_mem_handle, BLIS_MN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE); - if( (pool_mk_mem_L1 == NULL) - ||(pool_kn_mem_L1 == NULL) - ||(pool_mn_mem_L1 == NULL) - ||(pool_mk_mem_L2 == NULL) - ||(pool_kn_mem_L2 == NULL) - ||(pool_mn_mem_L2 == NULL) - ||(pool_mk_mem_L3 == NULL) - ||(pool_kn_mem_L3 == NULL) + if( (pool_mk_mem_L1 == NULL) + ||(pool_kn_mem_L1 == NULL) + ||(pool_mn_mem_L1 == NULL) + ||(pool_mk_mem_L2 == NULL) + ||(pool_kn_mem_L2 == NULL) + ||(pool_mn_mem_L2 == NULL) + ||(pool_mk_mem_L3 == NULL) + ||(pool_kn_mem_L3 == NULL) ||(pool_mn_mem_L3 == NULL) ) { - return(TICBLAS_INIT_ERROR); - } - else { - bli_mem_init(); + return(TICBLAS_ERROR); + } + else { + bli_mem_init(); return(TICBLAS_SUCCESS); - } + } } } /* tiCblasInit */ int tiCblasNew() { - bli_init(); - - return(TICBLAS_SUCCESS); + if(bli_init() == BLIS_SUCCESS) { + return(TICBLAS_SUCCESS); + } + else { + return(TICBLAS_ERROR); + } } int tiCblasDelete() { - bli_finalize(); - - return(TICBLAS_SUCCESS); + if(bli_finalize() == BLIS_SUCCESS) { + return(TICBLAS_SUCCESS); + } + else { + return(TICBLAS_ERROR); + } } /* Nothing after this line */ diff --git a/ticblas/ticblas.h b/ticblas/ticblas.h 
index 2dff96d..611118d 100644 --- a/ticblas/ticblas.h +++ b/ticblas/ticblas.h @@ -25,20 +25,84 @@ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ -#ifndef TICBLAS_H -#define TICBLAS_H +#ifndef _TICBLAS_H +#define _TICBLAS_H #include -#define TICBLAS_SUCCESS (0) -#define TICBLAS_INIT_ERROR (-1) +/** @defgroup ti_cblas_api CBLAS API Extension for TI-DSP + * @{ + */ +/** @} */ +/** @addtogroup ti_cblas_api + * @{ + * @name Error Return Codes + */ +/*@{*/ +#define TICBLAS_SUCCESS (0) /**< Success. No error. */ +#define TICBLAS_ERROR (-1) /**< Failure. */ +/*@}*/ +/** @} */ + +/** + * @ingroup ti_cblas_api + * @brief Function tiCblasGetSizes() returns the required size of each of the + * memory types defined by the Library Architecture and Framework + * (LibArch) + * + * @param[out] smem_size_vfast size of very fast shared memory + * @param[out] smem_size_fast size of fast shared memory + * @param[out] smem_size_medium size of medium speed shared memory + * @param[out] smem_size_slow size of slow shared memory + * + */ void tiCblasGetSizes(size_t *smem_size_vfast, size_t *smem_size_fast, size_t *smem_size_medium, size_t *smem_size_slow); +/** + * @ingroup ti_cblas_api + * @brief Function tiCblasNew() creates an instance for CBLAS. + * + * @remarks tiCblasNew() MUST be called before tiCblasInit(). + * + * @retval TICBLAS_SUCCESS @copydoc TICBLAS_SUCCESS + * @retval TICBLAS_ERROR @copydoc TICBLAS_ERROR + */ +int tiCblasNew(); + +/** + * @ingroup ti_cblas_api + * @brief Function tiCblasInit() performs heap initialization for CBLAS + * to do memory allocations. + * + * @remarks tiCblasInit() must NOT be called before tiCblasNew(). 
+ * + * @param[in] mem_vfast_base base of very fast shared memory + * @param[in] mem_vfast_size size of very fast shared memory + * @param[in] mem_fast_base base of fast shared memory + * @param[in] mem_fast_size size of fast shared memory + * @param[in] mem_medium_base base of medium speed shared memory + * @param[in] mem_medium_size size of medium speed shared memory + * @param[in] mem_slow_base base of slow shared memory + * @param[in] mem_slow_size size of slow shared memory + * + * @retval TICBLAS_SUCCESS @copydoc TICBLAS_SUCCESS + * @retval TICBLAS_ERROR @copydoc TICBLAS_ERROR + */ int tiCblasInit(void * mem_vfast_base, size_t mem_vfast_size, void * mem_fast_base, size_t mem_fast_size, void * mem_medium_base, size_t mem_medium_size, void * mem_slow_base, size_t mem_slow_size); -#endif /* TICBLAS_H */ +/** + * @ingroup ti_cblas_api + * @brief Function tiCblasDelete() deletes the instance of CBLAS created by + * tiCblasNew(). + * + * @retval TICBLAS_SUCCESS @copydoc TICBLAS_SUCCESS + * @retval TICBLAS_ERROR @copydoc TICBLAS_ERROR + */ +int tiCblasDelete(); + +#endif /* _TICBLAS_H */