summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 0833183)
raw | patch | inline | side by side (parent: 0833183)
author | Jianzhong Xu <xuj@ti.com> | |
Tue, 14 Jun 2016 17:44:05 +0000 (17:44 +0000) | ||
committer | Jianzhong Xu <xuj@ti.com> | |
Tue, 14 Jun 2016 17:44:05 +0000 (17:44 +0000) |
examples/dsponly/dgemm_test/dgemm_test.c | patch | blob | history | |
src/ti/linalg/ticblas/src/ticblas.c | patch | blob | history | |
src/ti/linalg/ticblas/ticblas.h | patch | blob | history |
index a7d5f057e384e09851d97e5004c52febfb459639..b7ae762072ac9fa4269c3265682f87774f7b10f6 100644 (file)
*****************************************************************************/
/******************************************************************************
* FILE: dgemm_test.c
+* Purpose: to test BLAS function DGEMM.
+*
+* DGEMM performs one of the matrix-matrix operations
+*
+* C := alpha*op( A )*op( B ) + beta*C,
+*
+* where op( X ) is one of
+*
+* op( X ) = X or op( X ) = X**T,
+*
+* alpha and beta are scalars, and A, B and C are matrices, with op(A)
+* an m by k matrix, op(B) a k by n matrix and C an m by n matrix.
******************************************************************************/
-#include <omp.h>
-#include <string.h>
+#include <string.h>
#include <stdio.h>
-#include <ti/libarch/libarch.h>
-#include <ti/linalg/ticblas.h>
-#include <ti/linalg/cblas.h>
-
-#define FLOPS_PER_UNIT_PERF 1e9
+#include <omp.h> /* OpenMP header */
+#include <ti/libarch/libarch.h> /* Library Architecture header */
+#include <ti/linalg/ticblas.h> /* TI CBLAS API extension header */
+#include <ti/linalg/cblas.h> /* Standard CBLAS header */
extern void cleanup_after_ticblas();
extern void prepare_for_ticblas();
int main (int argc, char *argv[])
{
- double *A, *B, *C, *C_copy;
- int m, n, k;
- double alpha, beta, precision_diff, time, time_diff, gflops;
-
- int nthreads, tid;
+ double *A, *B, *C, *C_copy; /* matrices */
+ int m, n, k; /* matrix dimensions */
+ double alpha, beta; /* scalars for matrix multiplication */
+ double precision_diff; /* precision difference between TI DGEMM implementation
+ and a reference implementation */
+ double time, time_diff; /* to compute time spent on DGEMM */
+ double gflops; /* convert time to GFLOPS */
+ int nthreads, tid; /* number of OpenMP threads and thread id */
/* Verify OpenMP working properly */
#pragma omp parallel private(nthreads, tid)
A = (double *)malloc( m*k*sizeof( double ) );
B = (double *)malloc( k*n*sizeof( double ) );
C = (double *)malloc( m*n*sizeof( double ) );
-
-// printf("Matrix A, B, C address: 0x%x, 0x%x, 0x%x\n", (unsigned int)A, (unsigned int)B, (unsigned int)C);
C_copy = (double *)malloc( m*n*sizeof( double ) );
if (A == NULL || B == NULL || C == NULL || C_copy == NULL) {
printf( "\nERROR: Can't allocate memory for matrices. Aborting... \n\n");
/* Generate matrices */
matrix_gen(A, B, C, m, k, n);
- memcpy(C_copy, C, m*n*sizeof(double));
+ memcpy(C_copy, C, m*n*sizeof(double));
- /* Call standard CBLAS API for dgemm */
+ /* Call standard CBLAS API to do matrix multiplication */
time = omp_get_wtime();
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, k, B, n, beta, C, n);
time_diff = omp_get_wtime() - time;
- gflops = ( 2.0 * m * n * k ) / time_diff / FLOPS_PER_UNIT_PERF;
+ gflops = ( 2.0 * m * n * k ) / time_diff / 1e9;
printf("DGEMM time for (m,n,k) = (%d,%d,%d) is %e, GFLOPS is %e.\n", m,n,k, time_diff, gflops);
/* Straightforward matrix multiplication as reference */
index d7795d51b6b0c7286f3e74241f945d228c993402..cc1c3eb7835e28d9f6d61515062e28aeb2fc90fa 100644 (file)
*===========================================================================*/\r
\r
/* Define memory descriptors for memory management */\r
-lib_memdscr_t blas_mem_vfast;\r
-lib_memdscr_t blas_mem_fast;\r
-lib_memdscr_t blas_mem_medium;\r
-lib_memdscr_t blas_mem_slow;\r
+lib_memdscr_t blas_mem_vfast; /* memory descriptor for very fast scratch memory */\r
+lib_memdscr_t blas_mem_fast; /* memory descriptor for fast scratch memory */\r
+lib_memdscr_t blas_mem_medium; /* memory descriptor for medium speed scratch memory */\r
+lib_memdscr_t blas_mem_slow; /* memory descriptor for slow scratch memory */\r
\r
-/* Define a memory descriptor array */\r
+/* Define an array of pointers to the BLAS memory descriptors */\r
lib_memdscr_t * blas_memdscr_tab[LIB_MEMTYPE_N] = {\r
&blas_mem_vfast,\r
&blas_mem_fast,\r
/*==============================================================================\r
* This internal function returns the address of the memory descriptor array.\r
*============================================================================*/\r
-void * blasGetMemHandle()\r
+void * blasGetMemHandle(void)\r
{\r
return((void *)&blas_memdscr_tab[0]);\r
} /* blasGetMemHandle */\r
bli_get_mem_sizes(&mem_vfast_size_req, &mem_fast_size_req, \r
&mem_medium_size_req, &mem_slow_size_req);\r
\r
- /* Verify supplied memories meet requirements */ \r
+ /* Verify supplied memory meets requirements */ \r
if( ( (mem_vfast_base == NULL) || (mem_vfast_size < mem_vfast_size_req) )\r
||( (mem_fast_base == NULL) || (mem_fast_size < mem_fast_size_req) )\r
||( (mem_medium_base == NULL) || (mem_medium_size < mem_medium_size_req) )\r
/*==============================================================================\r
* External API. Refer to ticblas.h for detailed documentation.\r
*============================================================================*/\r
-int tiCblasNew()\r
+int tiCblasNew(void)\r
{\r
if(bli_init() == BLIS_SUCCESS) {\r
return(TICBLAS_SUCCESS);\r
/*==============================================================================\r
* External API. Refer to ticblas.h for detailed documentation.\r
*============================================================================*/\r
-int tiCblasDelete()\r
+int tiCblasDelete(void)\r
{\r
if(bli_finalize() == BLIS_SUCCESS) {\r
return(TICBLAS_SUCCESS);\r
index 813da186ada12acc6d3b9f1525fe183c1e5f63ee..3a9aa503b18f6c88d4188f0f73464c92b2ad2efd 100644 (file)
* the memory types defined by the Library Architecture and \r
* Framework (LibArch).\r
*\r
- * @remarks Memory types are categorized into scratch and permanent memories:\r
+ * @remarks Memory types are categorized into scratch and permanent memory:\r
* - Scratch memory content doesn't need to be stored from call to call;\r
* - Permanent memory content must be stored across multiple calls.\r
* For example, if an application uses library A and library B, the \r
* scratch memory of library A and library B can be shared/overlapped,\r
* but permanent memory may not be shared/overlapped. \r
*\r
- * Scratch and permanent memories are each categorized into four types\r
+ * Scratch and permanent memory are each categorized into four types\r
* based on speed:\r
* - very fast memory, e.g. L1D;\r
* - fast memory, e.g. L2;\r
* - medium speed memory, e.g. L3/MSMC;\r
* - slow memory, e.g. DDR.\r
*\r
- * @param[out] smem_size_vfast size of very fast shared memory \r
- * @param[out] smem_size_fast size of fast shared memory \r
- * @param[out] smem_size_medium size of medium speed shared memory \r
- * @param[out] smem_size_slow size of slow shared memory \r
+ * @param[out] smem_size_vfast size of very fast scratch memory \r
+ * @param[out] smem_size_fast size of fast scratch memory \r
+ * @param[out] smem_size_medium size of medium speed scratch memory \r
+ * @param[out] smem_size_slow size of slow scratch memory \r
*\r
*/\r
void tiCblasGetSizes(size_t *smem_size_vfast, size_t *smem_size_fast, \r
* @retval TICBLAS_SUCCESS @copydoc TICBLAS_SUCCESS\r
* @retval TICBLAS_ERROR_NEW @copydoc TICBLAS_ERROR_NEW\r
*/\r
-int tiCblasNew();\r
+int tiCblasNew(void);\r
\r
/**\r
* @ingroup ti_cblas_api\r
* @brief Function tiCblasInit() performs heap initialization for TI CBLAS \r
* to do dynamic memory allocations. \r
*\r
- * @remarks Users must allocate memories according to the requirements \r
+ * @remarks Users must allocate memory according to the requirements \r
* given by tiCblasGetSizes() and pass the bases and sizes to this\r
* function. \r
*\r
* needs to be called every time a level 3 CBLAS function is called\r
* on DSP. \r
*\r
- * @param[in] mem_vfast_base base of very fast shared memory \r
- * @param[in] mem_vfast_size size of very fast shared memory \r
- * @param[in] mem_fast_base base of fast shared memory \r
- * @param[in] mem_fast_size size of fast shared memory \r
- * @param[in] mem_medium_base base of medium speed shared memory \r
- * @param[in] mem_medium_size size of medium speed shared memory \r
- * @param[in] mem_slow_base base of slow shared memory \r
- * @param[in] mem_slow_size size of slow shared memory \r
+ * @param[in] smem_vfast_base base of very fast scratch memory \r
+ * @param[in] smem_vfast_size size of very fast scratch memory \r
+ * @param[in] smem_fast_base base of fast scratch memory \r
+ * @param[in] smem_fast_size size of fast scratch memory \r
+ * @param[in] smem_medium_base base of medium speed scratch memory \r
+ * @param[in] smem_medium_size size of medium speed scratch memory \r
+ * @param[in] smem_slow_base base of slow scratch memory \r
+ * @param[in] smem_slow_size size of slow scratch memory \r
*\r
* @retval TICBLAS_SUCCESS @copydoc TICBLAS_SUCCESS\r
* @retval TICBLAS_ERROR_NOMEM @copydoc TICBLAS_ERROR_NOMEM\r
* @retval TICBLAS_ERROR_MEMALLOC @copydoc TICBLAS_ERROR_MEMALLOC\r
*/\r
-int tiCblasInit(void * mem_vfast_base, size_t mem_vfast_size,\r
- void * mem_fast_base, size_t mem_fast_size,\r
- void * mem_medium_base, size_t mem_medium_size,\r
- void * mem_slow_base, size_t mem_slow_size);\r
+int tiCblasInit(void * smem_vfast_base, size_t smem_vfast_size,\r
+ void * smem_fast_base, size_t smem_fast_size,\r
+ void * smem_medium_base, size_t smem_medium_size,\r
+ void * smem_slow_base, size_t smem_slow_size);\r
\r
/**\r
* @ingroup ti_cblas_api\r
- * @brief Function tiCblasDelete() deletes global structures and frees memories\r
- * of CBLAS created by tiCblasNew(). \r
+ * @brief Function tiCblasDelete() deletes global structures and frees \r
+ * memory of CBLAS created by tiCblasNew(). \r
*\r
* @retval TICBLAS_SUCCESS @copydoc TICBLAS_SUCCESS\r
* @retval TICBLAS_ERROR_DELETE @copydoc TICBLAS_ERROR_DELETE\r
*/\r
-int tiCblasDelete();\r
+int tiCblasDelete(void);\r
\r
#endif /* _TICBLAS_H */\r