1 /******************************************************************************
2 * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <math.h>
32 #include <time.h>
34 #include "cblas.h"
36 /* Number of elements in matrix to display */
/*
 * For profiling.
 *
 * tick() stores the current CLOCK_MONOTONIC time in the file-scope t0.
 * tock() stores the current CLOCK_MONOTONIC time in the file-scope t1 and
 * evaluates to the seconds elapsed since the last tick(), as a double.
 *
 * Neither macro ends in a semicolon; the caller supplies it. That keeps
 * `tick();` a single statement, so it stays valid as the body of an
 * unbraced if/else.
 */
#define tick() ((void)clock_gettime(CLOCK_MONOTONIC, &t0))
#define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
                (double)(t1.tv_sec - t0.tv_sec) + (t1.tv_nsec - t0.tv_nsec) / 1e9)

/* Stream that the timing report is written to. */
#define fout stdout

/* Operand matrices A (m x k) and B (k x n), and the result matrix C (m x n). */
double *A, *B, *C;
/* Matrix dimensions. */
int m, n, k;
/* Scalars handed to cblas_dgemm as its alpha and beta arguments. */
double alpha, beta;
/* Timestamps written by tick() (t0) and tock() (t1). */
struct timespec t0, t1;
/* Accumulated seconds measured around the timed calls. */
double secs = 0.0;
50 static void report_flops(double secs, int m, int n, int k, int N)
51 {
52 fprintf(fout,"Total time for %d tests: %8.6fs, %5.3f Gflops\n",
53 N, secs, (float)N*2*m*n*k / (secs * 1e9));
54 }
56 double matrix_mult(void) {
57 int i,j;
58 for (i = 0; i < (m*k); i++) {
59 A[i] = (double)rand()/RAND_MAX;
60 }
62 for (i = 0; i < (k*n); i++) {
63 B[i] = (double)rand()/RAND_MAX;
64 }
66 for (i = 0; i < (m*n); i++) {
67 C[i] = 0.0;
68 }
70 tick();
71 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, k, B, n, beta, C, n);
72 secs += tock();
74 /* We do a simplistic checksum across a subset of the result matrix */
75 double checksum = 0.0;
76 for (i=0; i<m; i++)
77 for (j=0; j<n; j++)
78 checksum += C[j+i*n];
79 return checksum;
80 }
82 int main()
83 {
84 int t;
85 double checksum;
86 char *ti_cblas_offload_env;
87 int numtests = 10;
89 /* configuration */
90 m = k = n = 1000;
91 alpha = 0.7;
92 beta = 1.3;
94 /* allocate the matrices */
95 A = (double *)__malloc_ddr( m*k*sizeof( double ) );
96 B = (double *)__malloc_ddr( k*n*sizeof( double ) );
97 C = (double *)__malloc_ddr( m*n*sizeof( double ) );
98 if (A == NULL || B == NULL || C == NULL) {
99 printf( "\nERROR: Can't allocate memory for matrices. Aborting... \n\n");
100 __free_ddr(A);
101 __free_ddr(B);
102 __free_ddr(C);
103 return 1;
104 }
106 srand(123456789);
108 /* Check the environment variable that controls offloading */
109 ti_cblas_offload_env = getenv("TI_CBLAS_OFFLOAD");
110 if(ti_cblas_offload_env == NULL) {
111 printf("Environment variable TI_CBLAS_OFFLOAD is not defined.");
112 printf("Use default offloading configuration:\n");
113 printf("\tBLAS level 1: running on ARM.\n");
114 printf("\tBLAS level 2: running on ARM.\n");
115 printf("\tBLAS level 3: running on ARM or DSP based on matrix sizes.\n");
116 }
117 else {
118 printf("TI_CBLAS_OFFLOAD is defined as %s\n", ti_cblas_offload_env);
119 }
121 printf ("A(%ix%i) X B(%ix%i) => C(%ix%i)\n", m, k, k, n, m, n);
123 printf("Warming caches (by doing a single matrix-multiply)..\n");
124 checksum = matrix_mult();
126 /* reset secs, so we can now begin the real timing */
127 secs = 0;
129 printf("Now doing %d tests after warming caches\n", numtests);
130 for (t=0; t<numtests; t++) {
131 checksum += matrix_mult();
132 }
133 report_flops(secs, m, n, k, numtests);
135 printf("Passed.\n");
137 __free_ddr(A);
138 __free_ddr(B);
139 __free_ddr(C);
141 return 0;
142 }