1 /******************************************************************************
2 * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <math.h>
32 #include <time.h>
34 #include "cblas.h"
36 /* Number of elements in matrix to display */
/* For profiling */

/*
 * tick(): store the current CLOCK_MONOTONIC time in the file-scope t0.
 *
 * The expansion is a bare expression with no trailing semicolon, so that
 * "tick();" is exactly one statement and remains valid as the body of an
 * unbraced if/else.
 */
#define tick() clock_gettime(CLOCK_MONOTONIC, &t0)

/*
 * tock(): store the current CLOCK_MONOTONIC time in the file-scope t1 and
 * evaluate to the time elapsed since the last tick(), in seconds (double).
 */
#define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
                t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9)

/* Stream that receives the benchmark report. */
#define fout stdout

/*
 * Reference checksum. The only use visible in this file is a commented-out
 * comparison in main(); the note there says the value depends on the rand()
 * seed and on the matrix sizes.
 */
#define REF_CHECKSUM (1925318033.9552)
/* Matrix storage shared by main() and matrix_mult(). */
double *A;              /* left operand,  m x k */
double *B;              /* right operand, k x n */
double *C;              /* result,        m x n */

/* Matrix dimensions. */
int m;
int n;
int k;

/* Scalars passed to cblas_dgemm. */
double alpha;
double beta;

/* Timestamps written by the tick()/tock() macros. */
struct timespec t0;
struct timespec t1;

/* Accumulated time spent inside cblas_dgemm, in seconds. */
double secs = 0.0;
51 static void report_flops(double secs, int m, int n, int k, int N)
52 {
53 fprintf(fout,"Total time for %d tests: %8.6fs, %5.3f Gflops\n",
54 N, secs, (float)N*2*m*n*k / (secs * 1e9));
55 }
57 double matrix_mult(void) {
58 int i,j;
59 for (i = 0; i < (m*k); i++) {
60 A[i] = (double)rand()/RAND_MAX;
61 }
63 for (i = 0; i < (k*n); i++) {
64 B[i] = (double)rand()/RAND_MAX;
65 }
67 for (i = 0; i < (m*n); i++) {
68 C[i] = 0.0;
69 }
71 tick();
72 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, k, B, n, beta, C, n);
73 secs += tock();
75 /* We do a simplistic checksum across a subset of the result matrix */
76 double checksum = 0.0;
77 for (i=0; i<m; i++)
78 for (j=0; j<n; j++)
79 checksum += C[j+i*n];
80 return checksum;
81 }
83 int main()
84 {
85 int t;
86 double checksum;
87 char *ti_cblas_offload_env;
88 int numtests = 10;
90 /* configuration */
91 m = k = n = 500;
92 alpha = 0.7;
93 beta = 1.3;
95 /* allocate the matrices */
96 A = (double *)__malloc_ddr( m*k*sizeof( double ) );
97 B = (double *)__malloc_ddr( k*n*sizeof( double ) );
98 C = (double *)__malloc_ddr( m*n*sizeof( double ) );
99 if (A == NULL || B == NULL || C == NULL) {
100 printf( "\nERROR: Can't allocate memory for matrices. Aborting... \n\n");
101 __free_ddr(A);
102 __free_ddr(B);
103 __free_ddr(C);
104 return 1;
105 }
107 srand(123456789);
109 /* Check the environment variable that controls offloading */
110 ti_cblas_offload_env = getenv("TI_CBLAS_OFFLOAD");
111 if(ti_cblas_offload_env == NULL) {
112 printf("Environment variable TI_CBLAS_OFFLOAD is not defined.");
113 printf("Use default offloading configuration:\n");
114 printf("\tBLAS level 1: running on ARM.\n");
115 printf("\tBLAS level 2: running on ARM.\n");
116 printf("\tBLAS level 3: running on ARM or DSP based on matrix sizes.\n");
117 }
118 else {
119 printf("TI_CBLAS_OFFLOAD is defined as %s\n", ti_cblas_offload_env);
120 }
122 printf ("A(%ix%i) X B(%ix%i) => C(%ix%i)\n", m, k, k, n, m, n);
124 printf("Warming caches (by doing a single matrix-multiply)..\n");
125 checksum = matrix_mult();
127 /* reset secs, so we can now begin the real timing */
128 secs = 0;
130 printf("Now doing %d tests after warming caches\n", numtests);
131 for (t=0; t<numtests; t++) {
132 checksum += matrix_mult();
133 }
134 report_flops(secs, m, n, k, numtests);
136 /* Check results. Note: REF_CHECKSUM depends on rand seed and matrix sizes. */
137 //if(abs(checksum-REF_CHECKSUM) < 0.0001) {
138 printf("Passed.\n");
139 //}
141 __free_ddr(A);
142 __free_ddr(B);
143 __free_ddr(C);
145 return 0;
146 }