1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <time.h>
6 #include "cblas.h"
8 /* Number of elements in matrix to display */
/* For profiling */

/*
 * tick(): store the current CLOCK_MONOTONIC time in the file-scope t0.
 * The expansion deliberately carries no trailing semicolon, so `tick();`
 * is exactly one statement and is safe inside an unbraced if/else.
 */
#define tick() (clock_gettime(CLOCK_MONOTONIC, &t0))

/*
 * tock(): store the current CLOCK_MONOTONIC time in the file-scope t1 and
 * evaluate to the seconds elapsed since the matching tick(), as a double.
 */
#define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
                t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9)

/* Stream that report_flops() writes its summary line to. */
#define fout stdout
/*
 * Operand matrices for the timed product, held as flat heap arrays that
 * main() allocates and frees: A has m*k elements, B has k*n, C has m*n.
 */
double *A;
double *B;
double *C;

/* Problem dimensions: A is m x k, B is k x n, C is m x n. */
int m;
int n;
int k;

/* Scalars passed straight through to cblas_dgemm(). */
double alpha;
double beta;

/* Start/stop timestamps written by the tick() and tock() macros. */
struct timespec t0;
struct timespec t1;

/* Running total, in seconds, of time spent inside cblas_dgemm(). */
double secs = 0.0;
23 static void report_flops(double secs, int m, int n, int k, int N)
24 {
25 fprintf(fout,"Total time for %d tests: %8.6fs, %5.3f Mflops\n",
26 N, secs, (float)N*m*n*(2*k-1) / (secs * 1e6));
27 }
29 double matrix_mult(void) {
30 int i,j;
31 for (i = 0; i < (m*k); i++) {
32 A[i] = (double)rand()/RAND_MAX;
33 }
35 for (i = 0; i < (k*n); i++) {
36 B[i] = (double)rand()/RAND_MAX;
37 }
39 for (i = 0; i < (m*n); i++) {
40 C[i] = 0.0;
41 }
43 tick();
44 cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, k, B, n, beta, C, n);
45 secs += tock();
47 /* We do a simplistic checksum across a subset of the result matrix */
48 double checksum = 0.0;
49 for (i=0; i<m; i++)
50 for (j=0; j<n; j++)
51 checksum += C[j+i*n];
52 return checksum;
53 }
55 int main()
56 {
57 int t;
58 double checksum;
59 char *ti_cblas_offload_env;
60 int numtests = 10;
62 /* configuration */
63 m = k = n = 1000;
64 alpha = 0.7;
65 beta = 1.3;
67 /* allocate the matrices */
68 A = (double *)malloc( m*k*sizeof( double ) );
69 B = (double *)malloc( k*n*sizeof( double ) );
70 C = (double *)malloc( m*n*sizeof( double ) );
71 if (A == NULL || B == NULL || C == NULL) {
72 printf( "\nERROR: Can't allocate memory for matrices. Aborting... \n\n");
73 free(A);
74 free(B);
75 free(C);
76 return 1;
77 }
79 srand(123456789);
81 /* Force BLAS execution on ARM due to insufficient MSMC memory.
82 This will be removed later. */
83 putenv("TI_CBLAS_OFFLOAD=000");
85 /* Check the environment variable that controls offloading */
86 ti_cblas_offload_env = getenv("TI_CBLAS_OFFLOAD");
87 if(ti_cblas_offload_env == NULL) {
88 printf("TI_CBLAS_OFFLOAD is not defined.\n");
89 }
90 else {
91 printf("TI_CBLAS_OFFLOAD is defined as %s\n", ti_cblas_offload_env);
92 }
94 printf ("A(%ix%i) X B(%ix%i) => C(%ix%i)\n", m, k, k, n, m, n);
96 printf("Warming caches (by doing a single matrix-multiply)..\n");
97 checksum = matrix_mult();
99 /* reset secs, so we can now begin the real timing */
100 secs = 0;
102 printf("Now doing %d tests after warming caches\n", numtests);
103 for (t=0; t<numtests; t++)
104 checksum += matrix_mult();
105 report_flops(secs, m, n, k, numtests);
107 printf("Result CHECKSUM: %16.4f\n", checksum);
109 free(A);
110 free(B);
111 free(C);
113 return 0;
114 }