1 /******************************************************************************
2 * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <math.h>
31 #include <time.h>
33 #include "cblas.h"
34 #ifdef __cplusplus
35 extern "C" {
36 #endif
37 #include "cblas.h"
38 #ifdef __cplusplus
39 }
40 #endif
42 #define TUNING_START_SIZE_RECTAN_MATRIX 64
43 #define NUM_MATRIX_SIZE_TO_BENCHMARK 4
44 #define HAS_MEMORY 1
45 #define NO_MEMORY 0
46 #define OFFLOAD 1
47 #define NO_OFFLOAD 0
49 #define NUM_TEST_RUN 1
52 /*-----------------------------------------------------------------------------
53 * Timing Setup
54 *----------------------------------------------------------------------------*/
55 struct timespec t0,t1;
56 #define tick() clock_gettime(CLOCK_MONOTONIC, &t0);
57 #define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
58 t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9)
60 /*-----------------------------------------------------------------------------
61 * Global Variables
62 *----------------------------------------------------------------------------*/
63 double alpha = 0.7;
64 double beta = 0.3;
65 enum CBLAS_ORDER order = CblasColMajor;
66 //enum CBLAS_ORDER order = CblasRowMajor;
67 enum CBLAS_TRANSPOSE transA = CblasNoTrans;
68 enum CBLAS_TRANSPOSE transB = CblasNoTrans;
70 extern int TI_CBLAS_L3_OFFLOAD;
71 /*-----------------------------------------------------------------------------
72 * Prototypes
73 *----------------------------------------------------------------------------*/
74 int run_dgemm(int M, int N, int K, float *time, float *gflops);
76 /*-----------------------------------------------------------------------------
77 * MAIN
78 *----------------------------------------------------------------------------*/
79 int main()
80 {
81 int num_size, dgemm_err;
82 int M, N, K, m, n, k;
83 int M_pre, N_pre, K_pre, M_start_size, N_start_size;
84 float time_secs_arm, gflops_arm, time_secs_dsp, gflops_dsp, time_secs_opt, gflops_opt;
85 FILE *fp_time, *fp_flops;
87 fp_time = fopen("dgemm_time.dat","w");
88 fp_flops = fopen("dgemm_flops.dat","w");
90 srand(12345);
92 /* setting up TI CBLAS during first call */
93 run_dgemm(100, 100, 100, &time_secs_arm, &gflops_arm);
95 /* sweep M, K, and N */
96 for (M=TUNING_START_SIZE_RECTAN_MATRIX,m=0; m<NUM_MATRIX_SIZE_TO_BENCHMARK; m++,M*=2)
97 {
98 for (N=TUNING_START_SIZE_RECTAN_MATRIX,n=0; n<NUM_MATRIX_SIZE_TO_BENCHMARK; n++,N*=2)
99 {
100 for (K=TUNING_START_SIZE_RECTAN_MATRIX,k=0; k<NUM_MATRIX_SIZE_TO_BENCHMARK; k++,K*=2)
101 {
102 printf("Running DGEMM for (M,N,K) = (%d,%d,%d).\n", M,N,K);
104 TI_CBLAS_L3_OFFLOAD = 0;
105 dgemm_err = run_dgemm(M, N, K, &time_secs_arm, &gflops_arm);
107 if(dgemm_err == -1) { /* out of memory for DSP offloading */
108 printf("Out of memory for (M,N,K) = (%d,%d,%d).\n", M,N,K);
109 }
110 else {
111 TI_CBLAS_L3_OFFLOAD = 1;
112 dgemm_err = run_dgemm(M, N, K, &time_secs_dsp, &gflops_dsp);
114 TI_CBLAS_L3_OFFLOAD = 2;
115 dgemm_err = run_dgemm(M, N, K, &time_secs_opt, &gflops_opt);
117 fprintf(fp_time, "%6d\t%6d\t%6d\t%10.8e\t%10.8e\t%10.8e\n",
118 M, N, K, time_secs_arm, time_secs_dsp, time_secs_opt);
119 fprintf(fp_flops, "%6d\t%6d\t%6d\t%10.8e\t%10.8e\t%10.8e\n",
120 M, N, K, gflops_arm, gflops_dsp, gflops_opt);
121 }
122 }
123 }
124 }
126 fclose(fp_time);
127 fclose(fp_flops);
129 return 0;
130 }
133 int run_dgemm(int M, int N, int K, float *time, float *gflops)
134 {
135 int iter;
136 long long i;
137 double time_secs, total_time;
138 double operation_count = 2.0*(double)M*(double)N*(double)K;
139 double total_GFLOPS = 0.0f;
140 int err_code = 0;
142 total_time = 0.0;
143 for (iter = 0; iter < NUM_TEST_RUN; iter++)
144 {
145 /*-------------------------------------------------------------------------
146 * Allocate space for the matrices.
147 *------------------------------------------------------------------------*/
148 double *A = (double *) __malloc_ddr((long long)M*(long long)K*(long long)sizeof(double));
149 double *B = (double *) __malloc_ddr((long long)K*(long long)N*(long long)sizeof(double));
150 double *C = (double *) __malloc_ddr((long long)M*(long long)N*(long long)sizeof(double));
152 if (!A || !B || !C)
153 {
154 printf("Could not allocate enough space for the arrays!");
155 if(A) __free_ddr(A);
156 if(B) __free_ddr(B);
157 if(C) __free_ddr(C);
159 return (-1);
160 }
162 /*-------------------------------------------------------------------------
163 * Initialize matrices
164 *------------------------------------------------------------------------*/
165 for (i = 0; i < (long long)M*K; ++i) A[i] = (double)rand()/RAND_MAX;// (double)(rand() % 5 + 1);
166 for (i = 0; i < (long long)K*N; ++i) B[i] = (double)rand()/RAND_MAX;// (double)(rand() % 5 + 1);
167 for (i = 0; i < (long long)M*N; ++i) C[i] = 0;
169 int lda = ((order == CblasColMajor && transA == CblasNoTrans) ||
170 (order == CblasRowMajor && transA == CblasTrans)) ? M : K;
172 int ldb = ((order == CblasColMajor && transB == CblasNoTrans) ||
173 (order == CblasRowMajor && transB == CblasTrans)) ? K : N;
175 int ldc = (order == CblasColMajor) ? M : N;
177 fflush(stdout);
179 /*------------------------------------------------------------------------
180 * Run and time dgemm
181 *-----------------------------------------------------------------------*/
182 tick();
183 cblas_dgemm(order,transA,transB,M,N,K,alpha,A,lda,B,ldb,beta,C,ldc);
184 time_secs = tock();
185 total_time += time_secs;
186 total_GFLOPS += operation_count/time_secs*1e-9;
187 /*
188 if(M==4096 && K==256 && N==16) {
189 FILE *file_a = fopen("mat_a.dat","w");
190 FILE *file_b = fopen("mat_b.dat","w");
191 FILE *file_c = fopen("mat_c.dat","w");
193 for(i=0; i < M*K; ++i) fprintf(file_a, "%1.10e\n",A[i]);
194 for(i=0; i < K*N; ++i) fprintf(file_b, "%1.10e\n",B[i]);
195 for(i=0; i < M*N; ++i) fprintf(file_c, "%1.10e\n",C[i]);
196 }
197 */
199 __free_ddr(A);
200 __free_ddr(B);
201 __free_ddr(C);
202 }
204 *gflops = total_GFLOPS / (double)NUM_TEST_RUN;
205 *time = total_time / (double)NUM_TEST_RUN;
207 return err_code;
208 }