[ep-processor-libraries/dsplib.git] / ti / dsplib / src / DSPF_dp_mat_mul_gemm / c66 / DSPF_dp_mat_mul_gemm_d.c
1 /* ======================================================================= */
2 /* DSPF_dp_mat_mul_gemm_d.c -- Matrix Multiply "y = a*x1*x2+y" */
3 /* Driver code; tests kernel and reports result in stdout */
4 /* */
5 /* Rev 0.0.2 */
6 /* */
7 /* Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ */
8 /* */
9 /* */
10 /* Redistribution and use in source and binary forms, with or without */
11 /* modification, are permitted provided that the following conditions */
12 /* are met: */
13 /* */
14 /* Redistributions of source code must retain the above copyright */
15 /* notice, this list of conditions and the following disclaimer. */
16 /* */
17 /* Redistributions in binary form must reproduce the above copyright */
18 /* notice, this list of conditions and the following disclaimer in the */
19 /* documentation and/or other materials provided with the */
20 /* distribution. */
21 /* */
22 /* Neither the name of Texas Instruments Incorporated nor the names of */
23 /* its contributors may be used to endorse or promote products derived */
24 /* from this software without specific prior written permission. */
25 /* */
26 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
27 /* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */
28 /* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR */
29 /* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT */
30 /* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
31 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT */
32 /* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
33 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY */
34 /* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
35 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE */
36 /* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
37 /* */
38 /* ======================================================================= */
#include <stdio.h>
#include <time.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <c6x.h>
46 /* ======================================================================= */
47 /* Interface header files for the natural C and optimized C code */
48 /* ======================================================================= */
49 #include "DSPF_dp_mat_mul_gemm_cn.h"
50 #include "DSPF_dp_mat_mul_gemm.h"
/* Defines */
#if defined(__TI_EABI__)
#define kernel_size _kernel_size
#endif

/* Symbol defined outside this file; the driver prints its address as the   */
/* "Memory: ... bytes" figure (presumably a linker-generated size symbol -- */
/* confirm against the linker command file).                                */
extern char kernel_size;

/* ----------------------------------------------------------------------- */
/*  Cycle formula model                                                     */
/*                                                                          */
/*  The optimized kernel is timed at the 2x2x2 combinations of the sample   */
/*  points below and the cycle count is modelled as                         */
/*                                                                          */
/*    k0*r1*c2*c1 + k1*r1*c2 + k2*r1*c1 + k3*c2*c1                          */
/*                + k4*r1 + k5*c2 + k6*c1 + k7                              */
/*                                                                          */
/*  k0..k6 are reported as fractions with denominator FORMULA_DEVIDE.       */
/* ----------------------------------------------------------------------- */
#define FORMULA_SIZE 8
#define FORMULA_DEVIDE 2
#define CYCLE_FORMULA_R1_PT1 12
#define CYCLE_FORMULA_R1_PT2 8
#define CYCLE_FORMULA_R2_PT1 12
#define CYCLE_FORMULA_R2_PT2 8
#define CYCLE_FORMULA_C2_PT1 12
#define CYCLE_FORMULA_C2_PT2 8

/* inverse of [12*12*12 12*12 12*12 12*12 12 12 12 1] */
/*            [12*12* 8 12*12 12* 8 12* 8 12 12  8 1] */
/*            [12* 8*12 12* 8 12*12  8*12 12  8 12 1] */
/*            [ 8*12*12  8*12  8*12 12*12  8 12 12 1] */
/*            [12* 8* 8 12* 8 12* 8  8* 8 12  8  8 1] */
/*            [ 8*12* 8  8*12  8* 8 12* 8  8 12  8 1] */
/*            [ 8* 8*12  8* 8  8*12  8*12  8  8 12 1] */
/*            [ 8* 8* 8  8* 8  8* 8  8* 8  8  8  8 1] */
/* Every entry is a multiple of 1/64 and is written out exactly; the first  */
/* row is +/- 1/64 = 0.015625 (a rounded 0.0156 would not be the inverse).  */
float form_inv[FORMULA_SIZE][FORMULA_SIZE] =
{{ 0.015625, -0.015625, -0.015625, -0.015625,  0.015625,  0.015625,  0.015625, -0.015625},
 {-0.1250,    0.1875,    0.1250,    0.1250,   -0.1875,   -0.1875,   -0.1250,    0.1875},
 {-0.1250,    0.1250,    0.1875,    0.1250,   -0.1875,   -0.1250,   -0.1875,    0.1875},
 {-0.1250,    0.1250,    0.1250,    0.1875,   -0.1250,   -0.1875,   -0.1875,    0.1875},
 { 1.0000,   -1.5000,   -1.5000,   -1.0000,    2.2500,    1.5000,    1.5000,   -2.2500},
 { 1.0000,   -1.5000,   -1.0000,   -1.5000,    1.5000,    2.2500,    1.5000,   -2.2500},
 { 1.0000,   -1.0000,   -1.5000,   -1.5000,    1.5000,    1.5000,    2.2500,   -2.2500},
 {-8.0000,   12.0000,   12.0000,   12.0000,  -18.0000,  -18.0000,  -18.0000,   27.0000}
};

float form_temp  [FORMULA_SIZE];   /* form_inv * form_cycle (raw coefficients)  */
int   form_cycle [FORMULA_SIZE];   /* measured cycles at the eight sample points */
int   form_result[FORMULA_SIZE];   /* rounded coefficients that get printed      */
/* ======================================================================= */
/*  Dimensions of the fixed data set                                        */
/* ======================================================================= */
#define NR1 (16)
#define NC1 (16)
#define NC2 (16)

/* ======================================================================= */
/*  Global data buffers.  Each one is preceded by a DATA_ALIGN pragma that  */
/*  tells the compiler the array is double word (8 byte) aligned.           */
/* ======================================================================= */
#pragma DATA_ALIGN(ptr_y_opt, 8);
double ptr_y_opt[NR1 * NC2];

#pragma DATA_ALIGN(ptr_y_cn, 8);
double ptr_y_cn[NR1 * NC2];

#pragma DATA_ALIGN(ptr_x1, 8);
double ptr_x1[NR1 * NC1];

#pragma DATA_ALIGN(ptr_x2, 8);
double ptr_x2[NC1 * NC2];
/* ======================================================================= */
/*  Timing functions: each takes the matrix dimensions (r1, c1r2, c2) and   */
/*  returns a clock_t count for one kernel call.                            */
/* ======================================================================= */
clock_t time_opt(int r1, int c1r2, int c2);
clock_t time_cn (int r1, int c1r2, int c2);

/* ======================================================================= */
/*  Utility function: fills N doubles at ptr_x with random values scaled    */
/*  by factor.                                                              */
/* ======================================================================= */
void UTIL_fillRandSP(double *ptr_x, int N, double factor);
119 /* ======================================================================= */
120 /* Main -- Top level driver for testing the algorithm */
121 /* ======================================================================= */
122 void main()
123 {
124 clock_t t_overhead, t_start, t_stop, t_opt, t_cn;
125 int i, j=1, r1 = NR1, c1r2 = NC1, c2 = NC2;
126 int form_error = 0;
127 float pct_diff, max_pct_diff = 0;
129 /* ------------------------------------------------------------------- */
130 /* Compute the overhead of calling clock twice to get timing info */
131 /* ------------------------------------------------------------------- */
132 /* Initialize timer for clock */
133 TSCL= 0,TSCH=0;
134 t_start = _itoll(TSCH, TSCL);
135 t_stop = _itoll(TSCH, TSCL);
136 t_overhead = t_stop - t_start;
138 /* ------------------------------------------------------------------- */
139 /* Generate random inputs in range (-10, 10). */
140 /* ------------------------------------------------------------------- */
141 UTIL_fillRandSP(ptr_x1, NR1 * NC1, 10.0);
142 UTIL_fillRandSP(ptr_x2, NC1 * NC2, 10.0);
144 for(r1 = 8, i = 1; r1 <= NR1; r1 += 4) {
145 for(c1r2 = 8; c1r2 <= NC1; c1r2 += 4) {
146 for(c2 = 8; c2 <= NC2; i++, c2 += 4) {
147 /* ------------------------------------------------------------- */
148 /* Clear state/output buffers with fixed values. */
149 /* ------------------------------------------------------------- */
150 UTIL_fillRandSP(ptr_y_cn, NC1 * NC2, 10.0);
151 memcpy(ptr_y_opt, ptr_y_cn, sizeof(ptr_y_cn));
153 /* ------------------------------------------------------------- */
154 /* Call the individual timing routines */
155 /* ------------------------------------------------------------- */
156 t_opt = time_opt(r1, c1r2, c2) - t_overhead;
157 t_cn = time_cn(r1, c1r2, c2) - t_overhead;
159 printf("DSPF_dp_mat_mul_gemm\tIter#: %d\t", j++);
161 /* ------------------------------------------------------------- */
162 /* compute percent difference and track max difference */
163 /* ------------------------------------------------------------- */
164 for (i = 0; i < r1 * c2; i++) {
165 pct_diff = (ptr_y_cn[i] - ptr_y_opt[i]) / ptr_y_cn[i] * 100.0;
166 if (pct_diff < 0) pct_diff *= -1;
167 if (pct_diff > max_pct_diff) max_pct_diff = pct_diff;
168 }
169 if (max_pct_diff > 0.05)
170 printf("Result Failure, max_pct_diff=%f\n", max_pct_diff);
171 else
172 printf("Result Successful ");
174 /* ------------------------------------------------------------- */
175 /* Print timing results */
176 /* ------------------------------------------------------------- */
177 printf("\tNR = %.2d\tNC1 = %.2d\tNC2 = %.2d\tnatC: %d\toptC: %d\n", r1, c1r2, c2, t_cn, t_opt);
178 if (c2 == CYCLE_FORMULA_C2_PT1 && c1r2 == CYCLE_FORMULA_R2_PT1 && r1 == CYCLE_FORMULA_R1_PT1)
179 form_cycle[0] = t_opt;
180 if (c2 == CYCLE_FORMULA_C2_PT1 && c1r2 == CYCLE_FORMULA_R2_PT2 && r1 == CYCLE_FORMULA_R1_PT1)
181 form_cycle[1] = t_opt;
182 if (c2 == CYCLE_FORMULA_C2_PT2 && c1r2 == CYCLE_FORMULA_R2_PT1 && r1 == CYCLE_FORMULA_R1_PT1)
183 form_cycle[2] = t_opt;
184 if (c2 == CYCLE_FORMULA_C2_PT1 && c1r2 == CYCLE_FORMULA_R2_PT1 && r1 == CYCLE_FORMULA_R1_PT2)
185 form_cycle[3] = t_opt;
186 if (c2 == CYCLE_FORMULA_C2_PT2 && c1r2 == CYCLE_FORMULA_R2_PT2 && r1 == CYCLE_FORMULA_R1_PT1)
187 form_cycle[4] = t_opt;
188 if (c2 == CYCLE_FORMULA_C2_PT1 && c1r2 == CYCLE_FORMULA_R2_PT2 && r1 == CYCLE_FORMULA_R1_PT2)
189 form_cycle[5] = t_opt;
190 if (c2 == CYCLE_FORMULA_C2_PT2 && c1r2 == CYCLE_FORMULA_R2_PT1 && r1 == CYCLE_FORMULA_R1_PT2)
191 form_cycle[6] = t_opt;
192 if (c2 == CYCLE_FORMULA_C2_PT2 && c1r2 == CYCLE_FORMULA_R2_PT2 && r1 == CYCLE_FORMULA_R1_PT2)
193 form_cycle[7] = t_opt;
194 }
195 }
196 }
198 /* Provide memory information */
199 #ifdef __TI_COMPILER_VERSION__ // for TI compiler only
200 printf("Memory: %d bytes\n", &kernel_size);
201 #endif
203 /* Provide profiling information */
204 for (i = 0; i < FORMULA_SIZE; i++) {
205 form_temp[i] = 0;
206 for (j = 0; j < FORMULA_SIZE; j++) {
207 form_temp[i] += form_inv[i][j] * form_cycle[j];
208 }
209 if (i != (FORMULA_SIZE-1)) {
210 form_result[i] = (int) (form_temp[i] * FORMULA_DEVIDE + 0.5);
211 if ((form_result[i] - form_temp[i] * FORMULA_DEVIDE) > 0.1 ||
212 (form_result[i] - form_temp[i] * FORMULA_DEVIDE) < -0.1) {
213 form_error = 1;
214 }
215 }
216 else {
217 form_result[i] = (int) (form_temp[i] + 0.5);
218 }
219 }
221 if (!form_error) {
222 printf("Cycles: ");
223 if (form_result[0])
224 printf("%d/%d*r1*c2*c1 + ", form_result[0], FORMULA_DEVIDE);
225 if (form_result[1])
226 printf("%d/%d*r1*c2 + ", form_result[1], FORMULA_DEVIDE);
227 if (form_result[2])
228 printf("%d/%d*r1*c1 + ", form_result[2], FORMULA_DEVIDE);
229 if (form_result[3])
230 printf("%d/%d*c2*c1 + ", form_result[3], FORMULA_DEVIDE);
231 if (form_result[4])
232 printf("%d/%d*r1 + ", form_result[4], FORMULA_DEVIDE);
233 if (form_result[5])
234 printf("%d/%d*c2 + ", form_result[5], FORMULA_DEVIDE);
235 if (form_result[6])
236 printf("%d/%d*c1 + ", form_result[6], FORMULA_DEVIDE);
237 if (form_result[7])
238 printf("%d\n", form_result[7]);
239 }
240 else
241 printf("Cycles Formula Not Available\n");
243 }
245 /* ======================================================================= */
246 /* Prototypes for timing functions */
247 /* ======================================================================= */
248 clock_t time_cn(int r1, int c1r2, int c2)
249 {
250 clock_t t_start, t_stop;
252 /* ------------------------------------------------------------------- */
253 /* Measure the cycle count */
254 /* ------------------------------------------------------------------- */
255 t_start = _itoll(TSCH, TSCL);
256 DSPF_dp_mat_mul_gemm_cn(ptr_x1, 1000.0, r1, c1r2, ptr_x2, c2, ptr_y_cn);
257 t_stop = _itoll(TSCH, TSCL);
259 return t_stop - t_start;
260 }
262 clock_t time_opt(int r1, int c1r2, int c2)
263 {
264 clock_t t_start, t_stop;
266 /* ------------------------------------------------------------------- */
267 /* Measure the cycle count */
268 /* ------------------------------------------------------------------- */
269 t_start = _itoll(TSCH, TSCL);
270 DSPF_dp_mat_mul_gemm(ptr_x1, 1000.0, r1, c1r2, ptr_x2, c2, ptr_y_opt);
271 t_stop = _itoll(TSCH, TSCL);
273 return t_stop - t_start;
274 }
/* UTIL_fillRandSP -- stores N pseudo-random doubles at ptr_x.              */
/* Each value is ((rand() - RAND_MAX/2) / (RAND_MAX/2)) * factor, so for a  */
/* non-negative factor it lies in [-factor, factor].  Nothing is written    */
/* when N <= 0.                                                             */
void UTIL_fillRandSP(double *ptr_x, int N, double factor)
{
    const double half_range = RAND_MAX / 2.0;
    double *cursor = ptr_x;
    int remaining;

    /* one rand() call per element, stored in ascending index order */
    for (remaining = N; remaining > 0; remaining--) {
        *cursor = ((rand() - half_range) / half_range) * factor;
        cursor++;
    }
}
285 /* ======================================================================= */
286 /* End of file: DSPF_dp_mat_mul_gemm_d.c */
287 /* ----------------------------------------------------------------------- */
288 /* Copyright (c) 2011 Texas Instruments, Incorporated. */
289 /* All Rights Reserved. */
290 /* ======================================================================= */