1 /* ======================================================================= */
2 /* DSPF_dp_svd_d.c -- Singular Value Decomposition using double precision */
3 /* */
4 /* */
5 /* Rev 1.0.0 */
6 /* */
7 /* Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/ */
8 /* */
9 /* */
10 /* Redistribution and use in source and binary forms, with or without */
11 /* modification, are permitted provided that the following conditions */
12 /* are met: */
13 /* */
14 /* Redistributions of source code must retain the above copyright */
15 /* notice, this list of conditions and the following disclaimer. */
16 /* */
17 /* Redistributions in binary form must rep roduce the above copyright */
18 /* notice, this list of conditions and the following disclaimer in the */
19 /* documentation and/or other materials provided with the */
20 /* distribution. */
21 /* */
22 /* Neither the name of Texas Instruments Incorporated nor the names of */
23 /* its contributors may be used to endorse or promote products derived */
24 /* from this software without specific prior written permission. */
25 /* */
26 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
27 /* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCSVDING, BUT NOT */
28 /* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR */
29 /* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT */
30 /* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
31 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCSVDING, BUT NOT */
32 /* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
33 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY */
34 /* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
35 /* (INCSVDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE */
36 /* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
37 /* */
38 /* ======================================================================= */
39 #include <stdlib.h>
40 #include <stdio.h>
41 #include <string.h>
42 #include <math.h>
43 #include <c6x.h>
45 /* ======================================================================= */
46 /* Interface header files for the natural C and optimized C code */
47 /* ======================================================================= */
48 #include "DSPF_dp_svd_cn.h"
49 #include "DSPF_dp_svd.h"
51 /* ======================================================================= */
52 /* define kernel size constant */
53 /* ======================================================================= */
54 #if defined(__TI_EABI__)
55 #define kernel_size _kernel_size
56 #endif
58 /* ======================================================================= */
59 /* extern parameters */
60 /* ======================================================================= */
61 extern char kernel_size;
63 /* ======================================================================= */
64 /* define size constants */
65 /* ======================================================================= */
67 #define MAX_NROWS 128 /* max is 169 for L2SRAM */
68 #define MAX_NCOLS 128 /* max is 169 for L2SRAM */
69 #define MAX_SIZE (MAX_NROWS*MAX_NCOLS)
70 #define MAX_U_SIZE (MAX_NROWS*MAX_NROWS)
71 #define MAX_V_SIZE (MAX_NCOLS*MAX_NCOLS)
72 #define MAX_V_VECTOR_SIZE MAX_NROWS
74 /* ======================================================================= */
75 /* formula arrays */
76 /* ======================================================================= */
77 #define CYCLE_FORMULA_ORDER_PT1 64
78 #define CYCLE_FORMULA_ORDER_PT2 128
79 #define FORMULA_SIZE 2
80 long long form_cycles[FORMULA_SIZE];
82 /* ======================================================================= */
83 /* double word alignment of arrays */
84 /* ======================================================================= */
85 #pragma DATA_SECTION(A,".far:matrix")
86 #pragma DATA_ALIGN(A,8);
87 double A[MAX_SIZE];
89 #pragma DATA_SECTION(U,".far:matrix")
90 #pragma DATA_ALIGN(U,8);
91 double U[MAX_U_SIZE];
93 #pragma DATA_SECTION(V,".far:matrix")
94 #pragma DATA_ALIGN(V,8);
95 double V[MAX_V_SIZE];
97 #pragma DATA_SECTION(U1,".far:matrix")
98 #pragma DATA_ALIGN(U1,8);
99 double U1[MAX_U_SIZE];
101 #pragma DATA_SECTION(diag,".far:matrix")
102 #pragma DATA_ALIGN(diag,8);
103 double diag[MAX_NROWS];
105 #pragma DATA_SECTION(superdiag,".far:matrix")
106 #pragma DATA_ALIGN(superdiag,8);
107 double superdiag[MAX_NROWS];
109 /* ======================================================================= */
110 /* Main -- Top level driver for testing the algorithm */
111 /* ======================================================================= */
113 int check_decompostion(int Nrows,int Ncols,double *A,double *U,double *diag,double *V,double *U1,double *max_error,double *avg_error_SVD,char print_txt[16]);
114 int check_transformation(int Nrows,int Ncols,double *U,double *V);
116 void main(void) {
118 int test,Nrows,Ncols;
119 int pass,pass_cn,pass_opt;
120 int row,col,status;
121 double max_error_cn,max_error,avg_error_SVD_cn,avg_error_SVD;
122 long long t_overhead, t_start, t_stop, SVD_t, SVD_cn_t;
123 char print_txt[16];
125 /* --------------------------------------------------------------------- */
126 /* Compute the overhead of calling clock twice to get timing info */
127 /* --------------------------------------------------------------------- */
128 TSCL= 0,TSCH=0;
129 t_start = _itoll(TSCH, TSCL);
130 t_stop = _itoll(TSCH, TSCL);
131 t_overhead = t_stop - t_start;
133 /* --------------------------------------------------------------------- */
134 /* process test cases */
135 /* --------------------------------------------------------------------- */
136 for (test=1;test<=11;test++) {
137 printf("DSPF_dp_svd\tIter#: %d\t", test);
139 switch(test) {
140 case 1: { Nrows=4;Ncols=2; // 4x2
141 A[0]=1;A[1]=2;A[2]=3;A[3]=4;A[4]=5;A[5]=6;A[6]=7;A[7]=8;
142 break;
143 }
144 case 2: { Nrows=3;Ncols=3; // 3x3
145 A[0]=1;A[1]=1;A[2]=1;
146 A[3]=1;A[4]=2;A[5]=3;
147 A[6]=1;A[7]=3;A[8]=6;
148 break;
149 }
150 case 3: { Nrows=3;Ncols=3; // 3x3
151 A[0]=1;A[1]=2;A[2]=3;
152 A[3]=4;A[4]=5;A[5]=6;
153 A[6]=7;A[7]=8;A[8]=0;
154 break;
155 }
156 case 4: { Nrows=3;Ncols=3; // 3x3 singular
157 A[0]=0;A[1]=1;A[2]=2;
158 A[3]=3;A[4]=4;A[5]=5;
159 A[6]=6;A[7]=7;A[8]=8;
160 break;
161 }
162 case 5: { Nrows=5;Ncols=4; // 5x4
163 A[ 0]=1;A[ 1]=0;A[ 2]=0;A[ 3]=0;
164 A[ 4]=0;A[ 5]=0;A[ 6]=0;A[ 7]=4;
165 A[ 8]=0;A[ 9]=3;A[10]=0;A[11]=0;
166 A[12]=0;A[13]=0;A[14]=0;A[15]=0;
167 A[16]=2;A[17]=0;A[18]=0;A[19]=0;
168 break;
169 }
170 case 6: { Nrows=8;Ncols=5; // 8x5
171 A[ 0]=22;A[ 1]=10;A[ 2]= 2;A[ 3]= 3;A[ 4]= 7;
172 A[ 5]=14;A[ 6]= 7;A[ 7]=10;A[ 8]= 0;A[ 9]= 8;
173 A[10]=-1;A[11]=13;A[12]=-1;A[13]=-11;A[14]= 3;
174 A[15]=-3;A[16]=-2;A[17]=13;A[18]= -2;A[19]= 4;
175 A[20]= 9;A[21]= 8;A[22]= 1;A[23]= -2;A[24]= 4;
176 A[25]= 9;A[26]= 1;A[27]=-7;A[28]= 5;A[29]=-1;
177 A[30]= 2;A[31]=-6;A[32]= 6;A[33]= 5;A[34]= 1;
178 A[35]= 4;A[36]= 5;A[37]= 0;A[38]= -2;A[39]= 2;
179 break;
180 }
181 case 7: { Nrows=5;Ncols=5; // 5x5
182 A[ 0]=1;A[ 1]=0;A[ 2]=0;A[ 3]=0;A[ 4]=0;
183 A[ 5]=0;A[ 6]=1;A[ 7]=0;A[ 8]=0;A[ 9]=0;
184 A[10]=0;A[11]=0;A[12]=1;A[13]=0;A[14]=0;
185 A[15]=0;A[16]=0;A[17]=0;A[18]=1;A[19]=0;
186 A[20]=0;A[21]=0;A[22]=0;A[23]=0;A[24]=1;
187 break;
188 }
189 case 8: { Nrows=4;Ncols=5; // 4x5
190 A[ 0]=1;A[ 1]=0;A[ 2]=0;A[ 3]=0;A[ 4]=2;
191 A[ 5]=0;A[ 6]=0;A[ 7]=3;A[ 8]=0;A[ 9]=0;
192 A[10]=0;A[11]=0;A[12]=0;A[13]=0;A[14]=0;
193 A[15]=0;A[16]=4;A[17]=0;A[18]=0;A[19]=0;
194 break;
195 }
196 case 9: { Nrows=2;Ncols=4; // 2x4
197 A[0]=1;A[1]=2;A[2]=3;A[3]=4;
198 A[4]=5;A[5]=6;A[6]=7;A[7]=8;
199 break;
200 }
201 case 10: {
202 Nrows=CYCLE_FORMULA_ORDER_PT1;
203 Ncols=Nrows;
204 srand(1);
205 for (row=0;row<Nrows;row++) {
206 for (col=0;col<Ncols;col++) {
207 A[row*Ncols+col]=(double)(rand())/((double)RAND_MAX);
208 }
209 }
210 break;
211 }
212 case 11: {
213 Nrows=CYCLE_FORMULA_ORDER_PT2;
214 Ncols=Nrows;
215 srand(1);
216 for (row=0;row<Nrows;row++) {
217 for (col=0;col<Ncols;col++) {
218 A[row*Ncols+col]=(double)(rand())/((double)RAND_MAX);
219 }
220 }
221 break;
222 }
223 }
225 /* ------------------------------------------------------------------- */
226 /* decompose A */
227 /* ------------------------------------------------------------------- */
228 TSCL= 0,TSCH=0;
229 t_start = _itoll(TSCH, TSCL);
230 status=DSPF_dp_svd_cn(Nrows,Ncols,A,U,V,U1,diag,superdiag);
231 t_stop = _itoll(TSCH, TSCL);
232 SVD_cn_t = t_stop-t_start-t_overhead;
233 if (status==-1) {
234 printf("natural C SV decomposition failed!\n");
235 }
236 /* ------------------------------------------------------------------- */
237 /* check decomposition: U*D*transpose(V)=A */
238 /* ------------------------------------------------------------------- */
239 strcpy(print_txt,"natural: ");
240 pass_cn=check_decompostion(Nrows,Ncols,A,U,diag,V,U1,&max_error_cn,&avg_error_SVD_cn,print_txt);
242 /* ------------------------------------------------------------------- */
243 /* check transformation: U*U'=V*V'=I */
244 /* ------------------------------------------------------------------- */
245 pass_cn=pass_cn && check_transformation(Nrows,Ncols,U,V);
247 /* ------------------------------------------------------------------- */
248 /* check transformation: U*U'=V*V'=I */
249 /* ------------------------------------------------------------------- */
250 pass_opt=pass_opt && check_transformation(Nrows,Ncols,U,V);
252 /* ------------------------------------------------------------------- */
253 /* decompose A using optimized C */
254 /* ------------------------------------------------------------------- */
255 TSCL= 0,TSCH=0;
256 t_start = _itoll(TSCH, TSCL);
257 status=DSPF_dp_svd(Nrows,Ncols,A,U,V,U1,diag,superdiag);
258 t_stop = _itoll(TSCH, TSCL);
259 if (status==-1) {
260 printf("optimized C SV decomposition failed!\n");
261 }
262 SVD_t=t_stop-t_start-t_overhead;
264 /* ------------------------------------------------------------------- */
265 /* check decomposition: U*D*transpose(V)=A */
266 /* ------------------------------------------------------------------- */
267 strcpy(print_txt,"optimized: ");
268 pass_opt=check_decompostion(Nrows,Ncols,A,U,diag,V,U1,&max_error,&avg_error_SVD,print_txt);
270 pass=(pass_cn)&&(pass_opt);
271 if (pass) {
272 printf("Result Successful");
273 printf("\torder=%2dx%2d\tnatC: %lld\toptC: %lld\n", Nrows, Ncols, SVD_cn_t, SVD_t);
274 } else {
275 printf("Result Failure\torder=%2dx%2d\n", Nrows, Ncols);
276 }
278 if (Nrows==CYCLE_FORMULA_ORDER_PT1) {
279 form_cycles[0] = SVD_t;
280 } else if (Nrows==CYCLE_FORMULA_ORDER_PT2) {
281 form_cycles[1] = SVD_t;
282 }
283 } /* for (test=1;test<=10;test++) */
285 /* ------------------------------------------------------------------- */
286 /* provide memory and cycles information */
287 /* ------------------------------------------------------------------- */
288 #ifdef __TI_COMPILER_VERSION__ // for TI compiler only
289 printf("Memory: %d bytes\n", &kernel_size);
290 #endif
292 printf("Cycles: %lld (order=%d) %lld (order=%d)\n",form_cycles[0],CYCLE_FORMULA_ORDER_PT1,form_cycles[1],CYCLE_FORMULA_ORDER_PT2);
294 }
296 int check_decompostion(int Nrows,int Ncols,double *A,double *U,double *diag,double *V,double *U1,double *max_error,double *avg_error_SVD,char print_txt[16]) {
298 int pass,row,col,k;
299 double total_error,tolerance,sum,error,max_err;
301 pass=1;
302 total_error=0;
303 tolerance=0.000001;
304 max_err=0;
305 #ifndef ENABLE_REDUCED_FORM
306 for (row=0;row<Nrows;row++) {
307 for (col=0;col<Ncols;col++) {
308 if (col<Nrows) {
309 U1[col+row*Ncols]=U[col+row*Nrows]*diag[col];
310 } else {
311 U1[col+row*Ncols]=0;
312 }
313 }
314 }
315 for (row=0;row<Nrows;row++) {
316 for (col=0;col<Ncols;col++) {
317 sum=0;
318 for (k=0;k<Ncols;k++) {
319 sum+=U1[k+row*Ncols]*V[k+col*Ncols];
320 }
321 error=fabs(A[col+row*Ncols]-sum);
322 if (error>max_err) {
323 max_err=error;
324 }
325 #ifndef ENABLE_PROFILE
326 if ((error>tolerance)||(isnan(error))) {
327 pass=0;
328 printf("%s orig=%e calc=%e error=%e\n",print_txt,A[col+row*Ncols],sum,error);
329 break;
330 } else {
331 total_error+=error;
332 }
333 #else
334 total_error+=error;
335 #endif
336 }
337 if (pass==0) {
338 break;
339 }
340 }
341 #else
342 if (Nrows>=Ncols) {
343 for (row=0;row<Nrows;row++) {
344 for (col=0;col<Ncols;col++) {
345 U1[col+row*Ncols]=U[col+row*Ncols]*diag[col];
346 }
347 }
348 for (row=0;row<Nrows;row++) {
349 for (col=0;col<Ncols;col++) {
350 sum=0;
351 for (k=0;k<Ncols;k++) {
352 sum+=U1[k+row*Ncols]*V[k+col*Ncols];
353 }
354 error=fabs(A[col+row*Ncols]-sum);
355 if (error>max_err) {
356 max_err=error;
357 }
358 #ifndef ENABLE_PROFILE
359 if ((error>tolerance)||(isnan(error))) {
360 pass=0;
361 printf("%s orig=%e calc=%e error=%e\n",print_txt,A[col+row*Ncols],sum,error);
362 break;
363 } else {
364 total_error+=error;
365 }
366 #else
367 total_error+=error;
368 #endif
369 }
370 if (pass==0) {
371 break;
372 }
373 }
374 } else { /* Nrows<Ncols */
375 for (row=0;row<Nrows;row++) {
376 for (col=0;col<Nrows;col++) {
377 U1[col+row*Nrows]=U[col+row*Nrows]*diag[col];
378 }
379 }
380 for (row=0;row<Nrows;row++) {
381 for (col=0;col<Ncols;col++) {
382 sum=0;
383 for (k=0;k<Nrows;k++) {
384 sum+=U1[k+row*Nrows]*V[k+col*Nrows];
385 }
386 error=fabs(A[col+row*Ncols]-sum);
387 if (error>max_err) {
388 max_err=error;
389 }
390 #ifndef ENABLE_PROFILE
391 if ((error>tolerance)||(isnan(error))) {
392 pass=0;
393 printf("%s orig=%e calc=%e error=%e\n",print_txt,A[col+row*Ncols],sum,error);
394 break;
395 } else {
396 total_error+=error;
397 }
398 #else
399 total_error+=error;
400 #endif
401 }
402 if (pass==0) {
403 break;
404 }
405 }
406 } /* if (Nrows>=Ncols) */
407 #endif
408 *avg_error_SVD=total_error/((double)Nrows*(double)Ncols);
409 *max_error=max_err;
411 return pass;
412 }
414 int check_transformation(int Nrows,int Ncols,double *U,double *V) {
416 int row,col,k,width,pass;
417 double sum,error,tolerance;
419 pass=1;
420 tolerance=0.000001;
421 /* check U*U'=I */
422 #ifndef ENABLE_REDUCED_FORM
423 width=Nrows;
424 for (row=0;row<Nrows;row++) {
425 for (col=0;col<width;col++) {
426 sum=0;
427 for (k=0;k<width;k++) {
428 sum+=U[k+row*width]*U[k+col*width];
429 }
430 if (row==col) {
431 error=fabs(sum-1.0);
432 } else {
433 error=fabs(sum);
434 }
435 if (error>tolerance) {
436 pass=0;
437 }
438 }
439 }
441 /* check V*V'=I */
442 for (row=0;row<Ncols;row++) {
443 for (col=0;col<Ncols;col++) {
444 sum=0;
445 for (k=0;k<Ncols;k++) {
446 sum+=V[k+row*Ncols]*V[k+col*Ncols];
447 }
448 if (row==col) {
449 error=fabs(sum-1.0);
450 } else {
451 error=fabs(sum);
452 }
453 if (error>tolerance) {
454 pass=0;
455 }
456 }
457 }
458 #else
459 if (Nrows>=Ncols) { /* check V only */
460 /* check V*V'=I */
461 for (row=0;row<Ncols;row++) {
462 for (col=0;col<Ncols;col++) {
463 sum=0;
464 for (k=0;k<Ncols;k++) {
465 sum+=V[k+row*Ncols]*V[k+col*Ncols];
466 }
467 if (row==col) {
468 error=fabs(sum-1.0);
469 } else {
470 error=fabs(sum);
471 }
472 if (error>tolerance) {
473 pass=0;
474 }
475 }
476 }
477 } else { /* check U only */
478 /* check U*U'=I */
479 width=Nrows;
480 for (row=0;row<Nrows;row++) {
481 for (col=0;col<width;col++) {
482 sum=0;
483 for (k=0;k<width;k++) {
484 sum+=U[k+row*width]*U[k+col*width];
485 }
486 if (row==col) {
487 error=fabs(sum-1.0);
488 } else {
489 error=fabs(sum);
490 }
491 if (error>tolerance) {
492 pass=0;
493 }
494 }
495 }
496 }
497 #endif
499 return pass;
500 }
502 /* ======================================================================= */
503 /* End of file: DSPF_dp_svd_d.c */
504 /* ------------------------------------------------------------------------*/
505 /* Copyright (c) 2013 Texas Instruments, Incorporated. */
506 /* All Rights Reserved. */
507 /* ======================================================================= */