1 /******************************************************************************
2 * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <time.h>
33 #include <math.h>
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38 #include "cblas.h"
39 #include "f2c.h"
40 #include "blaswrap.h"
41 #include "clapack.h"
42 #ifdef __cplusplus
43 }
44 #endif
46 /*-----------------------------------------------------------------------------
47 * Timing Setup
48 *----------------------------------------------------------------------------*/
49 struct timespec t0,t1;
50 #define tick() clock_gettime(CLOCK_MONOTONIC, &t0);
51 #define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
52 t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9)
54 /* Auxiliary routines prototypes */
55 void print_eigenvalues(char * desc, int n, double * vector_r, double * vector_i);
56 void print_eigenvectors(char * desc, int n, double * vector_i, double * v, int ldv);
57 void form_diag_matrix(doublecomplex *matrix_diag, doublereal *vector_r, doublereal *vector_i, int n);
58 void form_eigen_matrix(doublecomplex *mat, int n, double* vector_i, double* v, int ldv);
59 void print_real_matrix(char* desc, doublereal *mat, int n, int m, int ldv);
60 void print_complex_matrix(char* desc, doublecomplex *mat, int n, int m, int ldv);
61 doublereal compute_evd_error(doublecomplex *matrix_reconstruct, doublereal *matrix_original, int n, int ld);
62 void print_performance(char * func_name, int *matrix_sizes, float *time, int num_test, int num_run);
64 /* Parameters */
65 #define MAX_ALLOWED_ERR (1e-10)
67 /* Main program */
69 int print_data;
71 int main(int argc, char *argv[])
72 {
73 /* Locals */
74 float time, time_ev, time_cinv, *time_ev_all_runs, *time_cinv_all_runs;
75 integer num_test, num_run, i, j, n, n_min, n_inc;
76 integer lda, ldl, ldr, info, lwork;
77 doublereal wkopt;
78 doublereal* work;
79 integer *vector_pivot, *vector_work;
80 doublereal max_error_evd;
81 doublereal *matrix_A, *matrix_A_cpy, *matrix_R, *matrix_L;
82 doublereal *vector_diag, *vector_diagl, *vector_diagr, *vector_r, *vector_i;
83 doublecomplex *matrix_D, *matrix_P, *matrix_invP, *matrix_A_cmplx, *work_dcomplex, lwork_opt;
84 int *matrix_sizes;
85 integer const_int6 = 6;
86 doublereal const_dbr1 = 1.;
87 doublereal const_dbr0 = 0.;
88 doublecomplex const_dbc1 = {1.,0.};
89 doublecomplex const_dbc0 = {0.,0.};
90 integer rand_seed[4] = {10,123,278,3579}; /* numbers must lie between 0 and 4095 */
92 fprintf(stdout, "\n\n======= LAPACK example: eigen decomposition =======\n");
93 if(argc == 1) { /* no command line arguments, use default */
94 num_test = 3;
95 n_min = 100;
96 n_inc = 100;
97 num_run = 1;
98 print_data = 0;
99 }
100 else if (argc < 6) {
101 printf("Usage: eig <num_test> <start_size> <size_inc> <num_run per size> <print data or not (1 or 0)>\n");
102 exit(0);
103 }
104 else {
105 num_test = atoi(argv[1]);
106 n_min = atoi(argv[2]);
107 n_inc = atoi(argv[3]);
108 num_run = atoi(argv[4]);
109 print_data = atoi(argv[5]);
110 }
111 rand_seed[0] = 4675;
113 matrix_sizes = (int*)malloc(sizeof(int)*num_test);
114 time_ev_all_runs = (float *)malloc(sizeof(float)*num_test*num_run);
115 time_cinv_all_runs = (float *)malloc(sizeof(float)*num_test*num_run);
117 for(i=0, n=n_min; i<num_test; i++, n+=n_inc)
118 {
119 lda = ldl = ldr = n;
120 matrix_sizes[i] = n;
122 printf("\n\nStart EVD for matrix size %d. \n", (int)n);
124 j = 0;
125 while(j<num_run)
126 {
128 /* Allocate memory for matrix and vectors */
129 matrix_A = (doublereal *)__malloc_ddr(sizeof(doublereal)*n*n);
130 matrix_A_cpy = (doublereal *)malloc(sizeof(doublereal)*n*n);
131 vector_pivot = (integer *)__malloc_ddr(sizeof(integer)*n);
132 vector_work = (integer *)malloc(sizeof(integer)*n); /* not used in the example */
133 vector_diag = (doublereal *)malloc(sizeof(doublereal)*n); /* not used in the example */
134 vector_diagl = (doublereal *)malloc(sizeof(doublereal)*n); /* not used in the example */
135 vector_diagr = (doublereal *)malloc(sizeof(doublereal)*n); /* not used in the example */
136 matrix_D = (doublecomplex *)__malloc_ddr(n*n*sizeof(doublecomplex));
137 matrix_P = (doublecomplex *)__malloc_ddr(n*n*sizeof(doublecomplex));
138 matrix_invP = (doublecomplex *)__malloc_ddr(n*n*sizeof(doublecomplex));
139 vector_r = (doublereal *)__malloc_ddr(sizeof(doublereal)*n);
140 vector_i = (doublereal *)__malloc_ddr(sizeof(doublereal)*n);
141 matrix_L = (doublereal *)__malloc_ddr(sizeof(doublereal)*n*n); /* left eigenvectors */
142 matrix_R = (doublereal *)__malloc_ddr(sizeof(doublereal)*n*n); /* right eigenvectors */
144 /* Generate a N by N random matrix using LAPACK auxiliary testing routine. */
145 printf("\nStart test run %d out of %d. \n", (int)(j+1), (int)num_run);
146 printf("Generate %d by %d random matrix. \n", (int)n, (int)n);
147 dlatmr_(&n, &n, "S", rand_seed, "N", vector_diag, &const_int6, &const_dbr1,
148 &const_dbr1, "T", "N", vector_diagl, &const_int6, &const_dbr1, vector_diagr,
149 &const_int6, &const_dbr1, "N", vector_pivot, &n, &n, &const_dbr0,
150 &const_dbr1, "N", matrix_A, &n, vector_work, &info);
151 //EDMA_Free();
152 if(info != 0) {
153 printf("Random matrix generation has a problem. Returned error value is %d. Start a new run. \n", (int)info);
154 goto START_NEW_RUN;
155 }
157 print_real_matrix("Generated random matrix (double real): ", matrix_A, n, n, lda);
158 memcpy((void *)matrix_A_cpy, (void *)matrix_A, sizeof(doublereal)*n*n);
160 //time = read_cycle();
161 tick();
163 /* Query and allocate the optimal workspace for eigenproblem solution. */
164 lwork = -1;
165 dgeev_("N", "V", &n, matrix_A, &lda, vector_r, vector_i, matrix_L, &ldl, matrix_R, &ldr,
166 &wkopt, &lwork, &info); /* to find right eigenvectors only */
168 lwork = (int)wkopt; /* get optimal workspace size returned by dgeev_() */
169 work = (doublereal*)__malloc_ddr( lwork*sizeof(doublereal) );
171 /* Find eigenvectors and eigenvalues: AP = PD, where colums of P are eigenvectors,
172 and diagonal elements of D are eigenvalues. */
173 dgeev_("N", "Vectors", &n, matrix_A, &lda, vector_r, vector_i, matrix_L, &ldl, matrix_R, &ldr,
174 work, &lwork, &info); /* find right eigenvectors only */
175 time_ev = tock();;
176 time_ev_all_runs[i*num_run+j] = time_ev;
178 printf("Perform eigen-decomposition such that AP = PD.\n");
180 __free_ddr( (void*)work );
182 /* Check for convergence */
183 if( info > 0 ) {
184 printf("The eigen solution algorithm failed to compute eigenvalues.");
185 printf("To generate a new matrix to test the algorithm.\n");
186 goto START_NEW_RUN;
187 }
189 printf("Eigen-decomposition finished successfully.\n");
190 print_eigenvalues( "Eigenvalues", n, vector_r, vector_i );
191 print_eigenvectors( "Right eigenvectors", n, vector_i, matrix_R, ldr );
193 /* Form matrix P using right eigenvectors as the columns. */
194 form_eigen_matrix(matrix_P, n, vector_i, matrix_R, ldr);
195 print_complex_matrix("Eigenvector matrix P:", matrix_P, n, n, n);
197 /* Form matrix D using eigenvalues as the diagonal elements. */
198 form_diag_matrix(matrix_D, vector_r, vector_i, n);
199 print_complex_matrix("Complex diagonal matrix D: ", matrix_D, n, n, n);
201 /* Compute inverse of P: first perform LU decomposition on P. */
202 printf("Compute inverse of P such that A = PDP^-1.\n");
204 zlacpy_("Full", &n, &n, matrix_P, &n, matrix_invP, &n);
206 tick();
207 zgetrf_(&n, &n, matrix_invP, &n, vector_pivot, &info);
208 time_cinv = tock(); /* time for complex matrix inversion */
210 if( info != 0 ) {
211 printf("Return error value of zgetrf_(): %d.\n", (int)info);
212 printf("The LU decomposition failed.");
213 printf("To generate a new matrix to test the algorithm.\n");
214 goto START_NEW_RUN;
215 }
217 print_complex_matrix("matrix P LU decomposition results", matrix_invP, n, n, n);
219 tick();
221 /* Query and allocate the optimal workspace for matrix inversion
222 based on LU decompostion. */
223 lwork = -1;
224 zgetri_(&n, matrix_invP, &n, vector_pivot, &lwork_opt, &lwork, &info);
226 lwork = (int)lwork_opt.r; /* get optimal workspace size returned by zgetri_() */
227 work_dcomplex = (doublecomplex *)malloc( lwork*sizeof(doublecomplex) );
229 /* Compute inverse of P based on LUD results. */
230 zgetri_(&n, matrix_invP, &n, vector_pivot, work_dcomplex, &lwork, &info);
231 free( (void*)work_dcomplex );
233 time_cinv += tock();
234 time_cinv_all_runs[i*num_run+j] = time_cinv;
236 if( info != 0 ) {
237 printf("Return error value of zgetri_(): %d.\n", (int)info);
238 printf("Matrix P inversion through LU decomposition failed.");
239 printf("To generate a new matrix to test the algorithm.\n");
240 goto START_NEW_RUN;
241 }
242 printf("Matrix P inversion finished successfully.");
243 print_complex_matrix("Inverse of eigenvector matrix P:", matrix_invP, n, n, n);
245 /* Compute P*D*inv(P). */
246 work_dcomplex = (doublecomplex *)malloc(sizeof(doublecomplex)*n*n);
247 zgemm_("N", "N", &n, &n, &n, &const_dbc1, matrix_P, &n, matrix_D, &n, &const_dbc0, work_dcomplex, &n);
248 //EDMA_Free();
250 matrix_A_cmplx = matrix_P; /* reuse memory */
251 zgemm_("N", "N", &n, &n, &n, &const_dbc1, work_dcomplex, &n, matrix_invP, &n, &const_dbc0, matrix_A_cmplx, &n);
252 print_complex_matrix("P*D*inv(P):", matrix_A_cmplx, n, n, n);
253 print_real_matrix("Original matrix A", matrix_A_cpy, n, n, n);
255 /* Compute error: A-P*D*inv(P) */
256 max_error_evd = compute_evd_error(matrix_A_cmplx, matrix_A_cpy, n, lda);
258 printf("\nMaximum error of EVD: %e\n", max_error_evd);
260 if(max_error_evd > MAX_ALLOWED_ERR) {
261 printf("Error is out of tolerance range.\n");
262 exit(-1);
263 }
265 free( (void*)work_dcomplex );
267 j++;
269 START_NEW_RUN:
270 printf("Current run finished. \n");
272 /* Free workspace */
273 __free_ddr((void *)matrix_A);
274 __free_ddr((void *)vector_pivot);
275 free((void *)vector_work );
276 free((void *)vector_diag );
277 free((void *)vector_diagl);
278 free((void *)vector_diagr);
279 __free_ddr((void *)vector_r);
280 __free_ddr((void *)vector_i);
281 __free_ddr((void *)matrix_L);
282 __free_ddr((void *)matrix_R);
283 free((void *)matrix_A_cpy);
284 __free_ddr((void *)matrix_D);
285 __free_ddr((void *)matrix_P);
286 __free_ddr((void *)matrix_invP);
287 } /* while(j<num_run) */
289 } /* for(i=0, n=n_min; i<num_test; i++, n+=n_inc) */
291 print_performance("dgeev", matrix_sizes, time_ev_all_runs, num_test, num_run);
292 print_performance("zgetrf and zgetri", matrix_sizes, time_cinv_all_runs, num_test, num_run);
294 printf("Passed.\n");
296 exit( 0 );
297 } /* End of DGEEV Example */
299 /* Auxiliary routine: printing eigenvalues */
300 void print_eigenvalues( char* desc, int n, double* vector_r, double* vector_i )
301 {
302 int j;
304 if (!print_data)
305 return;
307 printf( "\n %s\n", desc );
309 for( j = 0; j < n; j++ ) {
310 if( vector_i[j] == (double)0.0 ) {
311 printf( " %10.5f", vector_r[j] );
312 } else {
313 printf( " (%10.5f,%10.5f)", vector_r[j], vector_i[j] );
314 }
315 }
316 printf( "\n" );
317 }
319 /* Auxiliary routine: printing eigenvectors */
320 void print_eigenvectors( char* desc, int n, double* vector_i, double* v, int ldv )
321 {
322 int i, j;
324 if (!print_data)
325 return;
327 printf( "\n %s\n", desc );
329 for( i = 0; i < n; i++ ) {
330 j = 0;
331 while( j < n ) {
332 if( vector_i[j] == (double)0.0 ) {
333 printf( " %10.5f", v[i+j*ldv] );
334 j++;
335 } else {
336 printf( " (%10.5f,%10.5f)", v[i+j*ldv], v[i+(j+1)*ldv] );
337 printf( " (%10.5f,%10.5f)", v[i+j*ldv], -v[i+(j+1)*ldv] );
338 j += 2;
339 }
340 }
341 printf( "\n" );
342 }
343 }
345 void form_eigen_matrix(doublecomplex *mat, int n, double* vector_i, double* v, int ldv)
346 {
347 int i, j;
349 for( i = 0; i < n; i++ ) {
350 j = 0;
351 while( j < n ) {
352 if( vector_i[j] == (double)0.0 ) {
353 mat[i+j*ldv].r = v[i+j*ldv];
354 mat[i+j*ldv].i = 0;
355 j++;
356 } else {
357 mat[i+j*ldv].r = v[i+j*ldv];
358 mat[i+j*ldv].i = v[i+(j+1)*ldv];
359 mat[i+(j+1)*ldv].r = v[i+j*ldv];
360 mat[i+(j+1)*ldv].i = -v[i+(j+1)*ldv];
361 j += 2;
362 }
363 }
364 }
365 }
367 void form_diag_matrix(doublecomplex *matrix_diag, doublereal *vector_r, doublereal *vector_i, int n)
368 {
369 int i, j;
371 for(i=0; i<n; i++) {
372 for(j=0; j<n; j++) {
373 matrix_diag[i+j*n].r = 0.;
374 matrix_diag[i+j*n].i = 0.;
375 }
376 }
378 for(i=0; i<n; i++) {
379 matrix_diag[i+i*n].r = vector_r[i];
380 matrix_diag[i+i*n].i = vector_i[i];
381 }
382 }
384 void print_real_matrix(char* desc, double *mat, int m, int n, int ldv )
385 {
386 int i, j;
388 if (!print_data)
389 return;
391 printf( "\n %s\n", desc );
392 for(i=0; i<m; i++) {
393 for(j=0; j<n; j++) {
394 printf( " %10.5f ", mat[i+j*ldv]);
395 }
396 printf( "\n" );
397 }
398 }
400 void print_complex_matrix(char* desc, doublecomplex *mat, int m, int n, int ldv )
401 {
402 int i, j;
403 double real, imag;
405 if (!print_data)
406 return;
408 printf( "\n %s\n", desc );
409 for(i=0; i<m; i++) {
410 for(j=0; j<n; j++) {
411 real = mat[i+j*ldv].r;
412 imag = mat[i+j*ldv].i;
413 if(imag >= 0) {
414 printf( " %10.5f + %10.5fi", real, imag);
415 }
416 else {
417 printf( " %10.5f - %10.5fi", real, fabs(imag));
418 }
419 }
420 printf( "\n" );
421 }
422 }
424 doublereal compute_evd_error(doublecomplex *matrix_reconstruct, doublereal *matrix_original, int n, int ld)
425 {
426 int i, j;
427 doublereal err_real, err_imag;
428 doublereal abs_err;
429 doublereal max_abs_err = 0.;
431 for(i=0; i<n; i++) {
432 for(j=0; j<n; j++) {
433 err_real = matrix_reconstruct[i+j*ld].r - matrix_original[i+j*ld];
434 err_imag = matrix_reconstruct[i+j*ld].i;
436 abs_err = sqrt(err_real*err_real + err_imag*err_imag);
438 if(abs_err > max_abs_err) {
439 max_abs_err = abs_err;
440 }
441 }
442 }
444 return max_abs_err;
445 }
447 void print_performance(char * func_name, int *matrix_sizes, float *time, int num_test, int num_run)
448 {
449 int i, j;
450 float inst, min, max;
451 float tot, avg;
453 printf("Time (second) of %s for all matrix sizes:\n", func_name);
454 printf("Matrix size ");
455 printf(" Min Max Average ");
456 printf("\n");
458 for(i=0; i<num_test; i++)
459 {
460 printf("%11d ", matrix_sizes[i]);
461 tot = 0.;
462 max = 0.;
463 min = 1e100;
464 for(j=0; j<num_run; j++)
465 {
466 inst = time[i*num_run+j];
467 tot += (double)inst;
469 if(inst>max) max = inst;
470 if(inst<min) min = inst;
471 }
472 avg = tot/(double)num_run;
473 printf("%12f%12f%12f\n", min, max, avg);
474 }
475 }