1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <time.h>
5 #ifdef __cplusplus
6 extern "C" {
7 #endif
8 #include "cblas.h"
9 #ifdef __cplusplus
10 }
11 #endif
13 /*-----------------------------------------------------------------------------
14 * Timing Setup
15 *----------------------------------------------------------------------------*/
16 struct timespec t0,t1;
17 #define tick() clock_gettime(CLOCK_MONOTONIC, &t0);
18 #define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
19 t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9)
21 extern int dgetrf_(int *, int *, double *, int *, int *, int *);
22 extern int dgetri_(int *, double *, int *, int *, double *, int *, int *);
23 extern int dgeqrf_(int *, int *, double *, int *, double *, double *, int *, int *);
24 extern int dtrtri_(char *, char *, int *, double *, int *, int *);
26 extern int dlatmr_(int *, int *, char *, int *, char *,
27 double *, int *, double *, double *, char *, char *,
28 double *, int *, double *, double *, int *,
29 double *, char *, int *, int *, int *,
30 double *, double *, char *, double *, int *,
31 int *, int *);
32 extern int dlacpy_(char *, int *, int *, double *, int *, double *, int *);
34 /* Auxiliary routines prototypes */
35 void print_int_vector( char* desc, int n, int* a );
36 void print_real_matrix(char* desc, double *mat, int n, int m, int ldv );
37 double compute_inverse_error(double *matrix_A, double *matrix_A_inverse, int n, int ld);
38 void print_performance(int *matrix_sizes, float *secs_runs, int num_test, int num_run);
40 /* Parameters */
41 #define MATRIX_ORDER 5
42 #define LDA MATRIX_ORDER
44 /* Main program */
45 int print_data;
46 int main(int argc, char *argv[]) {
47 /* Locals */
48 float secs, secs_lud_inv, *secs_all_runs;
49 int num_test, num_run, i, j;
50 int n, n_min, n_inc, info, lwork, lda;
51 double lwork_opt, max_abs_error;
52 double *work;
53 double *matrix_A, *matrix_LUD;
54 double *vec_dbr_diagr, *vec_dbr_diag, *vec_dbr_diagl;
55 int *vec_int_pivot, *vec_int_work;
56 int *matrix_sizes;
58 int const_int6 = 6;
59 double const_dbr1 = 1.;
60 double const_dbr0 = 0.;
61 int rand_seed[4] = {10,123,278,3579}; /* numbers must lie between 0 and 4095 */
63 fprintf(stdout, "\n\n===== LAPACK example: matrix inversion through LUD =====\n");
64 if(argc == 1) { /* no command line arguments, use default */
65 num_test = 3;
66 n_min = 1000;
67 n_inc = 1000;
68 num_run = 1;
69 print_data = 0;
70 }
71 else if (argc < 6) {
72 printf("Usage: ludinv <num_test> <start_size> <size_inc> <num_run per size> <print data or not (1 or 0)>\n");
73 exit(0);
74 }
75 else {
76 num_test = atoi(argv[1]);
77 n_min = atoi(argv[2]);
78 n_inc = atoi(argv[3]);
79 num_run = atoi(argv[4]);
80 print_data = atoi(argv[5]);
81 }
82 rand_seed[0] = 4675;
84 secs_all_runs = (float *)malloc(sizeof(float)*num_test*num_run);
85 matrix_sizes = (int*)malloc(sizeof(int)*num_test);
87 for(i=0, n=n_min; i<num_test; i++, n+=n_inc)
88 {
89 lda = n;
90 matrix_sizes[i] = n;
92 printf("\n\nStart matrix inversion by LUD for matrix size %d. \n", n);
94 /* Operation counts of LUD inversion according to LAPACK Working Note 41 */
95 /*operation_counts = 2.*n*n*n - 1.5*n*n + 2.5*n; */
97 /* Allocate memory for matrix and vectors */
98 matrix_A = (double *)__malloc_ddr(sizeof(double)*n*n);
99 matrix_LUD = (double *)__malloc_ddr(sizeof(double)*n*n);
100 vec_int_pivot = (int *)__malloc_ddr(sizeof(int)*n);
101 vec_int_work = (int *)__malloc_ddr(sizeof(int)*n);
102 vec_dbr_diag = (double *)__malloc_ddr(sizeof(double)*n);
103 vec_dbr_diagl = (double *)__malloc_ddr(sizeof(double)*n);
104 vec_dbr_diagr = (double *)__malloc_ddr(sizeof(double)*n);
106 j = 0;
107 while(j<num_run) {
108 /* Generate a N by N random matrix */
109 printf("\nStart test run %d out of %d. \n", j+1, num_run);
110 printf("Generate %d by %d random matrix. \n", n, n);
111 dlatmr_(&n, &n, "S", rand_seed, "N", vec_dbr_diag, &const_int6, &const_dbr1,
112 &const_dbr1, "T", "N", vec_dbr_diagl, &const_int6, &const_dbr1, vec_dbr_diagr,
113 &const_int6, &const_dbr1, "N", vec_int_pivot, &n, &n, &const_dbr0,
114 &const_dbr1, "N", matrix_A, &lda, vec_int_work, &info);
115 if(info != 0) {
116 printf("Random matrix generation has a problem. Returned error value is %d. Start a new run. \n", info);
117 //goto START_NEW_RUN;
118 exit(0);
119 }
121 memcpy((void *)matrix_LUD, (void *)matrix_A, sizeof(double)*n*n);
123 print_real_matrix("Generated random matrix (double real): ", matrix_LUD, n, n, lda);
125 tick();
127 /* Computes LU factorization */
128 dgetrf_(&n, &n, matrix_LUD, &lda, vec_int_pivot, &info);
130 secs_lud_inv = tock();
132 if(info != 0) {
133 printf("LU factorization has a problem. Returned error value of dgetrf is %d. Start a new run. \n", info);
134 //goto START_NEW_RUN;
135 exit(0);
136 }
138 printf("LU decomposition finished. Time spent on LUD: %f seconds.\n", secs_lud_inv);
140 /* Print details of LU factorization */
141 print_real_matrix( "Details of LU factorization", matrix_LUD, n, n, lda);
143 /* Print pivot indices */
144 print_int_vector( "Pivot indices", n, vec_int_pivot );
146 tick();
148 /* Query and allocate the optimal workspace */
149 lwork = -1;
150 dgetri_(&n, matrix_LUD, &lda, vec_int_pivot, &lwork_opt, &lwork, &info);
151 lwork = (int)lwork_opt;
152 work = (double *)__malloc_ddr( lwork*sizeof(double) );
154 /* Compute inversion of matrix based on LU factorization */
155 dgetri_(&n, matrix_LUD, &lda, vec_int_pivot, work, &lwork, &info);
157 secs = tock();
159 __free_ddr((void *)work);
160 if(info != 0) {
161 printf("Matrix inversion through LU factorization has a problem.");
162 printf("Returned error value of dgetri is %d. Start a new run. \n", info);
163 //goto START_NEW_RUN;
164 exit(0);
165 }
167 secs_lud_inv += secs;
168 secs_all_runs[i*num_run+j] = secs_lud_inv;
169 printf("Matrix inversion finished. Time spent on inversion: %f seconds.\n", secs);
170 printf("Time spent on LUD and inversion: %f seconds.\n", secs_lud_inv);
172 /* Print solution */
173 print_real_matrix( "Inverse of matrix A through LU factorization:", matrix_LUD, n, n, lda);
175 /* Compute A*inv(A) and precision error: I-A*inv(A) */
176 max_abs_error = compute_inverse_error(matrix_A, matrix_LUD, n, lda);
177 printf("Maximum relative error of matrix inversion by LUD: %e\n", max_abs_error);
179 j++;
181 START_NEW_RUN:
182 printf("Current run finished. \n");
183 } /* while(j<num_run) */
185 __free_ddr((void *)matrix_A);
186 __free_ddr((void *)matrix_LUD);
187 __free_ddr((void *)vec_int_pivot);
188 __free_ddr((void *)vec_int_work);
189 __free_ddr((void *)vec_dbr_diag);
190 __free_ddr((void *)vec_dbr_diagl);
191 __free_ddr((void *)vec_dbr_diagr);
192 } /* for(i=0, n=n_min; i<num_test; i++, n+=n_inc) */
194 print_performance(matrix_sizes, secs_all_runs, num_test, num_run);
196 exit( 0 );
197 }
200 /* Auxiliary routine: printing a vector of integers */
201 void print_int_vector( char* desc, int n, int* a )
202 {
203 int j;
205 if (!print_data)
206 return;
208 printf( "\n %s\n", desc );
209 for( j = 0; j < n; j++ ) printf( " %6i", a[j] );
210 printf( "\n" );
211 }
213 /* Auxiliary routine: printing a real matrix */
214 void print_real_matrix(char* desc, double *mat, int m, int n, int ldv )
215 {
216 int i, j;
218 if (!print_data)
219 return;
221 printf( "\n %s\n", desc );
222 for(i=0; i<m; i++) {
223 for(j=0; j<n; j++) {
224 printf( " %10.5f ", mat[i+j*ldv]);
225 }
226 printf( "\n" );
227 }
228 }
231 double compute_inverse_error(double *matrix_A, double *matrix_A_inverse, int n, int ld)
232 {
233 int i, j;
234 double abs_err, relative_err;
235 double *matrix_A_mult_A_inv;
236 double max_err = 0.;
238 matrix_A_mult_A_inv = (double *)malloc(n*n*sizeof(double));
240 cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,n,n,n,1.0,matrix_A,ld,
241 matrix_A_inverse,ld,0.0,matrix_A_mult_A_inv,ld);
243 /* sqrt(sum(err^2))/N, sum(abs(err))/N, max(abs(err)) */
244 for(i=0; i<n; i++) {
245 for(j=0; j<n; j++) {
246 if(i==j) {
247 abs_err = abs(matrix_A_mult_A_inv[i+j*ld]-1.);
248 }
249 else {
250 abs_err = abs(matrix_A_mult_A_inv[i+j*ld]);
251 }
253 relative_err = abs_err / abs(matrix_A[i+j*ld]);
255 if(relative_err > max_err) {
256 max_err = relative_err;
257 }
258 }
259 }
261 free(matrix_A_mult_A_inv);
262 return max_err;
263 }
266 void print_performance(int *matrix_sizes, float *secs_runs, int num_test, int num_run)
267 {
268 int i, j;
269 float inst, min, max;
270 float tot, avg;
272 printf("Seconds for all matrix sizes:\n");
273 printf("Matrix size ");
274 printf(" Min Max Average ");
275 printf("\n");
277 for(i=0; i<num_test; i++)
278 {
279 printf("%11d ", matrix_sizes[i]);
280 tot = 0.;
281 max = 0.;
282 min = 1e100;
283 for(j=0; j<num_run; j++)
284 {
285 inst = secs_runs[i*num_run+j];
286 tot += (double)inst;
288 if(inst>max) max = inst;
289 if(inst<min) min = inst;
290 }
291 avg = tot/(double)num_run;
292 printf("%12f%12f%12f\n", min, max, avg);
293 }
295 }