9dbbd2b99942c1b777e6b98341aaa5a9ef6542ca
1 /******************************************************************************
2 * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <time.h>
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 #include "cblas.h"
37 #ifdef __cplusplus
38 }
39 #endif
41 /*-----------------------------------------------------------------------------
42 * Timing Setup
43 *----------------------------------------------------------------------------*/
44 struct timespec t0,t1;
45 #define tick() clock_gettime(CLOCK_MONOTONIC, &t0);
46 #define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
47 t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9)
49 /* LAPACK routines */
50 extern int dgetrf_(int *, int *, double *, int *, int *, int *);
51 extern int dgetri_(int *, double *, int *, int *, double *, int *, int *);
52 extern int dgeqrf_(int *, int *, double *, int *, double *, double *, int *, int *);
53 extern int dtrtri_(char *, char *, int *, double *, int *, int *);
55 extern int dlatmr_(int *, int *, char *, int *, char *,
56 double *, int *, double *, double *, char *, char *,
57 double *, int *, double *, double *, int *,
58 double *, char *, int *, int *, int *,
59 double *, double *, char *, double *, int *,
60 int *, int *);
61 extern int dlacpy_(char *, int *, int *, double *, int *, double *, int *);
63 /* Auxiliary routines prototypes */
64 void print_int_vector( char* desc, int n, int* a );
65 void print_real_matrix(char* desc, double *mat, int n, int m, int ldv );
66 double compute_inverse_error(double *matrix_A, double *matrix_A_inverse, int n, int ld);
67 void print_performance(int *matrix_sizes, float *secs_runs, int num_test, int num_run);
69 /* Parameters */
70 #define MAX_ALLOWED_ERR (1e-20)
72 /* Main program */
73 int print_data;
74 int main(int argc, char *argv[]) {
75 /* Locals */
76 float secs, secs_lud_inv, *secs_all_runs;
77 int num_test, num_run, i, j;
78 int n, n_min, n_inc, info, lwork, lda;
79 double lwork_opt, max_abs_error;
80 double *work;
81 double *matrix_A, *matrix_LUD;
82 double *vec_dbr_diagr, *vec_dbr_diag, *vec_dbr_diagl;
83 int *vec_int_pivot, *vec_int_work;
84 int *matrix_sizes;
86 int const_int6 = 6;
87 double const_dbr1 = 1.;
88 double const_dbr0 = 0.;
89 int rand_seed[4] = {10,123,278,3579}; /* numbers must lie between 0 and 4095 */
91 fprintf(stdout, "\n\n===== LAPACK example: matrix inversion through LUD =====\n");
92 if(argc == 1) { /* no command line arguments, use default */
93 num_test = 3;
94 n_min = 1000;
95 n_inc = 1000;
96 num_run = 1;
97 print_data = 0;
98 }
99 else if (argc < 6) {
100 printf("Usage: ludinv <num_test> <start_size> <size_inc> <num_run per size> <print data or not (1 or 0)>\n");
101 exit(0);
102 }
103 else {
104 num_test = atoi(argv[1]);
105 n_min = atoi(argv[2]);
106 n_inc = atoi(argv[3]);
107 num_run = atoi(argv[4]);
108 print_data = atoi(argv[5]);
109 }
110 rand_seed[0] = 4675;
112 secs_all_runs = (float *)malloc(sizeof(float)*num_test*num_run);
113 matrix_sizes = (int*)malloc(sizeof(int)*num_test);
115 for(i=0, n=n_min; i<num_test; i++, n+=n_inc)
116 {
117 lda = n;
118 matrix_sizes[i] = n;
120 printf("\n\nStart matrix inversion by LUD for matrix size %d. \n", n);
122 /* Operation counts of LUD inversion according to LAPACK Working Note 41 */
123 /*operation_counts = 2.*n*n*n - 1.5*n*n + 2.5*n; */
125 /* Allocate memory for matrix and vectors */
126 matrix_A = (double *)__malloc_ddr(sizeof(double)*n*n);
127 matrix_LUD = (double *)__malloc_ddr(sizeof(double)*n*n);
128 vec_int_pivot = (int *)__malloc_ddr(sizeof(int)*n);
129 vec_int_work = (int *)__malloc_ddr(sizeof(int)*n);
130 vec_dbr_diag = (double *)__malloc_ddr(sizeof(double)*n);
131 vec_dbr_diagl = (double *)__malloc_ddr(sizeof(double)*n);
132 vec_dbr_diagr = (double *)__malloc_ddr(sizeof(double)*n);
134 j = 0;
135 while(j<num_run) {
136 /* Generate a N by N random matrix */
137 printf("\nStart test run %d out of %d. \n", j+1, num_run);
138 printf("Generate %d by %d random matrix. \n", n, n);
139 dlatmr_(&n, &n, "S", rand_seed, "N", vec_dbr_diag, &const_int6, &const_dbr1,
140 &const_dbr1, "T", "N", vec_dbr_diagl, &const_int6, &const_dbr1, vec_dbr_diagr,
141 &const_int6, &const_dbr1, "N", vec_int_pivot, &n, &n, &const_dbr0,
142 &const_dbr1, "N", matrix_A, &lda, vec_int_work, &info);
143 if(info != 0) {
144 printf("Random matrix generation has a problem. Returned error value is %d. Start a new run. \n", info);
145 //goto START_NEW_RUN;
146 exit(-1);
147 }
149 memcpy((void *)matrix_LUD, (void *)matrix_A, sizeof(double)*n*n);
151 print_real_matrix("Generated random matrix (double real): ", matrix_LUD, n, n, lda);
153 tick();
155 /* Computes LU factorization */
156 dgetrf_(&n, &n, matrix_LUD, &lda, vec_int_pivot, &info);
158 secs_lud_inv = tock();
160 if(info != 0) {
161 printf("LU factorization has a problem. Returned error value of dgetrf is %d. Start a new run. \n", info);
162 //goto START_NEW_RUN;
163 exit(-1);
164 }
166 printf("LU decomposition finished. Time spent on LUD: %f seconds.\n", secs_lud_inv);
168 /* Print details of LU factorization */
169 print_real_matrix( "Details of LU factorization", matrix_LUD, n, n, lda);
171 /* Print pivot indices */
172 print_int_vector( "Pivot indices", n, vec_int_pivot );
174 tick();
176 /* Query and allocate the optimal workspace */
177 lwork = -1;
178 dgetri_(&n, matrix_LUD, &lda, vec_int_pivot, &lwork_opt, &lwork, &info);
179 lwork = (int)lwork_opt;
180 work = (double *)__malloc_ddr( lwork*sizeof(double) );
182 /* Compute inversion of matrix based on LU factorization */
183 dgetri_(&n, matrix_LUD, &lda, vec_int_pivot, work, &lwork, &info);
185 secs = tock();
187 __free_ddr((void *)work);
188 if(info != 0) {
189 printf("Matrix inversion through LU factorization has a problem.");
190 printf("Returned error value of dgetri is %d. Start a new run. \n", info);
191 //goto START_NEW_RUN;
192 exit(-1);
193 }
195 secs_lud_inv += secs;
196 secs_all_runs[i*num_run+j] = secs_lud_inv;
197 printf("Matrix inversion finished. Time spent on inversion: %f seconds.\n", secs);
198 printf("Time spent on LUD and inversion: %f seconds.\n", secs_lud_inv);
200 /* Print solution */
201 print_real_matrix( "Inverse of matrix A through LU factorization:", matrix_LUD, n, n, lda);
203 /* Compute A*inv(A) and precision error: I-A*inv(A) */
204 max_abs_error = compute_inverse_error(matrix_A, matrix_LUD, n, lda);
205 printf("Maximum relative error of matrix inversion by LUD: %e\n", max_abs_error);
207 if(max_abs_error > MAX_ALLOWED_ERR) {
208 printf("Error is out of tolerance range.\n");
209 exit(-1);
210 }
212 j++;
214 START_NEW_RUN:
215 printf("Current run finished. \n");
216 } /* while(j<num_run) */
218 __free_ddr((void *)matrix_A);
219 __free_ddr((void *)matrix_LUD);
220 __free_ddr((void *)vec_int_pivot);
221 __free_ddr((void *)vec_int_work);
222 __free_ddr((void *)vec_dbr_diag);
223 __free_ddr((void *)vec_dbr_diagl);
224 __free_ddr((void *)vec_dbr_diagr);
225 } /* for(i=0, n=n_min; i<num_test; i++, n+=n_inc) */
227 print_performance(matrix_sizes, secs_all_runs, num_test, num_run);
229 printf("Passed.\n");
231 exit( 0 );
232 }
235 /* Auxiliary routine: printing a vector of integers */
236 void print_int_vector( char* desc, int n, int* a )
237 {
238 int j;
240 if (!print_data)
241 return;
243 printf( "\n %s\n", desc );
244 for( j = 0; j < n; j++ ) printf( " %6i", a[j] );
245 printf( "\n" );
246 }
248 /* Auxiliary routine: printing a real matrix */
249 void print_real_matrix(char* desc, double *mat, int m, int n, int ldv )
250 {
251 int i, j;
253 if (!print_data)
254 return;
256 printf( "\n %s\n", desc );
257 for(i=0; i<m; i++) {
258 for(j=0; j<n; j++) {
259 printf( " %10.5f ", mat[i+j*ldv]);
260 }
261 printf( "\n" );
262 }
263 }
266 double compute_inverse_error(double *matrix_A, double *matrix_A_inverse, int n, int ld)
267 {
268 int i, j;
269 double abs_err, relative_err;
270 double *matrix_A_mult_A_inv;
271 double max_err = 0.;
273 matrix_A_mult_A_inv = (double *)malloc(n*n*sizeof(double));
275 cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,n,n,n,1.0,matrix_A,ld,
276 matrix_A_inverse,ld,0.0,matrix_A_mult_A_inv,ld);
278 /* sqrt(sum(err^2))/N, sum(abs(err))/N, max(abs(err)) */
279 for(i=0; i<n; i++) {
280 for(j=0; j<n; j++) {
281 if(i==j) {
282 abs_err = abs(matrix_A_mult_A_inv[i+j*ld]-1.);
283 }
284 else {
285 abs_err = abs(matrix_A_mult_A_inv[i+j*ld]);
286 }
288 relative_err = abs_err / abs(matrix_A[i+j*ld]);
290 if(relative_err > max_err) {
291 max_err = relative_err;
292 }
293 }
294 }
296 free(matrix_A_mult_A_inv);
297 return max_err;
298 }
301 void print_performance(int *matrix_sizes, float *secs_runs, int num_test, int num_run)
302 {
303 int i, j;
304 float inst, min, max;
305 float tot, avg;
307 printf("Seconds for all matrix sizes:\n");
308 printf("Matrix size ");
309 printf(" Min Max Average ");
310 printf("\n");
312 for(i=0; i<num_test; i++)
313 {
314 printf("%11d ", matrix_sizes[i]);
315 tot = 0.;
316 max = 0.;
317 min = 1e100;
318 for(j=0; j<num_run; j++)
319 {
320 inst = secs_runs[i*num_run+j];
321 tot += (double)inst;
323 if(inst>max) max = inst;
324 if(inst<min) min = inst;
325 }
326 avg = tot/(double)num_run;
327 printf("%12f%12f%12f\n", min, max, avg);
328 }
329 }