examples/ludinv/main.c

   1 /******************************************************************************
   2  * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
   3  *   All rights reserved.
   4  *
   5  *   Redistribution and use in source and binary forms, with or without
   6  *   modification, are permitted provided that the following conditions are met:
   7  *       * Redistributions of source code must retain the above copyright
   8  *         notice, this list of conditions and the following disclaimer.
   9  *       * Redistributions in binary form must reproduce the above copyright
  10  *         notice, this list of conditions and the following disclaimer in the
  11  *         documentation and/or other materials provided with the distribution.
  12  *       * Neither the name of Texas Instruments Incorporated nor the
  13  *         names of its contributors may be used to endorse or promote products
  14  *         derived from this software without specific prior written permission.
  15  *
  16  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17  *   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  *   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  *   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  20  *   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  21  *   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  22  *   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  23  *   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  24  *   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  25  *   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  26  *   THE POSSIBILITY OF SUCH DAMAGE.
  27  *****************************************************************************/
  28
  29 #include <stdlib.h>
  30 #include <stdio.h>
  31 #include <string.h>
  32 #include <time.h>
  33 #ifdef __cplusplus
  34 extern "C" {
  35 #endif
  36 #include "cblas.h"
  37 #ifdef __cplusplus
  38 }
  39 #endif
  40
  41 /*-----------------------------------------------------------------------------
  42 * Timing Setup
  43 *----------------------------------------------------------------------------*/
  44 struct timespec t0,t1;
  45 #define tick()  clock_gettime(CLOCK_MONOTONIC, &t0);
  46 #define tock() (clock_gettime(CLOCK_MONOTONIC, &t1), \
  47                         t1.tv_sec - t0.tv_sec + (t1.tv_nsec - t0.tv_nsec) / 1e9)
  48
  49 /* LAPACK routines */
  50 extern int dgetrf_(int *, int *, double *, int *, int *, int *);
  51 extern int dgetri_(int *, double *, int *, int *, double *, int *, int *);
  52 extern int dgeqrf_(int *, int *, double *, int *, double *, double *, int *, int *);
  53 extern int dtrtri_(char *, char *, int *, double *, int *, int *);
  54
  55 extern int dlatmr_(int *, int *, char *, int *, char *,
  56                    double *, int *, double *, double *, char *, char *,
  57                    double *, int *, double *, double *, int *,
  58                    double *, char *, int *, int *, int *,
  59                    double *, double *, char *, double *, int *,
  60                    int *, int *);
  61 extern int dlacpy_(char *, int *, int *, double *, int *, double *, int *);
  62
  63 /* Auxiliary routines prototypes */
  64 void print_int_vector( char* desc, int n, int* a );
  65 void print_real_matrix(char* desc, double *mat, int n, int m, int ldv );
  66 double compute_inverse_error(double *matrix_A, double *matrix_A_inverse, int n, int ld);
  67 void print_performance(int *matrix_sizes, float *secs_runs, int num_test, int num_run);
  68
  69 /* Parameters */
  70 #define MAX_ALLOWED_ERR (1e-20)
  71
  72 /* Main program */
  73 int print_data;
  74 int main(int argc, char *argv[]) {
  75     /* Locals */
  76     float secs, secs_lud_inv, *secs_all_runs;
  77     int num_test, num_run, i, j;
  78     int n, n_min, n_inc, info, lwork, lda;
  79     double lwork_opt, max_abs_error;
  80     double *work;
  81     double *matrix_A, *matrix_LUD;
  82     double *vec_dbr_diagr, *vec_dbr_diag, *vec_dbr_diagl;
  83     int    *vec_int_pivot, *vec_int_work;
  84     int    *matrix_sizes;
  85
  86     int const_int6    = 6;
  87     double const_dbr1 = 1.;
  88     double const_dbr0 = 0.;
  89     int rand_seed[4] = {10,123,278,3579};  /* numbers must lie between 0 and 4095 */
  90
  91     fprintf(stdout, "\n\n===== LAPACK example: matrix inversion through LUD =====\n");
  92     if(argc == 1) { /* no command line arguments, use default */
  93         num_test = 3;
  94         n_min    = 1000;
  95         n_inc    = 1000;
  96         num_run  = 1;
  97         print_data = 0;
  98     }
  99     else if (argc < 6) {
 100         printf("Usage: ludinv <num_test> <start_size> <size_inc> <num_run per size> <print data or not (1 or 0)>\n");
 101         exit(0);
 102     }
 103     else {
 104         num_test = atoi(argv[1]);
 105         n_min    = atoi(argv[2]);
 106         n_inc    = atoi(argv[3]);
 107         num_run  = atoi(argv[4]);
 108         print_data = atoi(argv[5]);
 109     }
 110     rand_seed[0] = 4675;
 111
 112     secs_all_runs = (float *)malloc(sizeof(float)*num_test*num_run);
 113     matrix_sizes  = (int*)malloc(sizeof(int)*num_test);
 114
 115     for(i=0, n=n_min; i<num_test; i++, n+=n_inc)
 116     {
 117         lda = n;
 118         matrix_sizes[i] = n;
 119
 120         printf("\n\nStart matrix inversion by LUD for matrix size %d. \n", n);
 121
 122         /* Operation counts of LUD inversion according to LAPACK Working Note 41 */
 123         /*operation_counts = 2.*n*n*n - 1.5*n*n + 2.5*n;  */
 124
 125         /* Allocate memory for matrix and vectors */
 126         matrix_A      = (double *)__malloc_ddr(sizeof(double)*n*n);
 127         matrix_LUD    = (double *)__malloc_ddr(sizeof(double)*n*n);
 128         vec_int_pivot = (int    *)__malloc_ddr(sizeof(int)*n);
 129         vec_int_work  = (int    *)__malloc_ddr(sizeof(int)*n);
 130         vec_dbr_diag  = (double *)__malloc_ddr(sizeof(double)*n);
 131         vec_dbr_diagl = (double *)__malloc_ddr(sizeof(double)*n);
 132         vec_dbr_diagr = (double *)__malloc_ddr(sizeof(double)*n);
 133
 134         j = 0;
 135         while(j<num_run) {
 136             /* Generate a N by N random matrix */
 137             printf("\nStart test run %d out of %d. \n", j+1, num_run);
 138             printf("Generate %d by %d random matrix. \n", n, n);
 139             dlatmr_(&n, &n, "S", rand_seed, "N", vec_dbr_diag, &const_int6, &const_dbr1,
 140                     &const_dbr1, "T", "N", vec_dbr_diagl, &const_int6, &const_dbr1, vec_dbr_diagr,
 141                     &const_int6, &const_dbr1, "N", vec_int_pivot, &n, &n, &const_dbr0,
 142                     &const_dbr1, "N", matrix_A, &lda, vec_int_work, &info);
 143             if(info != 0) {
 144                 printf("Random matrix generation has a problem. Returned error value is %d. Start a new run. \n", info);
 145                 //goto START_NEW_RUN;
 146                 exit(-1);
 147             }
 148
 149             memcpy((void *)matrix_LUD, (void *)matrix_A, sizeof(double)*n*n);
 150
 151             print_real_matrix("Generated random matrix (double real): ", matrix_LUD, n, n, lda);
 152
 153             tick();
 154
 155             /* Computes LU factorization */
 156             dgetrf_(&n, &n, matrix_LUD, &lda, vec_int_pivot, &info);
 157
 158             secs_lud_inv = tock();
 159
 160             if(info != 0) {
 161                 printf("LU factorization has a problem. Returned error value of dgetrf is %d. Start a new run. \n", info);
 162                 //goto START_NEW_RUN;
 163                 exit(-1);
 164             }
 165
 166             printf("LU decomposition finished. Time spent on LUD: %f seconds.\n", secs_lud_inv);
 167
 168             /* Print details of LU factorization */
 169             print_real_matrix( "Details of LU factorization", matrix_LUD, n, n, lda);
 170
 171             /* Print pivot indices */
 172             print_int_vector( "Pivot indices", n, vec_int_pivot );
 173
 174             tick();
 175
 176             /* Query and allocate the optimal workspace */
 177             lwork = -1;
 178             dgetri_(&n, matrix_LUD, &lda, vec_int_pivot, &lwork_opt, &lwork, &info);
 179             lwork = (int)lwork_opt;
 180             work = (double *)__malloc_ddr( lwork*sizeof(double) );
 181
 182             /* Compute inversion of matrix based on LU factorization */
 183             dgetri_(&n, matrix_LUD, &lda, vec_int_pivot, work, &lwork, &info);
 184
 185             secs = tock();
 186
 187             __free_ddr((void *)work);
 188             if(info != 0) {
 189                 printf("Matrix inversion through LU factorization has a problem.");
 190                 printf("Returned error value of dgetri is %d. Start a new run. \n", info);
 191                 //goto START_NEW_RUN;
 192                 exit(-1);
 193             }
 194
 195             secs_lud_inv += secs;
 196             secs_all_runs[i*num_run+j] = secs_lud_inv;
 197             printf("Matrix inversion finished. Time spent on inversion: %f seconds.\n", secs);
 198             printf("Time spent on LUD and inversion: %f seconds.\n", secs_lud_inv);
 199
 200             /* Print solution */
 201             print_real_matrix( "Inverse of matrix A through LU factorization:", matrix_LUD, n, n, lda);
 202
 203             /* Compute A*inv(A) and precision error: I-A*inv(A) */
 204             max_abs_error = compute_inverse_error(matrix_A, matrix_LUD, n, lda);
 205             printf("Maximum relative error of matrix inversion by LUD: %e\n", max_abs_error);
 206
 207             if(max_abs_error > MAX_ALLOWED_ERR) {
 208                 printf("Error is out of tolerance range.\n");
 209                 exit(-1);
 210             }
 211
 212             j++;
 213
 214 START_NEW_RUN:
 215             printf("Current run finished. \n");
 216         } /* while(j<num_run) */
 217
 218         __free_ddr((void *)matrix_A);
 219         __free_ddr((void *)matrix_LUD);
 220         __free_ddr((void *)vec_int_pivot);
 221         __free_ddr((void *)vec_int_work);
 222         __free_ddr((void *)vec_dbr_diag);
 223         __free_ddr((void *)vec_dbr_diagl);
 224         __free_ddr((void *)vec_dbr_diagr);
 225     }  /* for(i=0, n=n_min; i<num_test; i++, n+=n_inc) */
 226
 227     print_performance(matrix_sizes, secs_all_runs, num_test, num_run);
 228
 229     printf("Passed.\n");
 230
 231     exit( 0 );
 232 }
 233
 234
 235 /* Auxiliary routine: printing a vector of integers */
 236 void print_int_vector( char* desc, int n, int* a )
 237 {
 238    int j;
 239
 240    if (!print_data)
 241      return;
 242
 243    printf( "\n %s\n", desc );
 244    for( j = 0; j < n; j++ ) printf( " %6i", a[j] );
 245    printf( "\n" );
 246 }
 247
 248 /* Auxiliary routine: printing a real matrix */
 249 void print_real_matrix(char* desc, double *mat, int m, int n, int ldv )
 250 {
 251    int i, j;
 252
 253    if (!print_data)
 254      return;
 255
 256    printf( "\n %s\n", desc );
 257    for(i=0; i<m; i++) {
 258       for(j=0; j<n; j++) {
 259          printf( " %10.5f ", mat[i+j*ldv]);
 260       }
 261       printf( "\n" );
 262   }
 263 }
 264
 265
 266 double compute_inverse_error(double *matrix_A, double *matrix_A_inverse, int n, int ld)
 267 {
 268    int i, j;
 269    double abs_err, relative_err;
 270    double *matrix_A_mult_A_inv;
 271    double max_err = 0.;
 272
 273    matrix_A_mult_A_inv = (double *)malloc(n*n*sizeof(double));
 274
 275    cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,n,n,n,1.0,matrix_A,ld,
 276                matrix_A_inverse,ld,0.0,matrix_A_mult_A_inv,ld);
 277
 278    /* sqrt(sum(err^2))/N, sum(abs(err))/N, max(abs(err))  */
 279    for(i=0; i<n; i++) {
 280       for(j=0; j<n; j++) {
 281         if(i==j) {
 282           abs_err = abs(matrix_A_mult_A_inv[i+j*ld]-1.);
 283         }
 284         else {
 285           abs_err = abs(matrix_A_mult_A_inv[i+j*ld]);
 286         }
 287
 288         relative_err = abs_err / abs(matrix_A[i+j*ld]);
 289
 290         if(relative_err > max_err) {
 291           max_err = relative_err;
 292         }
 293       }
 294    }
 295
 296    free(matrix_A_mult_A_inv);
 297    return max_err;
 298 }
 299
 300
 301 void print_performance(int *matrix_sizes, float *secs_runs, int num_test, int num_run)
 302 {
 303     int i, j;
 304     float inst, min, max;
 305     float tot, avg;
 306
 307     printf("Seconds for all matrix sizes:\n");
 308     printf("Matrix size   ");
 309     printf("    Min           Max        Average  ");
 310     printf("\n");
 311
 312     for(i=0; i<num_test; i++)
 313     {
 314         printf("%11d ", matrix_sizes[i]);
 315         tot = 0.;
 316         max = 0.;
 317         min = 1e100;
 318         for(j=0; j<num_run; j++)
 319           {
 320             inst = secs_runs[i*num_run+j];
 321             tot += (double)inst;
 322
 323             if(inst>max) max = inst;
 324             if(inst<min) min = inst;
 325           }
 326         avg = tot/(double)num_run;
 327         printf("%12f%12f%12f\n", min, max, avg);
 328     }
 329 }