ti/dsplib/src/DSPF_dp_lud_inv_cmplx/c66/DSPF_dp_lud_inv_cmplx.c

   1 /* ======================================================================= */
   2 /* DSPF_dp_lud_inv_cmplx.c - complex matrix inversion by LUD double precision*/
   3 /*                        optimized C Implementation                       */
   4 /*                                                                         */
   5 /* Rev 1.0.0                                                               */
   6 /*                                                                         */
   7 /* Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/  */
   8 /*                                                                         */
   9 /*                                                                         */
  10 /*  Redistribution and use in source and binary forms, with or without     */
  11 /*  modification, are permitted provided that the following conditions     */
  12 /*  are met:                                                               */
  13 /*                                                                         */
  14 /*    Redistributions of source code must retain the above copyright       */
  15 /*    notice, this list of conditions and the following disclaimer.        */
  16 /*                                                                         */
  17 /*    Redistributions in binary form must reproduce the above copyright    */
  18 /*    notice, this list of conditions and the following disclaimer in the  */
  19 /*    documentation and/or other materials provided with the               */
  20 /*    distribution.                                                        */
  21 /*                                                                         */
  22 /*    Neither the name of Texas Instruments Incorporated nor the names of  */
  23 /*    its contributors may be used to endorse or promote products derived  */
  24 /*    from this software without specific prior written permission.        */
  25 /*                                                                         */
  26 /*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS    */
  27 /*  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT      */
  28 /*  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR  */
  29 /*  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT   */
  30 /*  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,  */
  31 /*  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT       */
  32 /*  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  */
  33 /*  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  */
  34 /*  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT    */
  35 /*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  */
  36 /*  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.   */
  37 /*                                                                         */
  38 /* ======================================================================= */\
  39 #include <math.h>
  40 #include <string.h>
  41
  42 #pragma CODE_SECTION(DSPF_dp_lud_inverse_cmplx,".text:optimized");
  43
  44 #include "DSPF_dp_lud_inv_cmplx.h"
  45
  46 #define ENABLE_NR 1
  47 #define ENABLE_MATHLIB 1
  48
  49 #ifdef ENABLE_MATHLIB
  50 #include <ti/mathlib/src/atan2dp/c66/atan2dp.h>
  51 #include <ti/mathlib/src/cosdp/c66/cosdp.h>
  52 #include <ti/mathlib/src/sindp/c66/sindp.h>
  53 #endif
  54
  55 static void complex_dp_div(double x_real,double x_imag,double y_real,double y_imag,
  56                            double *z_real,double *z_imag);
  57 static void complex_dp_inv(double y_real,double y_imag,double *z_real,double *z_imag);
  58
  59
  60 int DSPF_dp_lud_inverse_cmplx(const int Nrows,unsigned short *restrict P,double *restrict L,
  61                               double *restrict U,double *restrict inv_A) {
  62
  63   int row,col,Ncols,k;
  64   double xreal,ximag,yreal,yimag,zreal,zimag;
  65   double factor_real,factor_imag,sum_real,sum_imag;
  66   double *inv_L,*inv_U,*inv_U_x_inv_L;
  67
  68   _nassert(Nrows>0);
  69   _nassert((int)P % 2 == 0);
  70   _nassert((int)L % 8 == 0);
  71   _nassert((int)U % 8 == 0);
  72   _nassert((int)inv_A % 8 == 0);
  73
  74   /* set inv_A matrix to identity */
  75   inv_L=&inv_A[0];
  76   Ncols=2*Nrows;
  77   memset(inv_L,0.0,sizeof(double)*Nrows*Ncols);
  78 #pragma MUST_ITERATE(1,,)
  79   for (row=0;row<Nrows;row++) {
  80         inv_L[2*row  +row*Ncols]=1;
  81   }
  82
  83   /* use Gauss Jordan algorithm to invert L whose result is in inv_L */
  84 #pragma MUST_ITERATE(1,,)
  85   for (col=0;col<Nrows-1;col++) {
  86         yreal=L[2*col  +col*Ncols];
  87         yimag=L[2*col+1+col*Ncols];
  88         complex_dp_inv(yreal,yimag,&zreal,&zimag);
  89
  90 #pragma MUST_ITERATE(1,,)
  91     for (row=col+1;row<Nrows;row++) {
  92       xreal=L[2*col  +row*Ncols];
  93       ximag=L[2*col+1+row*Ncols];
  94           factor_real=xreal*zreal-ximag*zimag;
  95           factor_imag=xreal*zimag+ximag*zreal;
  96
  97 #pragma MUST_ITERATE(1,,)
  98           for (k=0;k<row;k++) {
  99                 xreal=factor_real;
 100                 ximag=factor_imag;
 101                 yreal=inv_L[2*k  +col*Ncols];
 102                 yimag=inv_L[2*k+1+col*Ncols];
 103                 inv_L[2*k  +row*Ncols]-=xreal*yreal-ximag*yimag;
 104                 inv_L[2*k+1+row*Ncols]-=xreal*yimag+ximag*yreal;
 105           }
 106         }
 107   }
 108
 109   /* set inv_U matrix to identity */
 110   inv_U=&L[0];
 111   memset(inv_U,0.0,sizeof(double)*Nrows*Ncols);
 112 #pragma MUST_ITERATE(1,,)
 113   for (row=0;row<Nrows;row++) {
 114         inv_U[2*row  +row*Ncols]=1;
 115   }
 116
 117   /* use Gauss Jordan algorithm to invert U whose result is in L */
 118 #pragma MUST_ITERATE(1,,)
 119   for (col=Nrows-1;col>=1;col--) {
 120 #pragma MUST_ITERATE(1,,)
 121         for (row=col-1;row>=0;row--) {
 122           xreal=U[2*col  +row*Ncols];
 123           ximag=U[2*col+1+row*Ncols];
 124           yreal=U[2*col  +col*Ncols];
 125           yimag=U[2*col+1+col*Ncols];
 126           complex_dp_div(xreal,ximag,yreal,yimag,&zreal,&zimag);
 127           factor_real=zreal;
 128           factor_imag=zimag;
 129
 130 #pragma MUST_ITERATE(1,,)
 131           for (k=col;k<Nrows;k++) {
 132                 xreal=factor_real;
 133                 ximag=factor_imag;
 134                 yreal=inv_U[2*k  +col*Ncols];
 135                 yimag=inv_U[2*k+1+col*Ncols];
 136                 complex_dp_div(xreal,ximag,yreal,yimag,&zreal,&zimag);
 137                 inv_U[2*k  +row*Ncols]-=xreal*yreal-ximag*yimag;
 138                 inv_U[2*k+1+row*Ncols]-=xreal*yimag+ximag*yreal;
 139                 xreal=factor_real;
 140                 ximag=factor_imag;
 141                 yreal=U[2*k  +col*Ncols];
 142                 yimag=U[2*k+1+col*Ncols];
 143                 U[2*k  +row*Ncols]-=xreal*yreal-ximag*yimag;
 144                 U[2*k+1+row*Ncols]-=xreal*yimag+ximag*yreal;
 145           }
 146         }
 147   }
 148
 149   /* scale U & L to get identity matrix in U */
 150 #pragma MUST_ITERATE(1,,)
 151   for (row=Nrows-1;row>=0;row--) {
 152         yreal=U[2*row  +row*Ncols];
 153         yimag=U[2*row+1+row*Ncols];
 154         complex_dp_inv(yreal,yimag,&zreal,&zimag);
 155         factor_real=zreal;
 156         factor_imag=zimag;
 157
 158 #pragma MUST_ITERATE(1,,)
 159         for (col=0;col<Nrows;col++) {
 160           xreal=L[2*col  +row*Ncols];
 161           ximag=L[2*col+1+row*Ncols];
 162           L[2*col  +row*Ncols]=xreal*zreal-ximag*zimag;
 163           L[2*col+1+row*Ncols]=xreal*zimag+ximag*zreal;
 164         }
 165   }
 166
 167   /* compute inv_U_x_inv_L=inv(U)*inv(L) */
 168   inv_U_x_inv_L=&L[0];
 169 #pragma MUST_ITERATE(1,,)
 170   for (row=0;row<Nrows;row++) {
 171 #pragma MUST_ITERATE(1,,)
 172     for (col=0;col<Nrows;col++) {
 173       sum_real=0;
 174       sum_imag=0;
 175 #pragma MUST_ITERATE(1,,)
 176           for (k=0;k<Nrows;k++) {
 177             xreal=inv_U[2*k  +row*Ncols];
 178             ximag=inv_U[2*k+1+row*Ncols];
 179             yreal=inv_L[2*col  +k*Ncols];
 180             yimag=inv_L[2*col+1+k*Ncols];
 181             sum_real+=xreal*yreal-ximag*yimag;
 182             sum_imag+=xreal*yimag+ximag*yreal;
 183           }
 184       inv_U_x_inv_L[2*col  +row*Ncols]=sum_real;
 185       inv_U_x_inv_L[2*col+1+row*Ncols]=sum_imag;
 186     }
 187   }
 188
 189   /* compute inv_A=inv(U)*inv(L)*P */
 190 #pragma MUST_ITERATE(1,,)
 191   for (row=0;row<Nrows;row++) {
 192 #pragma MUST_ITERATE(1,,)
 193     for (col=0;col<Nrows;col++) {
 194       sum_real=0;
 195       sum_imag=0;
 196 #pragma MUST_ITERATE(1,,)
 197         for (k=0;k<Nrows;k++) {
 198       xreal=inv_U_x_inv_L[2*k  +row*Ncols];
 199       ximag=inv_U_x_inv_L[2*k+1+row*Ncols];
 200       yreal=P[col+k*Nrows];
 201       sum_real+=xreal*yreal;
 202       sum_imag+=ximag*yreal;
 203         }
 204     inv_A[2*col  +row*Ncols]=sum_real;
 205     inv_A[2*col+1+row*Ncols]=sum_imag;
 206     }
 207   }
 208
 209   return 0;
 210 }
 211
 212 static void complex_dp_div(double x_real,double x_imag,double y_real,double y_imag,double *z_real,double *z_imag) {
 213
 214   double inv_mag_sq;
 215 #ifdef ENABLE_NR
 216   double x,y;
 217 #endif
 218
 219 #ifndef ENABLE_NR
 220   inv_mag_sq=1/(y_real*y_real+y_imag*y_imag);
 221 #else
 222   x=y_real*y_real+y_imag*y_imag;
 223   y=_rcpdp(x);
 224   y=y*(2.0-x*y);
 225   y=y*(2.0-x*y);
 226   y=y*(2.0-x*y);
 227   inv_mag_sq=y;
 228 #endif
 229   *z_real=(x_real*y_real+x_imag*y_imag)*inv_mag_sq;
 230   *z_imag=(x_imag*y_real-x_real*y_imag)*inv_mag_sq;
 231
 232 }
 233
 234 static void complex_dp_inv(double y_real,double y_imag,double *z_real,double *z_imag) {
 235
 236   double inv_mag_sq;
 237 #ifdef ENABLE_NR
 238   double x,y;
 239 #endif
 240
 241 #ifndef ENABLE_NR
 242   inv_mag_sq=1/(y_real*y_real+y_imag*y_imag);
 243 #else
 244   x=y_real*y_real+y_imag*y_imag;
 245   y=_rcpdp(x);
 246   y=y*(2.0-x*y);
 247   y=y*(2.0-x*y);
 248   y=y*(2.0-x*y);
 249   inv_mag_sq=y;
 250 #endif
 251   *z_real= y_real*inv_mag_sq;
 252   *z_imag=-y_imag*inv_mag_sq;
 253
 254 }
 255
 256 /* ======================================================================= */
 257 /*  End of file:  DSPF_dp_lud_inv_cmplx.c                                  */
 258 /* ----------------------------------------------------------------------- */
 259 /*            Copyright (c) 2013 Texas Instruments, Incorporated.          */
 260 /*                           All Rights Reserved.                          */
 261 /* ======================================================================= */