[ep-processor-libraries/dsplib.git] / ti / dsplib / src / DSPF_sp_lud_cmplx / c66 / DSPF_sp_lud_cmplx.c
1 /* ======================================================================= */
2 /* DSPF_sp_lud_cmplx.c -- lower/upper complex matrix decomposition */
3 /* Optimized C Implementation */
4 /* */
5 /* Rev 1.0.0 */
6 /* */
7 /* Copyright (C) 2013 Texas Instruments Incorporated - http://www.ti.com/ */
8 /* */
9 /* */
10 /* Redistribution and use in source and binary forms, with or without */
11 /* modification, are permitted provided that the following conditions */
12 /* are met: */
13 /* */
14 /* Redistributions of source code must retain the above copyright */
15 /* notice, this list of conditions and the following disclaimer. */
16 /* */
17 /* Redistributions in binary form must reproduce the above copyright */
18 /* notice, this list of conditions and the following disclaimer in the */
19 /* documentation and/or other materials provided with the */
20 /* distribution. */
21 /* */
22 /* Neither the name of Texas Instruments Incorporated nor the names of */
23 /* its contributors may be used to endorse or promote products derived */
24 /* from this software without specific prior written permission. */
25 /* */
26 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
27 /* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */
28 /* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR */
29 /* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT */
30 /* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
31 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT */
32 /* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
33 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY */
34 /* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
35 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE */
36 /* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
37 /* */
38 /* ======================================================================= */\
39 #include <math.h>
40 #include <string.h>
41 #include <c6x.h>
43 #pragma CODE_SECTION(DSPF_sp_lud_cmplx,".text:optimized");
45 #include "DSPF_sp_lud_cmplx.h"
47 #define ENABLE_NR 1
48 #define ENABLE_MATHLIB 1
50 #ifdef ENABLE_MATHLIB
51 #include <ti/mathlib/src/atan2sp/c66/atan2sp.h>
52 #include <ti/mathlib/src/cossp/c66/cossp.h>
53 #include <ti/mathlib/src/sinsp/c66/sinsp.h>
54 #endif
56 static void complex_sp_inv(__float2_t cy,__float2_t *cz);
58 int DSPF_sp_lud_cmplx(const int Nrows, float *restrict A, float *restrict L,float *restrict U,unsigned short *restrict P)
59 {
60 int row,col,Ncols;
61 int min_row,max_row,k,temp;
62 float mag,min,max;
63 __float2_t ctemp,cy,cz;
64 __float2_t *AA,*LL,*UU;
65 int addr;
67 _nassert(Nrows>0);
68 _nassert((int)A % 8 == 0);
69 _nassert((int)L % 8 == 0);
70 _nassert((int)U % 8 == 0);
71 _nassert((int)P % 2 == 0);
73 /* initialize __float2_t arrays to overlay input float arrays */
74 addr=(int)(&A[0]);
75 AA=(__float2_t *)(addr);
76 addr=(int)(&L[0]);
77 LL=(__float2_t *)(addr);
78 addr=(int)(&U[0]);
79 UU=(__float2_t *)(addr);
81 /* ------------------------------------------------------------------- */
82 /* generate identify matrix */
83 /* ------------------------------------------------------------------- */
84 memset(P,0,sizeof(unsigned short)*Nrows*Nrows);
85 #pragma MUST_ITERATE(1,,)
86 for (row=0;row<Nrows;row++) {
87 P[row+row*Nrows]=1;
88 }
90 /* ------------------------------------------------------------------- */
91 /* LU decomposition */
92 /* ------------------------------------------------------------------- */
93 Ncols=Nrows;
94 memcpy(UU,AA,sizeof(__float2_t)*Nrows*Ncols);
95 #pragma MUST_ITERATE(1,,)
96 for (k=0;k<Nrows-1;k++) {
98 /* find min and max entries in column */
99 max=0.0;
100 min=3.4e+38;
101 #pragma MUST_ITERATE(1,,)
102 for (row=k;row<Nrows;row++) {
103 ctemp= _dmpysp(UU[k+row*Ncols],UU[k+row*Ncols]);
104 mag=sqrt(_hif2(ctemp)+_lof2(ctemp));
107 if (mag>max) {
108 max=mag;
109 max_row=row;
110 }
111 if (mag<min) {
112 min=mag;
113 min_row=row;
114 }
115 }
117 /* swap rows if necessary */
118 if (k!=max_row) {
119 #pragma MUST_ITERATE(1,,)
120 for (col=0;col<Nrows;col++) {
121 ctemp=UU[col+min_row*Ncols];
122 UU[col+min_row*Ncols]=UU[col+max_row*Ncols];
123 UU[col+max_row*Ncols]=ctemp;
124 temp=P[col+min_row*Nrows];
125 P[col+min_row*Nrows]=P[col+max_row*Nrows];
126 P[col+max_row*Nrows]=temp;
127 }
128 }
130 /* generate U matrix */
131 cy=UU[k+k*Ncols];
132 complex_sp_inv(cy,&cz);
134 #pragma MUST_ITERATE(1,,)
135 for (row=k+1;row<Nrows;row++) {
136 #ifndef _LITTLE_ENDIAN
137 UU[k+row*Ncols]=_complex_mpysp(UU[k+row*Ncols],cz);
138 #else
139 ctemp=_complex_mpysp(UU[k+row*Ncols],cz);
140 UU[k+row*Ncols]=_ftof2(_lof2(ctemp),-_hif2(ctemp));
141 #endif
142 }
143 #pragma MUST_ITERATE(1,,)
144 for (row=k+1;row<Nrows;row++) {
145 #pragma MUST_ITERATE(1,,)
146 for (col=k+1;col<Nrows;col++) {
147 #ifndef _LITTLE_ENDIAN
148 UU[col+row*Ncols]=_dsubsp(UU[col+row*Ncols],_complex_mpysp(UU[k+row*Ncols],UU[col+k*Ncols]));
149 #else
150 ctemp=_complex_mpysp(UU[k+row*Ncols],UU[col+k*Ncols]);
151 UU[col+row*Ncols]=_dsubsp(UU[col+row*Ncols],_ftof2(_lof2(ctemp),-_hif2(ctemp)));
152 #endif
153 }
154 }
155 }
157 /* extract lower triangular entries from L into U and set L lower entries to zero */
158 #pragma MUST_ITERATE(1,,)
159 for (row=0;row<Nrows;row++) {
160 #pragma MUST_ITERATE(1,,)
161 for (col=0;col<Nrows;col++) {
162 if (row<col) {
163 LL[col+row*Ncols]=_ftof2(0,0);
164 } else {
165 if (row==col) {
166 #ifndef _LITTLE_ENDIAN
167 LL[col+row*Ncols]=_ftof2(1,0);
168 #else
169 LL[col+row*Ncols]=_ftof2(0,1);
170 #endif
171 } else {
172 LL[col+row*Ncols]=UU[col+row*Ncols];
173 UU[col+row*Ncols]=_ftof2(0,0);
174 }
175 }
176 }
177 }
179 return 0;
180 }
184 static void complex_sp_inv(__float2_t cy,__float2_t *cz) {
186 float y_real,y_imag,z_real,z_imag;
187 float inv_mag_sq;
188 #ifdef ENABLE_NR
189 float x,y;
190 #endif
192 #ifndef _LITTLE_ENDIAN
193 y_real=_hif2(cy);
194 y_imag=_lof2(cy);
195 #else
196 y_real=_lof2(cy);
197 y_imag=_hif2(cy);
198 #endif
199 #ifndef ENABLE_NR
200 inv_mag_sq=1/(y_real*y_real+y_imag*y_imag);
201 #else
202 x=y_real*y_real+y_imag*y_imag;
203 y=_rcpsp(x);
204 y=y*(2.0-x*y);
205 y=y*(2.0-x*y);
206 inv_mag_sq=y;
207 #endif
209 /* results */
210 z_real= y_real*inv_mag_sq;
211 z_imag=-y_imag*inv_mag_sq;
212 #ifndef _LITTLE_ENDIAN
213 *cz=_ftof2(z_real,z_imag);
214 #else
215 *cz=_ftof2(z_imag,z_real);
216 #endif
218 }
219 /* ======================================================================= */
220 /* End of file: DSPF_sp_lud_cmplx */
221 /* ----------------------------------------------------------------------- */
222 /* Copyright (c) 2013 Texas Instruments, Incorporated. */
223 /* All Rights Reserved. */
224 /* ======================================================================= */