[ep-processor-libraries/dsplib.git] / ti / dsplib / src / DSP_ifft16x16_imre / c66 / DSP_ifft16x16_imre_cn.c
1 /* ======================================================================= */
2 /* TEXAS INSTRUMENTS, INC. */
3 /* */
4 /* NAME */
5 /* DSP_ifft16x16_imre_cn -- ifft16x16_imre */
6 /* */
7 /* USAGE */
8 /* */
9 /* This routine is C-callable and can be called as: */
10 /* */
11 /* void DSP_ifft16x16_imre_cn ( */
12 /* short * w, */
13 /* int nx, */
14 /* short * x, */
15 /* short * y */
16 /* ) */
17 /* */
18 /* w[2*nx]: Pointer to vector of Q.15 FFT coefficients of size */
19 /* 2*nx elements. */
20 /* */
21 /* nx: Number of complex elements in vector x. */
22 /* */
23 /* x[2*nx]: Pointer to input vector of size 2*nx elements. */
24 /* */
25 /* y[2*nx]: Pointer to output vector of size 2*nx elements. */
26 /* */
27 /* */
28 /* DESCRIPTION */
29 /* */
30 /* This code performs a Radix-4 IFFT with digit reversal. The code */
31 /* uses a special ordering of twiddle factors and memory accesses */
32 /* to improve performance in the presence of cache. It operates */
33 /* largely in-place, but the final digit-reversed output is written */
34 /* out-of-place. */
35 /* */
36 /* input to this routine is in Imaginary/Real order instead of the */
37 /* traditional Real/Imaginary order. */
38 /* */
39 /* This code requires a special sequence of twiddle factors stored */
40 /* in Q1.15 fixed-point format. The following C code illustrates */
41 /* one way to generate the desired twiddle-factor array: */
42 /* */
/* int gen_twiddle_ifft16x16_imre(short *w, int n) */
/* { */
/*     int i, j, k; */
/*     double M = 32767.5; */
/* */
/*     for (j = 1, k = 0; j < n >> 2; j = j << 2) */
/*     { */
/*         for (i = 0; i < n >> 2; i += j << 1) */
/*         { */
/*             w[k + 3] = d2s(M * cos(2.0 * PI * (i + j) / n)); */
/*             w[k + 2] = d2s(M * sin(2.0 * PI * (i + j) / n)); */
/*             w[k + 1] = d2s(M * cos(2.0 * PI * (i ) / n)); */
/*             w[k + 0] = d2s(M * sin(2.0 * PI * (i ) / n)); */
/* */
/*             k += 4; */
/*         } */
/*     } */
/*     return k; */
/* } */
61 /* */
62 /* The final stage is optimised to remove the multiplication as */
63 /* w0 = 1. This stage also performs digit reversal on the data, */
64 /* so the final output is in natural order. */
65 /* */
66 /* The fft() code shown here performs the bulk of the computation */
67 /* in place. However, because digit-reversal cannot be performed */
68 /* in-place, the final result is written to a separate array, y[]. */
69 /* */
70 /* */
71 /* ASSUMPTIONS */
72 /* */
/* The size of the IFFT, n, must be a power of 2 (a radix-2 final pass */
/* is used when n is not a power of 4), greater than or equal to 16 */
/* and less than 32768. */
75 /* */
76 /* The arrays 'x[]', 'y[]', and 'w[]' all must be aligned on a */
77 /* double-word boundary for the "optimized" implementations. */
78 /* */
/* The input and output data are complex, with the imaginary/real */
/* components stored in adjacent locations in the array. The imaginary */
/* components are stored at even array indices, and the real */
/* components are stored at odd array indices. */
83 /* */
84 /* Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ */
85 /* */
86 /* */
87 /* Redistribution and use in source and binary forms, with or without */
88 /* modification, are permitted provided that the following conditions */
89 /* are met: */
90 /* */
91 /* Redistributions of source code must retain the above copyright */
92 /* notice, this list of conditions and the following disclaimer. */
93 /* */
94 /* Redistributions in binary form must reproduce the above copyright */
95 /* notice, this list of conditions and the following disclaimer in the */
96 /* documentation and/or other materials provided with the */
97 /* distribution. */
98 /* */
99 /* Neither the name of Texas Instruments Incorporated nor the names of */
100 /* its contributors may be used to endorse or promote products derived */
101 /* from this software without specific prior written permission. */
102 /* */
103 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
104 /* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */
105 /* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR */
106 /* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT */
107 /* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
108 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT */
109 /* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
110 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY */
111 /* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
112 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE */
113 /* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
114 /* */
115 /* ======================================================================= */
117 #pragma CODE_SECTION(DSP_ifft16x16_imre_cn, ".text:ansi");
119 #include "DSP_ifft16x16_imre_cn.h"
121 /*--------------------------------------------------------------------------*/
122 /* The following macro is used to obtain a digit reversed index, of a given */
123 /* number i, into j where the number of bits in "i" is "m". For the natural */
124 /* form of C code, this is done by first interchanging every set of "2 bit" */
125 /* pairs, followed by exchanging nibbles, followed by exchanging bytes, and */
/* finally halfwords. To give an example, consider the following number: */
127 /* */
128 /* N = FEDCBA9876543210, where each digit represents a bit, the following */
129 /* steps illustrate the changes as the exchanges are performed: */
130 /* M = DCFE98BA54761032 is the number after every "2 bits" are exchanged. */
131 /* O = 98BADCFE10325476 is the number after every nibble is exchanged. */
132 /* P = 1032547698BADCFE is the number after every byte is exchanged. */
133 /* Since only 16 digits were considered this represents the digit reversed */
134 /* index. Since the numbers are represented as 32 bits, there is one more */
135 /* step typically of exchanging the half words as well. */
136 /*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/* DIG_REV(i, m, j): reverse the order of the 2-bit digits of the 32-bit    */
/* value "i" (swap digit pairs, then nibbles, bytes and halfwords), shift   */
/* the result right by "m" bits and store it in "j". "m" must be below 32.  */
/*--------------------------------------------------------------------------*/
#define DIG_REV(i, m, j)                                                    \
    do {                                                                    \
        unsigned _ = (i);                                                   \
        _ = ((_ & 0x33333333u) <<  2) | ((_ & ~0x33333333u) >>  2);         \
        _ = ((_ & 0x0F0F0F0Fu) <<  4) | ((_ & ~0x0F0F0F0Fu) >>  4);         \
        _ = ((_ & 0x00FF00FFu) <<  8) | ((_ & ~0x00FF00FFu) >>  8);         \
        _ = ((_ & 0x0000FFFFu) << 16) | ((_ & ~0x0000FFFFu) >> 16);         \
        (j) = _ >> (m);                                                     \
    } while (0)

/*--------------------------------------------------------------------------*/
/* DSP_ifft16x16_imre_cn                                                    */
/*                                                                          */
/* Natural-C inverse FFT on 16-bit complex data stored in Imaginary/Real    */
/* order (imaginary part at even halfword indices, real part at odd ones).  */
/*                                                                          */
/*   ptr_w    Twiddle-factor table (read only). Factors are read in pairs;  */
/*            which halfword of a pair is the cosine and which the sine     */
/*            depends on _LITTLE_ENDIAN (see the stage loop).               */
/*   npoints  Number of complex points. The algorithm is written for a      */
/*            power of two >= 16; the call returns without touching any     */
/*            buffer when npoints < 16.                                     */
/*   ptr_x    Input, 2*npoints halfwords. It is overwritten with the        */
/*            intermediate results of the radix-4 stages.                   */
/*   ptr_y    Output, 2*npoints halfwords, written by the final pass in     */
/*            digit-reversed (i.e. natural) order.                          */
/*                                                                          */
/* Every radix-4 stage with twiddles halves its results (rounded); the      */
/* final radix-4 / radix-2 pass applies no scaling.                         */
/*--------------------------------------------------------------------------*/
void DSP_ifft16x16_imre_cn (
    const short * ptr_w,
    int npoints,
    short * ptr_x,
    short * ptr_y
)
{
    const short *w;
    short *x, *x2, *x0;
    short *y0, *y1, *y2, *y3;

    short xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
    short xt0_1, yt0_1, xt1_1, yt1_1, xt2_1, yt2_1;
    short xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
    short xh0_1, xh1_1, xh20_1, xh21_1, xl0_1, xl1_1, xl20_1, xl21_1;
    short x_0, x_1, x_2, x_3, x_l1_0, x_l1_1, x_l1_2, x_l1_3, x_l2_0, x_l2_1;
    short xh0_2, xh1_2, xl0_2, xl1_2, xh0_3, xh1_3, xl0_3, xl1_3;
    short x_4, x_5, x_6, x_7, x_l2_2, x_l2_3, x_h2_0, x_h2_1, x_h2_2, x_h2_3;
    short x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
    short si10, si20, si30, co10, co20, co30;
    short si11, si21, si31, co11, co21, co31;
    short n00, n10, n20, n30, n01, n11, n21, n31;
    short n02, n12, n22, n32, n03, n13, n23, n33;
    int n0, j0;     /* int: compared against / added to the int index "j" */

    int i, j, l1, l2, h2, predj, tw_offset, stride, fft_jmp, tw_jmp;
    int radix, m, norm;

    /*---------------------------------------------------------------------*/
    /* Sizes below the documented minimum of 16 points are rejected: zero  */
    /* would never terminate the bit scan below, and the final pass reads  */
    /* 16 halfwords per iteration from each of its two input pointers.     */
    /*---------------------------------------------------------------------*/
    if (npoints < 16)
        return;

    /*---------------------------------------------------------------------*/
    /* Determine the magnitude of the number of points to be transformed.  */
    /* "m" ends up as one plus the number of leading zero bits of          */
    /* npoints; its parity tells whether a pure radix-4 decomposition can  */
    /* be used or a mixed radix (final radix-2 pass) is required. The scan */
    /* is done in unsigned arithmetic so that testing bit 31 is defined.   */
    /*---------------------------------------------------------------------*/
    for (i = 31, m = 1; ((unsigned)npoints & (1u << i)) == 0; i--, m++)
        ;
    radix = m & 1 ? 2 : 4;
    norm = m - 2;

    /*----------------------------------------------------------------------*/
    /* The stride is quartered with every iteration of the outer loop. It   */
    /* denotes the separation between any two adjacent inputs to the        */
    /* butterfly. This should start out at N/4, hence stride is initially   */
    /* set to N. For every stride, 2*stride twiddle factors are accessed.   */
    /* "tw_offset" is the offset within the current twiddle factor sub-     */
    /* table. It starts at zero and is added to the base pointer "ptr_w"    */
    /* to obtain the sub-table pointer of each stage.                       */
    /*----------------------------------------------------------------------*/
    stride = npoints;
    tw_offset = 0;
    fft_jmp = 6 * stride;
    tw_jmp = 2 * stride;

    /* Optimizer hints: TI compiler intrinsics, skipped on other compilers. */
#if !defined(NOASSUME) && defined(__TI_COMPILER_VERSION__)
    _nassert(stride > 4);
    #pragma MUST_ITERATE(1,,1);
#endif

    while (stride > 4) {
        /*-----------------------------------------------------------------*/
        /* At the start of every iteration of the outer loop, "j" is set   */
        /* to zero, as "w" is pointing to the correct location within the  */
        /* twiddle factor array. For every iteration of the inner loop     */
        /* 2 * stride twiddle factors are accessed. For example:           */
        /*                                                                 */
        /*   outer-loop iteration   # twiddle factors   # times cycled     */
        /*            1                  2 N/4                 1           */
        /*            2                  2 N/16                4           */
        /*           ...                                                   */
        /*-----------------------------------------------------------------*/
        j = 0;
        fft_jmp >>= 2;
        tw_jmp >>= 2;

        /*-----------------------------------------------------------------*/
        /* Set up offsets to access the "N/4", "N/2", "3N/4" complex point */
        /* or, equivalently, the "N/2", "N", "3N/2" halfword.              */
        /*-----------------------------------------------------------------*/
        h2 = stride >> 1;
        l1 = stride;
        l2 = stride + (stride >> 1);

        /*-----------------------------------------------------------------*/
        /* Reset "x" to point to the start of the input data array.        */
        /* "tw_offset" starts off at 0 and advances by "tw_jmp" per stage. */
        /* The stride quarters with every iteration of the outer loop.     */
        /*-----------------------------------------------------------------*/
        x = ptr_x;
        w = ptr_w + tw_offset;
        tw_offset += tw_jmp;
        stride >>= 2;

        /*----------------------------------------------------------------*/
        /* The following loop iterates through the different butterflies  */
        /* within a given stage. Recall that there are log4(N) stages.    */
        /* Certain butterflies share the twiddle factors; these are       */
        /* grouped together. On the very first stage no butterflies share */
        /* a twiddle factor: all N/4 butterflies have different factors.  */
        /* On later stages, once a group has been processed, the index    */
        /* "j" into the factor array resets to 0 and the twiddle factors  */
        /* are reused. When this happens, the data pointer "x" is         */
        /* advanced by "fft_jmp". The loop body is unrolled to perform    */
        /* two radix-4 butterflies per iteration.                         */
        /*----------------------------------------------------------------*/
#if !defined(NOASSUME) && defined(__TI_COMPILER_VERSION__)
        _nassert((int)(w) % 8 == 0);
        _nassert((int)(x) % 8 == 0);
        _nassert(h2 % 8 == 0);
        _nassert(l1 % 8 == 0);
        _nassert(l2 % 8 == 0);
        #pragma MUST_ITERATE(1, , 1);
#endif

        for (i = 0; i < (npoints >> 3); i ++) {
            /*------------------------------------------------------------*/
            /* Read the first 4 twiddle halfwords: one cos/sin pair for   */
            /* each of the two butterflies. The pair layout depends on    */
            /* _LITTLE_ENDIAN. The factors for the other two legs are     */
            /* derived from the pair by complex squaring and cubing, with */
            /* rounding, in Q.15.                                         */
            /*------------------------------------------------------------*/

            /* twiddle factors for first butterfly */
#ifdef _LITTLE_ENDIAN
            co10 = w[j+1];
            si10 = w[j+0];
#else
            co10 = w[j+0];
            si10 =-w[j+1];
#endif
            co20 = (co10 * co10 - si10 * si10 + 0x4000) >> 15;
            si20 = (co10 * si10 + co10 * si10 + 0x4000) >> 15;
            co30 = (co10 * co20 - si10 * si20 + 0x4000) >> 15;
            si30 = (co10 * si20 + co20 * si10 + 0x4000) >> 15;

            /* twiddle factors for second butterfly */
#ifdef _LITTLE_ENDIAN
            co11 = w[j+3];
            si11 = w[j+2];
#else
            co11 = w[j+2];
            si11 =-w[j+3];
#endif
            co21 = (co11 * co11 - si11 * si11 + 0x4000) >> 15;
            si21 = (co11 * si11 + co11 * si11 + 0x4000) >> 15;
            co31 = (co11 * co21 - si11 * si21 + 0x4000) >> 15;
            si31 = (co11 * si21 + co21 * si11 + 0x4000) >> 15;

            /*------------------------------------------------------------*/
            /* Read in the complex inputs for the butterflies (data is    */
            /* stored imaginary first, real second):                      */
            /*   1st complex input to 1st butterfly: x[1] + jx[0]         */
            /*   1st complex input to 2nd butterfly: x[3] + jx[2]         */
            /* The successive complex inputs of a butterfly are separated */
            /* by a fixed amount known as the stride. The stride starts   */
            /* out at N/4 and quarters with every stage.                  */
            /*------------------------------------------------------------*/
            x_0 = x[1];             /* Re[x(k)]                  */
            x_1 = x[0];             /* Im[x(k)]                  */
            x_2 = x[3];             /* second butterfly          */
            x_3 = x[2];

            x_l1_0 = x[l1+1];       /* Re[x(k+N/2)]              */
            x_l1_1 = x[l1  ];       /* Im[x(k+N/2)]              */
            x_l1_2 = x[l1+3];       /* second butterfly          */
            x_l1_3 = x[l1+2];

            x_l2_0 = x[l2+1];       /* Re[x(k+3*N/4)]            */
            x_l2_1 = x[l2  ];       /* Im[x(k+3*N/4)]            */
            x_l2_2 = x[l2+3];       /* second butterfly          */
            x_l2_3 = x[l2+2];

            x_h2_0 = x[h2+1];       /* Re[x(k+N/4)]              */
            x_h2_1 = x[h2  ];       /* Im[x(k+N/4)]              */
            x_h2_2 = x[h2+3];       /* second butterfly          */
            x_h2_3 = x[h2+2];

            /*-----------------------------------------------------------*/
            /* Two butterflies are evaluated per iteration. The comments */
            /* below describe one butterfly only; the second one is      */
            /* computed in exactly the same way.                         */
            /*-----------------------------------------------------------*/
            xh0_0 = x_0 + x_l1_0;
            xh1_0 = x_1 + x_l1_1;
            xh0_1 = x_2 + x_l1_2;
            xh1_1 = x_3 + x_l1_3;

            xl0_0 = x_0 - x_l1_0;
            xl1_0 = x_1 - x_l1_1;
            xl0_1 = x_2 - x_l1_2;
            xl1_1 = x_3 - x_l1_3;

            xh20_0 = x_h2_0 + x_l2_0;
            xh21_0 = x_h2_1 + x_l2_1;
            xh20_1 = x_h2_2 + x_l2_2;
            xh21_1 = x_h2_3 + x_l2_3;

            xl20_0 = x_h2_0 - x_l2_0;
            xl21_0 = x_h2_1 - x_l2_1;
            xl20_1 = x_h2_2 - x_l2_2;
            xl21_1 = x_h2_3 - x_l2_3;

            /*-----------------------------------------------------------*/
            /* Derive output pointers using the input pointer "x": the   */
            /* results are written back in place.                        */
            /*-----------------------------------------------------------*/
            x0 = x;
            x2 = x0;

            /*-----------------------------------------------------------*/
            /* When the twiddle factors are not to be re-used, j is      */
            /* incremented by 4, to reflect the fact that 4 halfwords    */
            /* are consumed in every iteration. The input data pointer   */
            /* increments by 4. Within a stage the stride does not       */
            /* change, and hence neither do the offsets of the other     */
            /* three legs (h2, l1, l2). Once a group is exhausted        */
            /* (3*j == fft_jmp), skip to the next group and restart the  */
            /* twiddle index.                                            */
            /*-----------------------------------------------------------*/
            j += 4;
            x += 4;

            predj = (3*j - fft_jmp);
            if (!predj) x += fft_jmp;
            if (!predj) j = 0;

            /*-----------------------------------------------------------*/
            /* X'pr = Xpr + Xqr + Xsr + Xtr                              */
            /* X'pi = Xpi + Xqi + Xsi + Xti                              */
            /*                                                           */
            /* X'qr = cos (Xpr-Xsr - Xqi+Xti) - sin (Xpi-Xsi + Xqr-Xtr)  */
            /* X'qi = cos (Xpi-Xsi + Xqr-Xtr) + sin (Xpr-Xsr - Xqi+Xti)  */
            /*                                                           */
            /* X'sr = cos ( Xpr-Xqr + Xsr-Xtr) + sin (-Xpi+Xqi - Xsi+Xti)*/
            /* X'si = cos ( Xpi-Xqi + Xsi-Xti) + sin (Xpr-Xqr + Xsr-Xtr) */
            /*                                                           */
            /* X'tr = cos (Xpr-Xsr + Xqi-Xti) - sin (Xpi-Xsi - Xqr+Xtr)  */
            /* X'ti = cos (Xpi-Xsi - Xqr+Xtr) + sin (Xpr-Xsr + Xqi-Xti)  */
            /*                                                           */
            /* The top term has no multiply; it is halved with rounding  */
            /* to match the scaling of the twiddled terms below.         */
            /*-----------------------------------------------------------*/
            x0[1] = (xh0_0 + xh20_0 + 1)>>1;
            x0[0] = (xh1_0 + xh21_0 + 1)>>1;
            x0[3] = (xh0_1 + xh20_1 + 1)>>1;
            x0[2] = (xh1_1 + xh21_1 + 1)>>1;

            xt0_0 = xh0_0 - xh20_0;
            yt0_0 = xh1_0 - xh21_0;
            xt0_1 = xh0_1 - xh20_1;
            yt0_1 = xh1_1 - xh21_1;

            xt1_0 = xl0_0 - xl21_0;
            yt2_0 = xl1_0 - xl20_0;
            xt1_1 = xl0_1 - xl21_1;
            yt2_1 = xl1_1 - xl20_1;

            xt2_0 = xl0_0 + xl21_0;
            yt1_0 = xl1_0 + xl20_0;
            xt2_1 = xl0_1 + xl21_1;
            yt1_1 = xl1_1 + xl20_1;

            /*---------------------------------------------------------*/
            /* Perform the twiddle factor multiplies of the other      */
            /* three terms, using the factors c + js loaded above:     */
            /*                                                         */
            /*   Y = (xt + j yt)(c + j s)                              */
            /*     = (xt c - yt s) + j (yt c + xt s)                   */
            /*                                                         */
            /* The Q.15 products are rounded and shifted right by 16,  */
            /* i.e. scaled by one half.                                */
            /*---------------------------------------------------------*/
            x2[l1+1] = (co20 * xt0_0 - si20 * yt0_0 + 0x8000) >> 16;
            x2[l1  ] = (co20 * yt0_0 + si20 * xt0_0 + 0x8000) >> 16;

            x2[l1+3] = (co21 * xt0_1 - si21 * yt0_1 + 0x8000) >> 16;
            x2[l1+2] = (co21 * yt0_1 + si21 * xt0_1 + 0x8000) >> 16;

            x2[h2+1] = (co10 * xt1_0 - si10 * yt1_0 + 0x8000) >> 16;
            x2[h2  ] = (co10 * yt1_0 + si10 * xt1_0 + 0x8000) >> 16;

            x2[h2+3] = (co11 * xt1_1 - si11 * yt1_1 + 0x8000) >> 16;
            x2[h2+2] = (co11 * yt1_1 + si11 * xt1_1 + 0x8000) >> 16;

            x2[l2+1] = (co30 * xt2_0 - si30 * yt2_0 + 0x8000) >> 16;
            x2[l2  ] = (co30 * yt2_0 + si30 * xt2_0 + 0x8000) >> 16;

            x2[l2+3] = (co31 * xt2_1 - si31 * yt2_1 + 0x8000) >> 16;
            x2[l2+2] = (co31 * yt2_1 + si31 * xt2_1 + 0x8000) >> 16;
        }
    }

    /*-----------------------------------------------------------------*/
    /* The following code performs either a standard radix4 pass or a  */
    /* radix2 pass. Two pointers are used to access the input data.    */
    /* The input data is read "N/4" complex samples apart or "N/2"     */
    /* halfwords apart using pointers "x0" and "x2". This produces     */
    /* outputs at 0, N/4, N/2, 3N/4 for a radix4 pass, and at 0, N/8,  */
    /* N/2, 5N/8 for a radix2 pass.                                    */
    /*-----------------------------------------------------------------*/

    y0 = ptr_y;
    y2 = ptr_y + (int)npoints;
    x0 = ptr_x;
    x2 = ptr_x + (int)(npoints >> 1);

    if (radix == 2) {
        /*----------------------------------------------------------------*/
        /* The pointers are set at the following locations, which are     */
        /* half the offsets of a radix4 pass.                             */
        /*----------------------------------------------------------------*/
        y1 = y0 + (int)(npoints >> 2);
        y3 = y2 + (int)(npoints >> 2);
        l1 = norm + 1;
        j0 = 8;
        n0 = npoints >> 1;
    }
    else {
        y1 = y0 + (int)(npoints >> 1);
        y3 = y2 + (int)(npoints >> 1);
        l1 = norm + 2;
        j0 = 4;
        n0 = npoints >> 2;
    }

    /*--------------------------------------------------------------------*/
    /* The following code reads data identically for either a radix 4    */
    /* or a radix 2 style decomposition. It writes out at different      */
    /* locations though. When the index "j" reaches "n0" it skips ahead, */
    /* together with the input pointers, to prevent double reversal.     */
    /*--------------------------------------------------------------------*/

    j = 0;

#if !defined(NOASSUME) && defined(__TI_COMPILER_VERSION__)
    _nassert((int)(n0) % 4 == 0);
    _nassert((int)(x0) % 8 == 0);
    _nassert((int)(x2) % 8 == 0);
    _nassert((int)(y0) % 8 == 0);
    #pragma MUST_ITERATE(2,,2);
#endif

    for (i = 0; i < npoints; i += 8) {
        /*----------------------------------------------------------------*/
        /* Digit reverse the index starting from 0. The increment to "j"  */
        /* is either 4 or 8. "h2" is reused here as the reversed index.   */
        /*----------------------------------------------------------------*/
        DIG_REV(j, l1, h2);

        /*----------------------------------------------------------------*/
        /* Read in the input data from the first eight locations. These   */
        /* are transformed either as a radix4 or as a radix2.             */
        /*----------------------------------------------------------------*/
        x_0 = x0[1];
        x_1 = x0[0];
        x_2 = x0[3];
        x_3 = x0[2];
        x_4 = x0[5];
        x_5 = x0[4];
        x_6 = x0[7];
        x_7 = x0[6];
        x0 += 8;

        xh0_0 = x_0 + x_4;
        xh1_0 = x_1 + x_5;
        xl0_0 = x_0 - x_4;
        xl1_0 = x_1 - x_5;
        xh0_1 = x_2 + x_6;
        xh1_1 = x_3 + x_7;
        xl0_1 = x_2 - x_6;
        xl1_1 = x_3 - x_7;

        n00 = xh0_0 + xh0_1;
        n01 = xh1_0 + xh1_1;

        n10 = xl0_0 - xl1_1;
        n11 = xl1_0 + xl0_1;

        n20 = xh0_0 - xh0_1;
        n21 = xh1_0 - xh1_1;

        n30 = xl0_0 + xl1_1;
        n31 = xl1_0 - xl0_1;

        if (radix == 2) {
            /*-------------------------------------------------------------*/
            /* Perform radix2 style decomposition (replaces the radix4     */
            /* results computed above).                                    */
            /*-------------------------------------------------------------*/
            n00 = x_0 + x_2;
            n01 = x_1 + x_3;
            n20 = x_0 - x_2;
            n21 = x_1 - x_3;
            n10 = x_4 + x_6;
            n11 = x_5 + x_7;
            n30 = x_4 - x_6;
            n31 = x_5 - x_7;
        }

        y0[2*h2 + 1] = n00;
        y0[2*h2    ] = n01;

        y1[2*h2 + 1] = n10;
        y1[2*h2    ] = n11;

        y2[2*h2 + 1] = n20;
        y2[2*h2    ] = n21;

        y3[2*h2 + 1] = n30;
        y3[2*h2    ] = n31;

        /*----------------------------------------------------------------*/
        /* Read in the next eight inputs, and perform radix4 or radix2    */
        /* decomposition.                                                 */
        /*----------------------------------------------------------------*/
        x_8 = x2[1]; x_9 = x2[0];
        x_a = x2[3]; x_b = x2[2];
        x_c = x2[5]; x_d = x2[4];
        x_e = x2[7]; x_f = x2[6];
        x2 += 8;

        xh0_2 = x_8 + x_c; xh1_2 = x_9 + x_d;
        xl0_2 = x_8 - x_c; xl1_2 = x_9 - x_d;
        xh0_3 = x_a + x_e; xh1_3 = x_b + x_f;
        xl0_3 = x_a - x_e; xl1_3 = x_b - x_f;

        n02 = xh0_2 + xh0_3;
        n03 = xh1_2 + xh1_3;

        n12 = xl0_2 - xl1_3;
        n13 = xl1_2 + xl0_3;

        n22 = xh0_2 - xh0_3;
        n23 = xh1_2 - xh1_3;

        n32 = xl0_2 + xl1_3;
        n33 = xl1_2 - xl0_3;

        if (radix == 2) {
            n02 = x_8 + x_a; n03 = x_9 + x_b;
            n22 = x_8 - x_a; n23 = x_9 - x_b;
            n12 = x_c + x_e; n13 = x_d + x_f;
            n32 = x_c - x_e; n33 = x_d - x_f;
        }

        /*-----------------------------------------------------------------*/
        /* Points that are read from successive locations map to y,        */
        /* y[N/4], y[N/2], y[3N/4] in a radix4 scheme, and to y, y[N/8],   */
        /* y[N/2], y[5N/8] in a radix2 scheme.                             */
        /*-----------------------------------------------------------------*/
        y0[2*h2+3] = n02;
        y0[2*h2+2] = n03;

        y1[2*h2+3] = n12;
        y1[2*h2+2] = n13;

        y2[2*h2+3] = n22;
        y2[2*h2+2] = n23;

        y3[2*h2+3] = n32;
        y3[2*h2+2] = n33;

        j += j0;
        if (j == n0) {
            j += n0;
            x0 += (int)npoints >> 1;
            x2 += (int)npoints >> 1;
        }
    }
}
613 /* ======================================================================== */
614 /* End of file: DSP_ifft16x16_imre_cn.c */
615 /* ------------------------------------------------------------------------ */
616 /* Copyright (C) 2011 Texas Instruments, Incorporated. */
617 /* All Rights Reserved. */
618 /* ======================================================================== */