* ti/dsplib/src/DSPF_sp_ifftSPxSP/c66/DSPF_sp_ifftSPxSP.sa
1 * ======================================================================= *
2 * DSPF_sp_ifftSPxSP.sa -- Inverse FFT with Mixed Radix *
3 * Linear ASM Implementation *
4 * *
5 * Rev 0.0.2 *
6 * *
7 * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ *
8 * *
9 * *
10 * Redistribution and use in source and binary forms, with or without *
11 * modification, are permitted provided that the following conditions *
12 * are met: *
13 * *
14 * Redistributions of source code must retain the above copyright *
15 * notice, this list of conditions and the following disclaimer. *
16 * *
17 * Redistributions in binary form must reproduce the above copyright *
18 * notice, this list of conditions and the following disclaimer in the *
19 * documentation and/or other materials provided with the *
20 * distribution. *
21 * *
22 * Neither the name of Texas Instruments Incorporated nor the names of *
23 * its contributors may be used to endorse or promote products derived *
24 * from this software without specific prior written permission. *
25 * *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT *
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR *
29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT *
30 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, *
31 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT *
32 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, *
33 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
34 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
35 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE *
36 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
37 * *
38 * ======================================================================= *
40 .sect ".text:optimized"
41 .if __TI_EABI__
42 .asg DSPF_sp_ifftSPxSP, _DSPF_sp_ifftSPxSP
43 .endif
45 .global _DSPF_sp_ifftSPxSP
;-----------------------------------------------------------------------------
; DSPF_sp_ifftSPxSP -- single-precision mixed-radix (4/2) inverse FFT kernel.
;
; C-equivalent call (see DSPLIB docs for the full argument list; this linear
; asm version takes the first four):
;   void DSPF_sp_ifftSPxSP(int n, float *ptr_x, float *ptr_w, float *ptr_y)
;
;   A_n     : FFT size n (power of 2)
;   B_ptr_x : in-place working buffer of n complex (re,im) float pairs
;   A_ptr_w : twiddle-factor table for the inverse transform
;   B_ptr_y : output buffer; final stage stores digit/bit-reversed and
;             scales by an approximation of 1/n (RCPSP estimate)
;
; Structure:
;   OLOOP/ILOOP           - radix-4 decimation stages; each ILOOP iteration
;                           computes two radix-4 butterflies, one on the A
;                           datapath and one on the B datapath
;   LOOP4                 - final radix-4 stage with reversed-index stores
;   LAST_STAGE_RADIX2/
;   LOOP2                 - final radix-2 stage (odd log2(n) case)
;-----------------------------------------------------------------------------
47 _DSPF_sp_ifftSPxSP .cproc A_n, B_ptr_x, A_ptr_w, B_ptr_y
48 .no_mdep ; caller guarantees x, w and y do not alias
; ---- symbolic registers: stage/loop bookkeeping --------------------------
50 .reg B_w0
51 .reg A_j, A_w, A_x, A_y, A_h2, A_predj
52 .reg B_j, B_w, B_x, B_y, B_h2, B_2h2, B_fft_jmp, B_predj, B_n
; ---- twiddle factors (cos:sin pairs) for the three butterfly outputs -----
54 .reg A_co1:A_si1, A_co2:A_si2, A_co3:A_si3
55 .reg B_co1:B_si1, B_co2:B_si2, B_co3:B_si3
; ---- butterfly inputs and intermediate sums/differences ------------------
57 .reg A_x1:A_x0, A_x_h2p:A_x_h2, A_x_l1p:A_x_l1, A_x_l2p:A_x_l2
58 .reg A_xh1:A_xh0, A_xl1:A_xl0, A_xh21:A_xh20, A_xl21:A_xl20
59 .reg B_x1:B_x0, B_x_h2p:B_x_h2, B_x_l1p:B_x_l1, B_x_l2p:B_x_l2
60 .reg B_xh1:B_xh0, B_xl1:B_xl0, B_xh21:B_xh20,B_xl21:B_xl20
62 .reg A_sum1, A_sum2
63 .reg A_sum3, A_sum4, A_xt1, A_yt1
64 .reg A_prod1,A_prod2,A_prod3,A_prod4
65 .reg A_sum6:A_sum5
66 .reg A_yt0:A_xt0
67 .reg A_prod8:A_prod7,A_prod6:A_prod5
68 .reg A_sum7, A_sum8, A_xt2, A_yt2
69 .reg A_prod9,A_prod10,A_prod11,A_prod12
71 .reg B_sum1, B_sum2
72 .reg B_sum3, B_sum4, B_xt1, B_yt1
73 .reg B_prod1,B_prod2,B_prod3,B_prod4
74 .reg B_sum6:B_sum5
75 .reg B_yt0:B_xt0
76 .reg B_prod8:B_prod7,B_prod6:B_prod5
77 .reg B_sum7, B_sum8, B_xt2, B_yt2
78 .reg B_prod9,B_prod10,B_prod11,B_prod12
; ---- final-stage state: 1/n scale, radix selection, reversal counters ----
80 .reg A_scale1:A_scale0
81 .reg A_radix, A_temp
82 .reg B_radix2, B_stride, B_tw_offset, B_i, B_while, B_temp
83 .reg B_const6, A_const48, A_const16
84 .reg A_y_, B_y_
85 .reg B_ll0, A_ll0
86 .reg A_lj, A_lk, A_ltemp1
87 .reg B_lj, B_lk, B_ltemp1
88 .reg A_ptr_lx0, B_ptr_lx0
89 .reg A_ptr_lx1, B_ptr_lx1
90 .reg A_ly0, A_ly1, B_ly0, B_ly1
91 .reg A_lnmax, B_lnmax
92 .reg A_lctr
94 .reg A_lx1:A_lx0
95 .reg A_lx3:A_lx2
96 .reg A_lx5:A_lx4
97 .reg A_lx7:A_lx6
98 .reg B_lx1:B_lx0
99 .reg B_lx3:B_lx2
100 .reg B_lx5:B_lx4
101 .reg B_lx7:B_lx6
103 .reg A_lxh0_0, A_lxh1_0, A_lxh0_1, A_lxh1_1
104 .reg B_lxh0_0, B_lxh1_0, B_lxh0_1, B_lxh1_1
105 .reg A_lyt0, A_lyt1, A_lyt4, A_lyt5
106 .reg B_lyt0, B_lyt1, B_lyt4, B_lyt5
107 .reg A_lxl0_0, A_lxl1_0, A_lxl0_1, A_lxl1_1
108 .reg B_lxl0_0, B_lxl1_0, B_lxl0_1, B_lxl1_1
109 .reg A_lyt2, A_lyt3, A_lyt6, A_lyt7
110 .reg B_lyt2, B_lyt3, B_lyt6, B_lyt7
111 .reg A_r2flag
; ---- stage-loop setup ----------------------------------------------------
113 MVK .1 4, A_radix ;provisional radix; recomputed after the loops
114 MV .2 A_n, B_n
115 MV .2 B_n, B_stride ;stride starts at n, divided by 4 each stage
116 ZERO .2 B_tw_offset ;running byte offset into the twiddle table
117 MVK .2 6, B_const6
118 MVK .1 48, A_const48 ;6 twiddle doublewords * 8 bytes
119 MVK .1 16, A_const16 ;2 complex (8-byte) outputs per iteration
; ---- outer loop: one radix-4 decimation stage per pass -------------------
121 OLOOP
122 ZERO .1 A_j
123 ZERO .2 B_j
124 MPY32 .2 B_const6, B_stride, B_fft_jmp ;fft_jmp = 6*stride
125 SHRU .2 B_stride, 2, B_h2
126 MV .1 B_h2, A_h2
127 ADD .1 B_ptr_x, -16, A_x ;pre-bias: loads below pre-increment by 2 dwords
128 ADD .1 B_ptr_x, -16, A_y
129 ADD .2 A_ptr_w, B_tw_offset, B_w0 ;twiddles for this stage
130 ADD .2 B_tw_offset, B_fft_jmp, B_tw_offset
131 SHRU .2 B_stride, 2, B_stride ;stride /= 4 for the next stage
132 SHRU .2 B_n, 3, B_i ;n/8 iterations: 2 butterflies each
133 SUB .2 B_i, 1, B_i
; ---- inner loop: two radix-4 DIF butterflies (A path and B path) ---------
135 ILOOP: .trip 4
136 ADD .2 B_w0, B_j, B_w
137 MVD .1 B_w, A_w
139 LDDW .1 *A_w[0], A_co1:A_si1 ;Load si1, co1
140 LDDW .1 *A_w[1], A_co2:A_si2 ;Load si2, co2
141 LDDW .1 *A_w[2], A_co3:A_si3 ;Load si3, co3
142 LDDW .2 *B_w[3], B_co1:B_si1 ;Load si1, co1 (B path uses next twiddle set)
143 LDDW .2 *B_w[4], B_co2:B_si2 ;Load si2, co2
144 LDDW .2 *B_w[5], B_co3:B_si3 ;Load si3, co3
146 MVD .2 A_x, B_x ;x_copy =x
147 LDDW .1 *++A_x[2], A_x1:A_x0 ;Load x1, x0
148 LDDW .1 *++A_x[A_h2], A_x_h2p:A_x_h2 ;Load x_h2p, x_h2
149 LDDW .1 *++A_x[A_h2], A_x_l1p:A_x_l1 ;Load x_l1p, x_l1
150 LDDW .1 *A_x[A_h2], A_x_l2p:A_x_l2 ;Load x_l2p, x_l2
152 LDDW .2 *++B_x[3], B_x1:B_x0 ;Load x1, x0 (one dword past the A set)
153 LDDW .2 *++B_x[B_h2], B_x_h2p:B_x_h2 ;Load x_h2p, x_h2
154 LDDW .2 *++B_x[B_h2], B_x_l1p:B_x_l1 ;Load x_l1p, x_l1
155 LDDW .2 *B_x[B_h2], B_x_l2p:B_x_l2 ;Load x_l2p, x_l2
; first-level sums/differences of the 4 butterfly inputs
157 DADDSP .L1 A_x1:A_x0, A_x_l1p:A_x_l1, A_xh1:A_xh0 ;xh = x + x_l1
158 DSUBSP .1 A_x1:A_x0, A_x_l1p:A_x_l1, A_xl1:A_xl0 ;xl = x - x_l1
159 DADDSP .1 A_x_h2p:A_x_h2, A_x_l2p:A_x_l2, A_xh21:A_xh20 ;xh2 = x_h2 + x_l2
160 DSUBSP .1 A_x_h2p:A_x_h2, A_x_l2p:A_x_l2, A_xl21:A_xl20 ;xl2 = x_h2 - x_l2
162 DADDSP .L2 B_x1:B_x0, B_x_l1p:B_x_l1, B_xh1:B_xh0
163 DSUBSP .2 B_x1:B_x0, B_x_l1p:B_x_l1, B_xl1:B_xl0
164 DADDSP .2 B_x_h2p:B_x_h2, B_x_l2p:B_x_l2, B_xh21:B_xh20
165 DSUBSP .2 B_x_h2p:B_x_h2, B_x_l2p:B_x_l2, B_xl21:B_xl20
167 ROTL .2 B_h2, 4, B_2h2 ;2h2 = h2 << 4 bytes (the two h2 strides walked above)
168 SUB .1 A_x, B_2h2, A_x ;rewind x; net advance is 2 doublewords per iteration
170 ADD .1 A_j, A_const48, A_j ;j += 48 (6 twiddle doublewords consumed)
171 SUB .1 A_j, B_fft_jmp, A_predj ;predj = j - fft_jmp
172 [!A_predj]ADD .1 A_x, B_fft_jmp, A_x ;*x = *x + fft_jmp (group boundary reached)
173 [!A_predj]ZERO .1 A_j ;j=0
175 MVD .1 A_y, A_y_
176 MVD .2 A_y, B_y_
; output 0: y[0] = xh + xh2 (no twiddle)
178 DADDSP .1 A_xh1:A_xh0, A_xh21:A_xh20, A_sum2:A_sum1
179 STDW .1 A_sum2:A_sum1, *++A_y_[2]
; output 2: y[h2] = (xh - xh2) * w2 (complex multiply via CMPYSP + DADDSP)
181 DSUBSP .1 A_xh1:A_xh0, A_xh21:A_xh20, A_yt0:A_xt0
182 CMPYSP .1 A_yt0:A_xt0, A_co2:A_si2, A_prod8:A_prod7:A_prod6:A_prod5
183 DADDSP .1 A_prod8:A_prod7,A_prod6:A_prod5, A_sum6:A_sum5
184 STDW .1 A_sum6:A_sum5, *++A_y_[A_h2]
; outputs 1 and 3: +/-j rotation differs with endianness (word order in dwords)
186 .if .LITTLE_ENDIAN
188 ADDSP .L1 A_xl1, A_xl20, A_yt1 ;yt1 = xl1 + xl20
189 SUBSP .L1 A_xl0, A_xl21, A_xt1 ;xt1 = xl0 - xl21
190 CMPYSP .1 A_yt1:A_xt1, A_co1:A_si1, A_prod4:A_prod3:A_prod2:A_prod1
191 DADDSP .1 A_prod4:A_prod3,A_prod2:A_prod1, A_sum4:A_sum3
192 STDW .1 A_sum4:A_sum3, *++A_y_[A_h2]
194 SUBSP .L1 A_xl1, A_xl20, A_yt2 ;yt2 = xl1 - xl20
195 ADDSP .L1 A_xl0, A_xl21, A_xt2 ;xt2 = xl0 + xl21
196 CMPYSP .1 A_yt2:A_xt2, A_co3:A_si3, A_prod12:A_prod11:A_prod10:A_prod9
197 DADDSP .1 A_prod12:A_prod11, A_prod10:A_prod9, A_sum8:A_sum7
198 STDW .1 A_sum8:A_sum7, *A_y_[A_h2]
200 .else
202 SUBSP .L1 A_xl1, A_xl20, A_yt1 ;yt1 = xl1 - xl20
203 ADDSP .L1 A_xl0, A_xl21, A_xt1 ;xt1 = xl0 + xl21
204 CMPYSP .1 A_yt1:A_xt1, A_co1:A_si1, A_prod4:A_prod3:A_prod2:A_prod1
205 DADDSP .1 A_prod4:A_prod3,A_prod2:A_prod1, A_sum4:A_sum3
206 STDW .1 A_sum4:A_sum3, *++A_y_[A_h2]
208 ADDSP .L1 A_xl1, A_xl20, A_yt2 ;yt2 = xl1 + xl20
209 SUBSP .L1 A_xl0, A_xl21, A_xt2 ;xt2 = xl0 - xl21
210 CMPYSP .1 A_yt2:A_xt2, A_co3:A_si3, A_prod12:A_prod11:A_prod10:A_prod9
211 DADDSP .1 A_prod12:A_prod11, A_prod10:A_prod9, A_sum8:A_sum7
212 STDW .1 A_sum8:A_sum7, *A_y_[A_h2]
214 .endif
; B-path butterfly outputs (mirrors the A path, offset by one dword)
216 DADDSP .2 B_xh1:B_xh0, B_xh21:B_xh20, B_sum2:B_sum1
217 STDW .2 B_sum2:B_sum1, *++B_y_[3]
219 DSUBSP .2 B_xh1:B_xh0, B_xh21:B_xh20, B_yt0:B_xt0
220 CMPYSP .2 B_yt0:B_xt0, B_co2:B_si2, B_prod8:B_prod7:B_prod6:B_prod5
221 DADDSP .2 B_prod8:B_prod7,B_prod6:B_prod5, B_sum6:B_sum5
222 STDW .2 B_sum6:B_sum5, *++B_y_[B_h2]
224 .if .LITTLE_ENDIAN
226 SUBSP .L2 B_xl0, B_xl21, B_xt1 ;xt1 = xl0 - xl21
227 ADDSP .L2 B_xl1, B_xl20, B_yt1 ;yt1 = xl1 + xl20
228 CMPYSP .2 B_yt1:B_xt1, B_co1:B_si1, B_prod4:B_prod3:B_prod2:B_prod1
229 DADDSP .2 B_prod4:B_prod3,B_prod2:B_prod1, B_sum4:B_sum3
230 STDW .2 B_sum4:B_sum3, *++B_y_[B_h2]
232 ADDSP .L2 B_xl0, B_xl21, B_xt2 ;xt2 = xl0 + xl21
233 SUBSP .L2 B_xl1, B_xl20, B_yt2 ;yt2 = xl1 - xl20
234 CMPYSP .2 B_yt2:B_xt2, B_co3:B_si3, B_prod12:B_prod11:B_prod10:B_prod9
235 DADDSP .2 B_prod12:B_prod11, B_prod10:B_prod9, B_sum8:B_sum7
236 STDW .2 B_sum8:B_sum7, *B_y_[B_h2]
238 .else
240 SUBSP .L2 B_xl1, B_xl20, B_yt1 ;yt1 = xl1 - xl20
241 ADDSP .L2 B_xl0, B_xl21, B_xt1 ;xt1 = xl0 + xl21
242 CMPYSP .2 B_yt1:B_xt1, B_co1:B_si1, B_prod4:B_prod3:B_prod2:B_prod1
243 DADDSP .2 B_prod4:B_prod3,B_prod2:B_prod1, B_sum4:B_sum3
244 STDW .2 B_sum4:B_sum3, *++B_y_[B_h2]
246 ADDSP .L2 B_xl1, B_xl20, B_yt2 ;yt2 = xl1 + xl20
247 SUBSP .L2 B_xl0, B_xl21, B_xt2 ;xt2 = xl0 - xl21
248 CMPYSP .2 B_yt2:B_xt2, B_co3:B_si3, B_prod12:B_prod11:B_prod10:B_prod9
249 DADDSP .2 B_prod12:B_prod11, B_prod10:B_prod9, B_sum8:B_sum7
250 STDW .2 B_sum8:B_sum7, *B_y_[B_h2]
252 .endif
254 ADD .1 A_y, A_const16, A_y ;advance y by the 2 dwords consumed per iteration
255 ADD .2 B_j, A_const48, B_j
256 SUB .2 B_j, B_fft_jmp, B_predj
257 [!B_predj]ADD .1 A_y, B_fft_jmp, A_y ;skip to the next butterfly group
258 [!B_predj]ZERO .2 B_j
260 [B_i] BDEC .2 ILOOP, B_i
262 CMPGTU .2 B_stride, A_radix, B_while ;more stages while stride > 4
263 [B_while]B .2 OLOOP
265 ; find the radix of the fft
266 MVK .1 4, A_radix
267 NORM .2 A_n, B_radix2
268 AND .2 B_radix2, 1, B_radix2 ;parity of NORM(n) selects the leftover stage
269 [B_radix2]MVK .1 2, A_radix ;odd parity -> final stage is radix-2
; ---- final-stage setup: split x into quarters, compute 1/n scale ---------
271 ZERO .1 A_lj ;A-side reversal counter starts at 0
272 SHR .2 A_n, 3, B_lj ;B-side counter starts at n>>3
273 MV .1 B_ptr_x, A_ptr_lx0 ;ptr_x0 = ptr_x
274 ADD .2 B_ptr_x, 8, B_ptr_lx0
275 MV .1 A_n, A_ptr_lx1
276 ADDAW .1 B_ptr_x, A_ptr_lx1, A_ptr_lx1 ;ptr_x1 = ptr_x + n words (2nd half)
277 ADD .2 A_ptr_lx1, 8, B_ptr_lx1
278 MV .1 B_ptr_y, A_ly0 ;y0 = ptr_y
279 MV .2 B_ptr_y, B_ly0 ;y0 = ptr_y
280 INTSP .1 A_n, A_temp ;(float)n
281 RCPSP .1 A_temp, A_scale0 ;scale ~ 1/n (RCPSP is a reciprocal estimate)
282 MV .1 A_scale0, A_scale1
284 ;get size of fft -> l0 = _norm(n_max) - 17
285 NORM .1 A_n, A_ll0 ;l0 =_NORM(n_max)
286 ADD .1 A_ll0, 3, A_ll0 ;l0 += 3 (right-shift applied to BITR result)
287 MV .2 A_ll0, B_ll0
288 SHR .2 A_n, 2, B_lnmax ;nmax >>= 2
289 SHR .1 A_n, 2, A_lnmax ;nmax >>= 2
290 SHR .1 A_n, 3, A_lctr ;set loop Counter
291 SUB .1 A_lctr, 1, A_lctr
292 CMPEQ .1 A_radix, 2, A_r2flag ;Check whether radix ==2
294 [A_r2flag]B LAST_STAGE_RADIX2
296 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
297 ; last stage of radix4 computation ;
298 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
299 ;Bit reversal calculation: k = BITR(j) >> l0, scaled to a word offset
300 LOOP4: .trip 4
302 BITR .1 A_lj, A_ltemp1
303 SHRU .1 A_ltemp1, A_ll0, A_lk
304 MPY .1 4, A_lk, A_lk
305 ADD .1 A_ly0, A_lk, A_ly1 ;*y1 = *y0 + k
306 ADD .1 A_lj, 1, A_lj ;j = j+1
308 BITR .2 B_lj, B_ltemp1
309 SHRU .2 B_ltemp1, B_ll0, B_lk
310 MPY .2 4, B_lk, B_lk
311 ADD .2 B_ly0, B_lk, B_ly1 ;*y1 = *y0 + k
312 ADD .2 B_lj, 1, B_lj ;j = j+1
; 4 complex points from each half of x (A_lx* first half, B_lx* second half)
314 LDDW .1 *A_ptr_lx0++[2],A_lx1:A_lx0 ;load ptr_x0[0], ptr_x0[1]
315 LDDW .2 *B_ptr_lx0++[2],A_lx3:A_lx2 ;load ptr_x0[2], ptr_x0[3]
316 LDDW .1 *A_ptr_lx0++[2],A_lx5:A_lx4 ;load ptr_x0[4], ptr_x0[5]
317 LDDW .2 *B_ptr_lx0++[2],A_lx7:A_lx6 ;load ptr_x0[6], ptr_x0[7]
319 LDDW .1 *A_ptr_lx1++[2],B_lx1:B_lx0 ;load ptr_x1[0], ptr_x1[1]
320 LDDW .2 *B_ptr_lx1++[2],B_lx3:B_lx2 ;load ptr_x1[2], ptr_x1[3]
321 LDDW .1 *A_ptr_lx1++[2],B_lx5:B_lx4 ;load ptr_x1[4], ptr_x1[5]
322 LDDW .2 *B_ptr_lx1++[2],B_lx7:B_lx6 ;load ptr_x1[6], ptr_x1[7]
324 DADDSP .1 A_lx1:A_lx0, A_lx5:A_lx4, A_lxh1_0:A_lxh0_0 ;xh0_0 = x0 + x4, xh1_0 = x1 + x5
325 DADDSP .1 A_lx3:A_lx2, A_lx7:A_lx6, A_lxh1_1:A_lxh0_1 ;xh0_1 = x2 + x6, xh1_1 = x3 + x7
327 DADDSP .2 B_lx1:B_lx0, B_lx5:B_lx4, B_lxh1_0:B_lxh0_0 ;xh0_0 = x0 + x4, xh1_0 = x1 + x5
328 DADDSP .2 B_lx3:B_lx2, B_lx7:B_lx6, B_lxh1_1:B_lxh0_1 ;xh0_1 = x2 + x6, xh1_1 = x3 + x7
330 DADDSP .1 A_lxh1_0:A_lxh0_0, A_lxh1_1:A_lxh0_1, A_lyt1:A_lyt0 ;yt0 =xh0_0 + xh0_1; yt1 =xh1_0 + xh1_1
331 DSUBSP .1 A_lxh1_0:A_lxh0_0, A_lxh1_1:A_lxh0_1, A_lyt5:A_lyt4 ;yt4 =xh0_0 - xh0_1, yt5 =xh1_0 - xh1_1
333 DADDSP .2 B_lxh1_0:B_lxh0_0, B_lxh1_1:B_lxh0_1, B_lyt1:B_lyt0 ;yt0 =xh0_0 + xh0_1, yt1 =xh1_0 + xh1_1
334 DSUBSP .2 B_lxh1_0:B_lxh0_0, B_lxh1_1:B_lxh0_1, B_lyt5:B_lyt4 ;yt4 =xh0_0 - xh0_1, yt5 =xh1_0 - xh1_1
336 DSUBSP .1 A_lx1:A_lx0, A_lx5:A_lx4, A_lxl1_0:A_lxl0_0 ;xl0_0 = x0 - x4, xl1_0 = x1 - x5
337 DSUBSP .1 A_lx3:A_lx2, A_lx7:A_lx6, A_lxl1_1:A_lxl0_1 ;xl0_1 = x2 - x6, xl1_1 = x3 - x7,
339 DSUBSP .2 B_lx1:B_lx0, B_lx5:B_lx4, B_lxl1_0:B_lxl0_0 ;xl0_0 = x0 - x4, xl1_0 = x1 - x5
340 DSUBSP .2 B_lx3:B_lx2, B_lx7:B_lx6, B_lxl1_1:B_lxl0_1 ;xl0_1 = x2 - x6, xl1_1 = x3 - x7
; +/-j rotation of the lower terms (inverse-FFT sign convention)
342 SUBSP .1 A_lxl0_0, A_lxl1_1, A_lyt2 ;yt2 = xl0_0 - xl1_1
343 ADDSP .1 A_lxl1_0, A_lxl0_1, A_lyt3 ;yt3 = xl1_0 + xl0_1
344 ADDSP .1 A_lxl0_0, A_lxl1_1, A_lyt6 ;yt6 = xl0_0 + xl1_1
345 SUBSP .1 A_lxl1_0, A_lxl0_1, A_lyt7 ;yt7 = xl1_0 - xl0_1
347 SUBSP .2 B_lxl0_0, B_lxl1_1, B_lyt2 ;yt2 = xl0_0 - xl1_1
348 ADDSP .2 B_lxl1_0, B_lxl0_1, B_lyt3 ;yt3 = xl1_0 + xl0_1
349 ADDSP .2 B_lxl0_0, B_lxl1_1, B_lyt6 ;yt6 = xl0_0 + xl1_1
350 SUBSP .2 B_lxl1_0, B_lxl0_1, B_lyt7 ;yt7 = xl1_0 - xl0_1
; apply the ~1/n inverse-FFT scaling to every output
352 DMPYSP .1 A_lyt1:A_lyt0, A_scale1:A_scale0, A_lyt1:A_lyt0
353 DMPYSP .1 A_lyt3:A_lyt2, A_scale1:A_scale0, A_lyt3:A_lyt2
354 DMPYSP .1 A_lyt5:A_lyt4, A_scale1:A_scale0, A_lyt5:A_lyt4
355 DMPYSP .1 A_lyt7:A_lyt6, A_scale1:A_scale0, A_lyt7:A_lyt6
357 DMPYSP .2 B_lyt1:B_lyt0, A_scale1:A_scale0, B_lyt1:B_lyt0
358 DMPYSP .2 B_lyt3:B_lyt2, A_scale1:A_scale0, B_lyt3:B_lyt2
359 DMPYSP .2 B_lyt5:B_lyt4, A_scale1:A_scale0, B_lyt5:B_lyt4
360 DMPYSP .2 B_lyt7:B_lyt6, A_scale1:A_scale0, B_lyt7:B_lyt6
; digit-reversed stores, nmax/4 dwords apart; word order differs by endianness
362 .if .LITTLE_ENDIAN
364 STDW .1 A_lyt1:A_lyt0, *A_ly1++[A_lnmax]
365 STDW .1 A_lyt3:A_lyt2, *A_ly1++[A_lnmax]
366 STDW .1 A_lyt5:A_lyt4, *A_ly1++[A_lnmax]
367 STDW .1 A_lyt7:A_lyt6, *A_ly1
369 STDW .2 B_lyt1:B_lyt0, *B_ly1++[B_lnmax]
370 STDW .2 B_lyt3:B_lyt2, *B_ly1++[B_lnmax]
371 STDW .2 B_lyt5:B_lyt4, *B_ly1++[B_lnmax]
372 STDW .2 B_lyt7:B_lyt6, *B_ly1
374 .else
376 STDW .1 A_lyt1:A_lyt0, *A_ly1++[A_lnmax]
377 STDW .1 A_lyt7:A_lyt6, *A_ly1++[A_lnmax]
378 STDW .1 A_lyt5:A_lyt4, *A_ly1++[A_lnmax]
379 STDW .1 A_lyt3:A_lyt2, *A_ly1
381 STDW .2 B_lyt1:B_lyt0, *B_ly1++[B_lnmax]
382 STDW .2 B_lyt7:B_lyt6, *B_ly1++[B_lnmax]
383 STDW .2 B_lyt5:B_lyt4, *B_ly1++[B_lnmax]
384 STDW .2 B_lyt3:B_lyt2, *B_ly1
386 .endif
388 [A_lctr]BDEC .1 LOOP4, A_lctr
389 B ENDFUNCTION ;radix-4 path done; skip the radix-2 stage
391 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
392 ; last stage of radix2 computation;
393 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
394 LAST_STAGE_RADIX2:
395 LOOP2: .trip 4
397 ;Bit reversal calculation: k = BITR(j) >> l0, scaled to a word offset
398 BITR .1 A_lj, A_ltemp1
399 SHRU .1 A_ltemp1, A_ll0, A_lk
400 MPY .1 4, A_lk, A_lk
401 ADD .1 A_ly0, A_lk, A_ly1 ;*y1 = *y0 + k
402 ADD .1 A_lj, 1, A_lj ;j = j+1
404 BITR .2 B_lj, B_ltemp1
405 SHRU .2 B_ltemp1, B_ll0, B_lk
406 MPY .2 4, B_lk, B_lk
407 ADD .2 B_ly0, B_lk, B_ly1 ;*y1 = *y0 + k
408 ADD .2 B_lj, 1, B_lj ;j = j+1
410 LDDW .1 *A_ptr_lx0++[2],A_lx1:A_lx0 ;load ptr_x0[0], ptr_x0[1]
411 LDDW .2 *B_ptr_lx0++[2],A_lx3:A_lx2 ;load ptr_x0[2], ptr_x0[3]
412 LDDW .1 *A_ptr_lx0++[2],A_lx5:A_lx4 ;load ptr_x0[4], ptr_x0[5]
413 LDDW .2 *B_ptr_lx0++[2],A_lx7:A_lx6 ;load ptr_x0[6], ptr_x0[7]
415 LDDW .1 *A_ptr_lx1++[2],B_lx1:B_lx0 ;load ptr_x1[0], ptr_x1[1]
416 LDDW .2 *B_ptr_lx1++[2],B_lx3:B_lx2 ;load ptr_x1[2], ptr_x1[3]
417 LDDW .1 *A_ptr_lx1++[2],B_lx5:B_lx4 ;load ptr_x1[4], ptr_x1[5]
418 LDDW .2 *B_ptr_lx1++[2],B_lx7:B_lx6 ;load ptr_x1[6], ptr_x1[7]
; two independent radix-2 butterflies per datapath
420 DADDSP .1 A_lx1:A_lx0, A_lx3:A_lx2, A_lyt1:A_lyt0 ;yt0 =x0 + x2, yt1 =x1 + x3
421 DSUBSP .1 A_lx1:A_lx0, A_lx3:A_lx2, A_lyt5:A_lyt4 ;yt4 =x0 - x2, yt5 =x1 - x3
423 DADDSP .2 B_lx1:B_lx0, B_lx3:B_lx2, B_lyt1:B_lyt0 ;yt0 =x0 + x2, yt1 =x1 + x3
424 DSUBSP .2 B_lx1:B_lx0, B_lx3:B_lx2, B_lyt5:B_lyt4 ;yt4 =x0 - x2, yt5 =x1 - x3
426 DADDSP .1 A_lx5:A_lx4, A_lx7:A_lx6, A_lyt3:A_lyt2 ;yt3 = x5 + x7, yt2 = x4 + x6
427 DSUBSP .1 A_lx5:A_lx4, A_lx7:A_lx6, A_lyt7:A_lyt6 ;yt7 = x5 - x7, yt6 = x4 - x6
429 DADDSP .2 B_lx5:B_lx4, B_lx7:B_lx6, B_lyt3:B_lyt2 ;yt3 = x5 + x7, yt2 = x4 + x6
430 DSUBSP .2 B_lx5:B_lx4, B_lx7:B_lx6, B_lyt7:B_lyt6 ;yt7 = x5 - x7, yt6 = x4 - x6
; apply the ~1/n inverse-FFT scaling
432 DMPYSP .1 A_lyt1:A_lyt0, A_scale1:A_scale0, A_lyt1:A_lyt0
433 DMPYSP .1 A_lyt3:A_lyt2, A_scale1:A_scale0, A_lyt3:A_lyt2
434 DMPYSP .1 A_lyt5:A_lyt4, A_scale1:A_scale0, A_lyt5:A_lyt4
435 DMPYSP .1 A_lyt7:A_lyt6, A_scale1:A_scale0, A_lyt7:A_lyt6
437 DMPYSP .2 B_lyt1:B_lyt0, A_scale1:A_scale0, B_lyt1:B_lyt0
438 DMPYSP .2 B_lyt3:B_lyt2, A_scale1:A_scale0, B_lyt3:B_lyt2
439 DMPYSP .2 B_lyt5:B_lyt4, A_scale1:A_scale0, B_lyt5:B_lyt4
440 DMPYSP .2 B_lyt7:B_lyt6, A_scale1:A_scale0, B_lyt7:B_lyt6
; bit-reversed stores, nmax/4 dwords apart (no endian swap needed at radix 2)
442 STDW .1 A_lyt1:A_lyt0, *A_ly1++[A_lnmax]
443 STDW .1 A_lyt3:A_lyt2, *A_ly1++[A_lnmax]
444 STDW .1 A_lyt5:A_lyt4, *A_ly1++[A_lnmax]
445 STDW .1 A_lyt7:A_lyt6, *A_ly1
447 STDW .2 B_lyt1:B_lyt0, *B_ly1++[B_lnmax]
448 STDW .2 B_lyt3:B_lyt2, *B_ly1++[B_lnmax]
449 STDW .2 B_lyt5:B_lyt4, *B_ly1++[B_lnmax]
450 STDW .2 B_lyt7:B_lyt6, *B_ly1
452 [A_lctr]BDEC .1 LOOP2, A_lctr
454 ENDFUNCTION:
455 .return
456 .endproc
458 * ======================================================================== *
459 * End of file: DSPF_sp_ifftSPxSP.sa *
460 * ------------------------------------------------------------------------ *
461 * Copyright (C) 2011 Texas Instruments, Incorporated. *
462 * All Rights Reserved. *
463 * ======================================================================== *