[ep-processor-libraries/dsplib.git] / ti / dsplib / src / DSPF_sp_fftSPxSP / c66 / DSPF_sp_fftSPxSP.sa
1 * ======================================================================= *
2 * DSPF_sp_fftSPxSP.sa -- Forward FFT with Mixed Radix *
3 * Linear ASM Implementation *
4 * *
5 * Rev 0.0.2 *
6 * *
7 * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ *
8 * *
9 * *
10 * Redistribution and use in source and binary forms, with or without *
11 * modification, are permitted provided that the following conditions *
12 * are met: *
13 * *
14 * Redistributions of source code must retain the above copyright *
15 * notice, this list of conditions and the following disclaimer. *
16 * *
17 * Redistributions in binary form must reproduce the above copyright *
18 * notice, this list of conditions and the following disclaimer in the *
19 * documentation and/or other materials provided with the *
20 * distribution. *
21 * *
22 * Neither the name of Texas Instruments Incorporated nor the names of *
23 * its contributors may be used to endorse or promote products derived *
24 * from this software without specific prior written permission. *
25 * *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT *
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR *
29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT *
30 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, *
31 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT *
32 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, *
33 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
34 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
35 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE *
36 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
37 * *
38 * ======================================================================= *
; Under EABI the C-visible symbol has no leading underscore; alias it so
; the single entry label below serves both the COFF and EABI ABIs.
39 .if __TI_EABI__
40 .asg DSPF_sp_fftSPxSP, _DSPF_sp_fftSPxSP
41 .endif
; Place the routine in the optimized-code section and export the entry point.
42 .sect ".text:optimized"
43 .global _DSPF_sp_fftSPxSP
; =====================================================================
; _DSPF_sp_fftSPxSP -- single-precision forward FFT, mixed radix
; C66x linear assembly; the assembler's scheduler allocates/pipelines.
;
; Arguments (see .cproc):
;   A_n     : FFT length n (power of 2)
;   B_ptr_x : pointer to input array of complex single-precision pairs
;   A_ptr_w : pointer to twiddle-factor table
;   B_ptr_y : pointer to output array
;
; Structure:
;   OLOOP/ILOOP       -- radix-4 decimation stages, run while stride > 4.
;                        Two butterflies per ILOOP iteration: the A and B
;                        datapaths process adjacent complex pairs (the B
;                        side works 8 bytes / one complex element ahead,
;                        using the next twiddle group w[3..5]).
;   LOOP4 / LOOP2     -- final radix-4 or radix-2 stage fused with
;                        BITR-based bit-reversed stores into ptr_y.
;
; NOTE(review): the twiddle table is consumed as 6 complex (cos:sin)
; doublewords per butterfly group, per stage -- presumably the layout
; produced by the matching DSPLIB tw_gen routine; confirm against the
; library documentation.
; =====================================================================
45 _DSPF_sp_fftSPxSP .cproc A_n, B_ptr_x, A_ptr_w, B_ptr_y
46 .no_mdep ; promise: no memory aliasing between the pointer args
; ---- register declarations: stage/loop control --------------------
48 .reg B_w0
49 .reg A_j, A_w, A_x, A_y, A_h2, A_predj
50 .reg B_j, B_w, B_x, B_y, B_h2, B_2h2, B_fft_jmp, B_predj, B_n
; ---- twiddle factors (cos:sin pairs) for the 3 rotated outputs ----
52 .reg A_co1:A_si1, A_co2:A_si2, A_co3:A_si3
53 .reg B_co1:B_si1, B_co2:B_si2, B_co3:B_si3
; ---- radix-4 butterfly inputs and sum/difference intermediates ----
55 .reg A_x1:A_x0, A_x_h2p:A_x_h2, A_x_l1p:A_x_l1, A_x_l2p:A_x_l2
56 .reg A_xh1:A_xh0, A_xl1:A_xl0, A_xh21:A_xh20, A_xl21:A_xl20
57 .reg B_x1:B_x0, B_x_h2p:B_x_h2, B_x_l1p:B_x_l1, B_x_l2p:B_x_l2
58 .reg B_xh1:B_xh0, B_xl1:B_xl0, B_xh21:B_xh20,B_xl21:B_xl20
; ---- complex-multiply products and output accumulators ------------
60 .reg A_sum1, A_sum2
61 .reg A_sum3, A_sum4, A_xt1, A_yt1
62 .reg A_prod1,A_prod2,A_prod3,A_prod4
63 .reg A_sum6:A_sum5
64 .reg A_yt0:A_xt0
65 .reg A_prod8:A_prod7,A_prod6:A_prod5
66 .reg A_sum7, A_sum8, A_xt2, A_yt2
67 .reg A_prod9,A_prod10,A_prod11,A_prod12
69 .reg B_sum1, B_sum2
70 .reg B_sum3, B_sum4, B_xt1, B_yt1
71 .reg B_prod1,B_prod2,B_prod3,B_prod4
72 .reg B_sum6:B_sum5
73 .reg B_yt0:B_xt0
74 .reg B_prod8:B_prod7,B_prod6:B_prod5
75 .reg B_sum7, B_sum8, B_xt2, B_yt2
76 .reg B_prod9,B_prod10,B_prod11,B_prod12
; ---- outer-stage control and constants ----------------------------
78 .reg A_radix, A_i
79 .reg B_radix2, B_stride, B_tw_offset, B_i, B_while, B_temp
80 .reg B_const6, A_const48, A_const16
81 .reg A_y_, B_y_
; ---- last-stage (bit-reversal) working registers ------------------
82 .reg B_ll0, A_ll0
83 .reg A_lj, A_lk, A_ltemp1
84 .reg B_lj, B_lk, B_ltemp1
85 .reg A_ptr_lx0, B_ptr_lx0
86 .reg A_ptr_lx1, B_ptr_lx1
87 .reg A_ly0, A_ly1, B_ly0, B_ly1
88 .reg A_lnmax, B_lnmax
89 .reg A_lctr
; eight complex inputs per last-stage iteration, per datapath
91 .reg A_lx1:A_lx0
92 .reg A_lx3:A_lx2
93 .reg A_lx5:A_lx4
94 .reg A_lx7:A_lx6
95 .reg B_lx1:B_lx0
96 .reg B_lx3:B_lx2
97 .reg B_lx5:B_lx4
98 .reg B_lx7:B_lx6
100 .reg A_lxh0_0, A_lxh1_0, A_lxh0_1, A_lxh1_1
101 .reg B_lxh0_0, B_lxh1_0, B_lxh0_1, B_lxh1_1
102 .reg A_lyt0, A_lyt1, A_lyt4, A_lyt5
103 .reg B_lyt0, B_lyt1, B_lyt4, B_lyt5
104 .reg A_lxl0_0, A_lxl1_0, A_lxl0_1, A_lxl1_1
105 .reg B_lxl0_0, B_lxl1_0, B_lxl0_1, B_lxl1_1
106 .reg A_lyt2, A_lyt3, A_lyt6, A_lyt7
107 .reg B_lyt2, B_lyt3, B_lyt6, B_lyt7
108 .reg A_r2flag
; ---- one-time initialization --------------------------------------
110 MVK .1 4, A_radix ;assume radix 4 until proven otherwise
111 MV .2 A_n, B_n
112 MV .2 B_n, B_stride ;stride starts at n, /4 each stage
113 ZERO .2 B_tw_offset ;byte offset into the twiddle table
114 MVK .2 6, B_const6 ;6 twiddle doublewords per butterfly group
115 MVK .1 48, A_const48 ;48 bytes = 6 twiddle doublewords
116 MVK .1 16, A_const16 ;16 bytes = 2 complex SP elements
; ---- one radix-4 decimation stage per OLOOP iteration -------------
118 OLOOP
119 ZERO .1 A_j
120 ZERO .2 B_j ;j = byte index into this stage's twiddles
121 MPY32 .2 B_const6, B_stride, B_fft_jmp ;fft_jmp = 6*stride bytes of twiddles/stage
122 SHRU .2 B_stride, 2, B_h2 ;h2 = stride/4 (doubleword index between legs)
123 MV .1 B_h2, A_h2
124 ADD .1 B_ptr_x, -16, A_x ;bias -16 so *++A_x[2] lands on x[0]
125 ADD .1 B_ptr_x, -16, A_y ;in-place stage: y walks the same buffer
126 ADD .2 A_ptr_w, B_tw_offset, B_w0 ;w0 = twiddle group base for this stage
127 ADD .2 B_tw_offset, B_fft_jmp, B_tw_offset ;advance twiddle offset for next stage
128 SHRU .2 B_stride, 2, B_stride ;stride /= 4
129 SHRU .2 B_n, 3, B_i ;n/8 iterations (2 butterflies per iteration)
130 SUB .2 B_i, 1, B_i
132 ILOOP: ; .trip 4
; A path reads twiddles w[0..2]; B path the adjacent group w[3..5]
133 ADD .2 B_w0, B_j, B_w
134 MVD .1 B_w, A_w
136 LDDW .1 *A_w[0], A_co1:A_si1 ;Load si1, co1
137 LDDW .1 *A_w[1], A_co2:A_si2 ;Load si2, co2
138 LDDW .1 *A_w[2], A_co3:A_si3 ;Load si3, co3
139 LDDW .2 *B_w[3], B_co1:B_si1 ;Load si1, co1
140 LDDW .2 *B_w[4], B_co2:B_si2 ;Load si2, co2
141 LDDW .2 *B_w[5], B_co3:B_si3 ;Load si3, co3
; load the four butterfly legs, h2 doublewords apart; the B copy of x
; starts one complex element (8 bytes) further on
143 MVD .2 A_x, B_x ;x_copy =x
144 LDDW .1 *++A_x[2], A_x1:A_x0 ;Load x1, x0
145 LDDW .1 *++A_x[A_h2], A_x_h2p:A_x_h2 ;Load x_h2p, x_h2
146 LDDW .1 *++A_x[A_h2], A_x_l1p:A_x_l1 ;Load x_l1p, x_l1
147 LDDW .1 *A_x[A_h2], A_x_l2p:A_x_l2 ;Load x_l2p, x_l2
149 LDDW .2 *++B_x[3], B_x1:B_x0 ;Load x1, x0 (8 bytes past A path)
150 LDDW .2 *++B_x[B_h2], B_x_h2p:B_x_h2 ;Load x_h2p, x_h2
151 LDDW .2 *++B_x[B_h2], B_x_l1p:B_x_l1 ;Load x_l1p, x_l1
152 LDDW .2 *B_x[B_h2], B_x_l2p:B_x_l2 ;Load x_l2p, x_l2
; radix-4 sums/differences (complex, packed hi:lo = im:re doublewords)
154 DADDSP .L1 A_x1:A_x0, A_x_l1p:A_x_l1, A_xh1:A_xh0
155 DSUBSP .1 A_x1:A_x0, A_x_l1p:A_x_l1, A_xl1:A_xl0
156 DADDSP .1 A_x_h2p:A_x_h2, A_x_l2p:A_x_l2, A_xh21:A_xh20
157 DSUBSP .1 A_x_h2p:A_x_h2, A_x_l2p:A_x_l2, A_xl21:A_xl20
159 DADDSP .L2 B_x1:B_x0, B_x_l1p:B_x_l1, B_xh1:B_xh0
160 DSUBSP .2 B_x1:B_x0, B_x_l1p:B_x_l1, B_xl1:B_xl0
161 DADDSP .2 B_x_h2p:B_x_h2, B_x_l2p:B_x_l2, B_xh21:B_xh20
162 DSUBSP .2 B_x_h2p:B_x_h2, B_x_l2p:B_x_l2, B_xl21:B_xl20
164 ROTL .2 B_h2, 4, B_2h2 ;2h2 = h2*16 bytes
165 SUB .1 A_x, B_2h2, A_x ;undo the two +h2 advances: net x += 16
167 ADD .1 A_j, A_const48, A_j ;j += 48 bytes (6 twiddle doublewords)
168 SUB .1 A_j, B_fft_jmp, A_predj ;predj = j - fft_jmp
169 [!A_predj]ADD .1 A_x, B_fft_jmp, A_x ;end of group: x += fft_jmp
170 [!A_predj]ZERO .1 A_j ;j=0
; private write cursors so y itself only advances once per iteration
172 MVD .1 A_y, A_y_
173 MVD .2 A_y, B_y_
; A-side outputs: y0 = xh + xh2 (no twiddle)
175 DADDSP .1 A_xh1:A_xh0, A_xh21:A_xh20, A_sum2:A_sum1
176 STDW .1 A_sum2:A_sum1, *++A_y_[2]
; y_h2 = (xh - xh2) * w2
178 DSUBSP .1 A_xh1:A_xh0, A_xh21:A_xh20, A_yt0:A_xt0
179 CMPYSP .1 A_yt0:A_xt0, A_co2:A_si2, A_prod8:A_prod7:A_prod6:A_prod5
180 DADDSP .1 A_prod8:A_prod7,A_prod6:A_prod5, A_sum6:A_sum5
181 STDW .1 A_sum6:A_sum5, *++A_y_[A_h2]
; +/-j rotation of (xl, xl2) differs by endianness of the packed pair
183 .if .LITTLE_ENDIAN
185 SUBSP .L1 A_xl1, A_xl20, A_yt1 ;yt1 = xl1 - xl20
186 ADDSP .L1 A_xl0, A_xl21, A_xt1 ;xt1 = xl0 + xl21
187 CMPYSP .1 A_yt1:A_xt1, A_co1:A_si1, A_prod4:A_prod3:A_prod2:A_prod1
188 DADDSP .1 A_prod4:A_prod3,A_prod2:A_prod1, A_sum4:A_sum3
189 STDW .1 A_sum4:A_sum3, *++A_y_[A_h2] ;y_l1 = (xt1,yt1) * w1
191 ADDSP .L1 A_xl1, A_xl20, A_yt2 ;yt2 = xl1 + xl20
192 SUBSP .L1 A_xl0, A_xl21, A_xt2 ;xt2 = xl0 - xl21
193 CMPYSP .1 A_yt2:A_xt2, A_co3:A_si3, A_prod12:A_prod11:A_prod10:A_prod9
194 DADDSP .1 A_prod12:A_prod11, A_prod10:A_prod9, A_sum8:A_sum7
195 STDW .1 A_sum8:A_sum7, *A_y_[A_h2] ;y_l2 = (xt2,yt2) * w3
197 .else
199 SUBSP .L1 A_xl0, A_xl21, A_xt1 ;xt1 = xl0 - xl21
200 ADDSP .L1 A_xl1, A_xl20, A_yt1 ;yt1 = xl1 + xl20
201 CMPYSP .1 A_yt1:A_xt1, A_co1:A_si1, A_prod4:A_prod3:A_prod2:A_prod1
202 DADDSP .1 A_prod4:A_prod3,A_prod2:A_prod1, A_sum4:A_sum3
203 STDW .1 A_sum4:A_sum3, *++A_y_[A_h2] ;y_l1 = (xt1,yt1) * w1
205 ADDSP .L1 A_xl0, A_xl21, A_xt2 ;xt2 = xl0 + xl21
206 SUBSP .L1 A_xl1, A_xl20, A_yt2 ;yt2 = xl1 - xl20
207 CMPYSP .1 A_yt2:A_xt2, A_co3:A_si3, A_prod12:A_prod11:A_prod10:A_prod9
208 DADDSP .1 A_prod12:A_prod11, A_prod10:A_prod9, A_sum8:A_sum7
209 STDW .1 A_sum8:A_sum7, *A_y_[A_h2] ;y_l2 = (xt2,yt2) * w3
211 .endif
; B-side outputs: same butterfly, one complex element further on
213 DADDSP .2 B_xh1:B_xh0, B_xh21:B_xh20, B_sum2:B_sum1
214 STDW .2 B_sum2:B_sum1, *++B_y_[3]
216 DSUBSP .2 B_xh1:B_xh0, B_xh21:B_xh20, B_yt0:B_xt0
217 CMPYSP .2 B_yt0:B_xt0, B_co2:B_si2, B_prod8:B_prod7:B_prod6:B_prod5
218 DADDSP .2 B_prod8:B_prod7,B_prod6:B_prod5, B_sum6:B_sum5
219 STDW .2 B_sum6:B_sum5, *++B_y_[B_h2]
221 .if .LITTLE_ENDIAN
223 SUBSP .L2 B_xl1, B_xl20, B_yt1 ;yt1 = xl1 - xl20
224 ADDSP .L2 B_xl0, B_xl21, B_xt1 ;xt1 = xl0 + xl21
225 CMPYSP .2 B_yt1:B_xt1, B_co1:B_si1, B_prod4:B_prod3:B_prod2:B_prod1
226 DADDSP .2 B_prod4:B_prod3,B_prod2:B_prod1, B_sum4:B_sum3
227 STDW .2 B_sum4:B_sum3, *++B_y_[B_h2] ;y_l1 = (xt1,yt1) * w1
229 ADDSP .L2 B_xl1, B_xl20, B_yt2 ;yt2 = xl1 + xl20
230 SUBSP .L2 B_xl0, B_xl21, B_xt2 ;xt2 = xl0 - xl21
231 CMPYSP .2 B_yt2:B_xt2, B_co3:B_si3, B_prod12:B_prod11:B_prod10:B_prod9
232 DADDSP .2 B_prod12:B_prod11, B_prod10:B_prod9, B_sum8:B_sum7
233 STDW .2 B_sum8:B_sum7, *B_y_[B_h2] ;y_l2 = (xt2,yt2) * w3
235 .else
237 SUBSP .L2 B_xl0, B_xl21, B_xt1 ;xt1 = xl0 - xl21
238 ADDSP .L2 B_xl1, B_xl20, B_yt1 ;yt1 = xl1 + xl20
239 CMPYSP .2 B_yt1:B_xt1, B_co1:B_si1, B_prod4:B_prod3:B_prod2:B_prod1
240 DADDSP .2 B_prod4:B_prod3,B_prod2:B_prod1, B_sum4:B_sum3
241 STDW .2 B_sum4:B_sum3, *++B_y_[B_h2] ;y_l1 = (xt1,yt1) * w1
243 ADDSP .L2 B_xl0, B_xl21, B_xt2 ;xt2 = xl0 + xl21
244 SUBSP .L2 B_xl1, B_xl20, B_yt2 ;yt2 = xl1 - xl20
245 CMPYSP .2 B_yt2:B_xt2, B_co3:B_si3, B_prod12:B_prod11:B_prod10:B_prod9
246 DADDSP .2 B_prod12:B_prod11, B_prod10:B_prod9, B_sum8:B_sum7
247 STDW .2 B_sum8:B_sum7, *B_y_[B_h2] ;y_l2 = (xt2,yt2) * w3
249 .endif
; advance output cursor and B-side twiddle index (mirrors A side above)
251 ADD .1 A_y, A_const16, A_y ;y += 16 bytes (2 complex elements)
252 ADD .2 B_j, A_const48, B_j ;j += 48 bytes (B-side copy of j)
253 SUB .2 B_j, B_fft_jmp, B_predj
254 [!B_predj]ADD .1 A_y, B_fft_jmp, A_y ;end of group: y += fft_jmp
255 [!B_predj]ZERO .2 B_j
257 [B_i] BDEC .2 ILOOP, B_i
259 CMPGTU .2 B_stride, A_radix, B_while ;more stages while stride > 4
260 [B_while]B .2 OLOOP
262 ; find the radix of the fft
; parity of NORM(n) distinguishes a pure power of 4 (last stage radix 4)
; from 2*power-of-4 (last stage radix 2)
263 MVK .1 4, A_radix
264 NORM .2 A_n, B_radix2
265 AND .2 B_radix2, 1, B_radix2
266 [B_radix2]MVK .1 2, A_radix
; last-stage setup: A side reads the first half of x, B side the second
; half (lj starts at n/8); each side's second pointer is 8 bytes in
268 ZERO .1 A_lj
269 SHR .2 A_n, 3, B_lj
270 MV .1 B_ptr_x, A_ptr_lx0 ;ptr_x0 = ptr_x
271 ADD .2 B_ptr_x, 8, B_ptr_lx0
272 MV .1 A_n, A_ptr_lx1
273 ADDAW .1 B_ptr_x, A_ptr_lx1, A_ptr_lx1 ;ptr_x1 = ptr_x + n words (midpoint)
274 ADD .2 A_ptr_lx1, 8, B_ptr_lx1
275 MV .1 B_ptr_y, A_ly0 ;y0 = ptr_y
276 MV .2 B_ptr_y, B_ly0 ;y0 = ptr_y
278 ;BITR scale factor: l0 = NORM(n) + 3 (shift applied after BITR below)
279 NORM .1 A_n, A_ll0 ;l0 =_NORM(n)
280 ADD .1 A_ll0, 3, A_ll0 ;l0 += 3
281 MV .2 A_ll0, B_ll0
282 SHR .2 A_n, 2, B_lnmax ;nmax = n/4: doubleword stride between output quarters
283 SHR .1 A_n, 2, A_lnmax ;nmax >>= 2
284 SHR .1 A_n, 3, A_lctr ;loop counter = n/8 - 1
285 SUB .1 A_lctr, 1, A_lctr
286 CMPEQ .1 A_radix, 2, A_r2flag ;Check whether radix ==2
288 [A_r2flag]B LAST_STAGE_RADIX2
290 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
291 ; last stage of radix4 computation ;
292 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
293 ;Bit reversal Calculation
294 LOOP4: ; .trip 4
; k = bitr(j) >> l0, scaled by 4 to a byte offset into ptr_y
296 BITR .1 A_lj, A_ltemp1
297 SHRU .1 A_ltemp1, A_ll0, A_lk
298 MPY .1 4, A_lk, A_lk
299 ADD .1 A_ly0, A_lk, A_ly1 ;*y1 = *y0 + k
300 ADD .1 A_lj, 1, A_lj ;j = j+1
302 BITR .2 B_lj, B_ltemp1
303 SHRU .2 B_ltemp1, B_ll0, B_lk
304 MPY .2 4, B_lk, B_lk
305 ADD .2 B_ly0, B_lk, B_ly1 ;*y1 = *y0 + k
306 ADD .2 B_lj, 1, B_lj ;j = j+1
; A regs <- 4 complex from the first half, B regs <- 4 from the second
308 LDDW .1 *A_ptr_lx0++[2],A_lx1:A_lx0 ;load ptr_x0[0], ptr_x0[1]
309 LDDW .2 *B_ptr_lx0++[2],A_lx3:A_lx2 ;load ptr_x0[2], ptr_x0[3]
310 LDDW .1 *A_ptr_lx0++[2],A_lx5:A_lx4 ;load ptr_x0[4], ptr_x0[5]
311 LDDW .2 *B_ptr_lx0++[2],A_lx7:A_lx6 ;load ptr_x0[6], ptr_x0[7]
313 LDDW .1 *A_ptr_lx1++[2],B_lx1:B_lx0 ;load ptr_x1[0], ptr_x1[1]
314 LDDW .2 *B_ptr_lx1++[2],B_lx3:B_lx2 ;load ptr_x1[2], ptr_x1[3]
315 LDDW .1 *A_ptr_lx1++[2],B_lx5:B_lx4 ;load ptr_x1[4], ptr_x1[5]
316 LDDW .2 *B_ptr_lx1++[2],B_lx7:B_lx6 ;load ptr_x1[6], ptr_x1[7]
; radix-4 butterfly on (x0,x1) (x2,x3) vs (x4,x5) (x6,x7)
318 DADDSP .1 A_lx1:A_lx0, A_lx5:A_lx4, A_lxh1_0:A_lxh0_0 ;xh0_0 = x0 + x4, xh1_0 = x1 + x5
319 DADDSP .1 A_lx3:A_lx2, A_lx7:A_lx6, A_lxh1_1:A_lxh0_1 ;xh0_1 = x2 + x6, xh1_1 = x3 + x7
321 DADDSP .2 B_lx1:B_lx0, B_lx5:B_lx4, B_lxh1_0:B_lxh0_0 ;xh0_0 = x0 + x4, xh1_0 = x1 + x5
322 DADDSP .2 B_lx3:B_lx2, B_lx7:B_lx6, B_lxh1_1:B_lxh0_1 ;xh0_1 = x2 + x6, xh1_1 = x3 + x7
324 DADDSP .1 A_lxh1_0:A_lxh0_0, A_lxh1_1:A_lxh0_1, A_lyt1:A_lyt0 ;yt0 =xh0_0 + xh0_1; yt1 =xh1_0 + xh1_1
325 DSUBSP .1 A_lxh1_0:A_lxh0_0, A_lxh1_1:A_lxh0_1, A_lyt5:A_lyt4 ;yt4 =xh0_0 - xh0_1, yt5 =xh1_0 - xh1_1
327 DADDSP .2 B_lxh1_0:B_lxh0_0, B_lxh1_1:B_lxh0_1, B_lyt1:B_lyt0 ;yt0 =xh0_0 + xh0_1, yt1 =xh1_0 + xh1_1
328 DSUBSP .2 B_lxh1_0:B_lxh0_0, B_lxh1_1:B_lxh0_1, B_lyt5:B_lyt4 ;yt4 =xh0_0 - xh0_1, yt5 =xh1_0 - xh1_1
330 DSUBSP .1 A_lx1:A_lx0, A_lx5:A_lx4, A_lxl1_0:A_lxl0_0 ;xl0_0 = x0 - x4, xl1_0 = x1 - x5
331 DSUBSP .1 A_lx3:A_lx2, A_lx7:A_lx6, A_lxl1_1:A_lxl0_1 ;xl0_1 = x2 - x6, xl1_1 = x3 - x7,
333 DSUBSP .2 B_lx1:B_lx0, B_lx5:B_lx4, B_lxl1_0:B_lxl0_0 ;xl0_0 = x0 - x4, xl1_0 = x1 - x5
334 DSUBSP .2 B_lx3:B_lx2, B_lx7:B_lx6, B_lxl1_1:B_lxl0_1 ;xl0_1 = x2 - x6, xl1_1 = x3 - x7
; +/-j rotation of the xl terms (comments match the instructions;
; src1 - src2 for SUBSP)
336 SUBSP .1 A_lxl0_0, A_lxl1_1, A_lyt2 ;yt2 = xl0_0 - xl1_1
337 ADDSP .1 A_lxl1_0, A_lxl0_1, A_lyt3 ;yt3 = xl1_0 + xl0_1
338 ADDSP .1 A_lxl0_0, A_lxl1_1, A_lyt6 ;yt6 = xl0_0 + xl1_1
339 SUBSP .1 A_lxl1_0, A_lxl0_1, A_lyt7 ;yt7 = xl1_0 - xl0_1
341 SUBSP .2 B_lxl0_0, B_lxl1_1, B_lyt2 ;yt2 = xl0_0 - xl1_1
342 ADDSP .2 B_lxl1_0, B_lxl0_1, B_lyt3 ;yt3 = xl1_0 + xl0_1
343 ADDSP .2 B_lxl0_0, B_lxl1_1, B_lyt6 ;yt6 = xl0_0 + xl1_1
344 SUBSP .2 B_lxl1_0, B_lxl0_1, B_lyt7 ;yt7 = xl1_0 - xl0_1
; store the four outputs nmax doublewords apart (the four output
; quarters); store order of the middle pair differs by endianness
346 .if .LITTLE_ENDIAN
348 STDW .1 A_lyt1:A_lyt0, *A_ly1++[A_lnmax]
349 STDW .1 A_lyt7:A_lyt6, *A_ly1++[A_lnmax]
350 STDW .1 A_lyt5:A_lyt4, *A_ly1++[A_lnmax]
351 STDW .1 A_lyt3:A_lyt2, *A_ly1
353 STDW .2 B_lyt1:B_lyt0, *B_ly1++[B_lnmax]
354 STDW .2 B_lyt7:B_lyt6, *B_ly1++[B_lnmax]
355 STDW .2 B_lyt5:B_lyt4, *B_ly1++[B_lnmax]
356 STDW .2 B_lyt3:B_lyt2, *B_ly1
358 .else
360 STDW .1 A_lyt1:A_lyt0, *A_ly1++[A_lnmax]
361 STDW .1 A_lyt3:A_lyt2, *A_ly1++[A_lnmax]
362 STDW .1 A_lyt5:A_lyt4, *A_ly1++[A_lnmax]
363 STDW .1 A_lyt7:A_lyt6, *A_ly1
365 STDW .2 B_lyt1:B_lyt0, *B_ly1++[B_lnmax]
366 STDW .2 B_lyt3:B_lyt2, *B_ly1++[B_lnmax]
367 STDW .2 B_lyt5:B_lyt4, *B_ly1++[B_lnmax]
368 STDW .2 B_lyt7:B_lyt6, *B_ly1
370 .endif
372 [A_lctr]BDEC .1 LOOP4, A_lctr
373 B ENDFUNCTION ;skip the radix-2 last stage
375 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
376 ; last stage of radix2 computation;
377 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
378 LAST_STAGE_RADIX2:
379 LOOP2: ; .trip 4
381 ;Bit reversal Calculation
; k = bitr(j) >> l0, scaled by 4 to a byte offset into ptr_y
382 BITR .1 A_lj, A_ltemp1
383 SHRU .1 A_ltemp1, A_ll0, A_lk
384 MPY .1 4, A_lk, A_lk
385 ADD .1 A_ly0, A_lk, A_ly1 ;*y1 = *y0 + k
386 ADD .1 A_lj, 1, A_lj ;j = j+1
388 BITR .2 B_lj, B_ltemp1
389 SHRU .2 B_ltemp1, B_ll0, B_lk
390 MPY .2 4, B_lk, B_lk
391 ADD .2 B_ly0, B_lk, B_ly1 ;*y1 = *y0 + k
392 ADD .2 B_lj, 1, B_lj ;j = j+1
; A regs <- 4 complex from the first half, B regs <- 4 from the second
394 LDDW .1 *A_ptr_lx0++[2],A_lx1:A_lx0 ;load ptr_x0[0], ptr_x0[1]
395 LDDW .2 *B_ptr_lx0++[2],A_lx3:A_lx2 ;load ptr_x0[2], ptr_x0[3]
396 LDDW .1 *A_ptr_lx0++[2],A_lx5:A_lx4 ;load ptr_x0[4], ptr_x0[5]
397 LDDW .2 *B_ptr_lx0++[2],A_lx7:A_lx6 ;load ptr_x0[6], ptr_x0[7]
399 LDDW .1 *A_ptr_lx1++[2],B_lx1:B_lx0 ;load ptr_x1[0], ptr_x1[1]
400 LDDW .2 *B_ptr_lx1++[2],B_lx3:B_lx2 ;load ptr_x1[2], ptr_x1[3]
401 LDDW .1 *A_ptr_lx1++[2],B_lx5:B_lx4 ;load ptr_x1[4], ptr_x1[5]
402 LDDW .2 *B_ptr_lx1++[2],B_lx7:B_lx6 ;load ptr_x1[6], ptr_x1[7]
; two independent radix-2 butterflies per datapath
404 DADDSP .1 A_lx1:A_lx0, A_lx3:A_lx2, A_lyt1:A_lyt0 ;yt0 =x0 + x2, yt1 =x1 + x3
405 DSUBSP .1 A_lx1:A_lx0, A_lx3:A_lx2, A_lyt5:A_lyt4 ;yt4 =x0 - x2, yt5 =x1 - x3
407 DADDSP .2 B_lx1:B_lx0, B_lx3:B_lx2, B_lyt1:B_lyt0 ;yt0 =x0 + x2, yt1 =x1 + x3
408 DSUBSP .2 B_lx1:B_lx0, B_lx3:B_lx2, B_lyt5:B_lyt4 ;yt4 =x0 - x2, yt5 =x1 - x3
410 DADDSP .1 A_lx5:A_lx4, A_lx7:A_lx6, A_lyt3:A_lyt2 ;yt3 = x5 + x7, yt2 = x4 + x6
411 DSUBSP .1 A_lx5:A_lx4, A_lx7:A_lx6, A_lyt7:A_lyt6 ;yt7 = x5 - x7, yt6 = x4 - x6
413 DADDSP .2 B_lx5:B_lx4, B_lx7:B_lx6, B_lyt3:B_lyt2 ;yt3 = x5 + x7, yt2 = x4 + x6
414 DSUBSP .2 B_lx5:B_lx4, B_lx7:B_lx6, B_lyt7:B_lyt6 ;yt7 = x5 - x7, yt6 = x4 - x6
; store the four outputs nmax doublewords apart (the output quarters)
416 STDW .1 A_lyt1:A_lyt0, *A_ly1++[A_lnmax]
417 STDW .1 A_lyt3:A_lyt2, *A_ly1++[A_lnmax]
418 STDW .1 A_lyt5:A_lyt4, *A_ly1++[A_lnmax]
419 STDW .1 A_lyt7:A_lyt6, *A_ly1
421 STDW .2 B_lyt1:B_lyt0, *B_ly1++[B_lnmax]
422 STDW .2 B_lyt3:B_lyt2, *B_ly1++[B_lnmax]
423 STDW .2 B_lyt5:B_lyt4, *B_ly1++[B_lnmax]
424 STDW .2 B_lyt7:B_lyt6, *B_ly1
426 [A_lctr]BDEC .1 LOOP2, A_lctr
428 ENDFUNCTION:
429 .return
430 .endproc
432 * ======================================================================== *
433 * End of file: DSPF_sp_fftSPxSP.sa *
434 * ------------------------------------------------------------------------ *
435 * Copyright (C) 2011 Texas Instruments, Incorporated. *
436 * All Rights Reserved. *
437 * ======================================================================== *