[ep-processor-libraries/dsplib.git] / ti / dsplib / src / DSPF_sp_cfftr2_dit / c674 / DSPF_sp_cfftr2_dit.s
1 * ======================================================================= *
2 * DSPF_sp_cfftr2_dit.asm -- Forward FFT with Radix 2 and DIT *
3 * Legacy ASM Implementation from C67x DSPLIB *
4 * *
5 * Rev 0.0.2 *
6 * *
7 * Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ *
8 * *
9 * *
10 * Redistribution and use in source and binary forms, with or without *
11 * modification, are permitted provided that the following conditions *
12 * are met: *
13 * *
14 * Redistributions of source code must retain the above copyright *
15 * notice, this list of conditions and the following disclaimer. *
16 * *
17 * Redistributions in binary form must reproduce the above copyright *
18 * notice, this list of conditions and the following disclaimer in the *
19 * documentation and/or other materials provided with the *
20 * distribution. *
21 * *
22 * Neither the name of Texas Instruments Incorporated nor the names of *
23 * its contributors may be used to endorse or promote products derived *
24 * from this software without specific prior written permission. *
25 * *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT *
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR *
29 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT *
30 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, *
31 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT *
32 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, *
33 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *
34 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *
35 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE *
36 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
37 * *
38 * ======================================================================= *
; -----------------------------------------------------------------------
; Symbolic register names used by the butterfly code below.
; Several physical registers carry more than one alias, so the aliased
; values must never be live at the same time:
;   A15 : A_p3, A_x2ias, A_x2ms      B3 : B_p4, B_x2mp1s, B_x2iap1s
; Some aliases also land on registers that are read with their raw names
; at function entry (A4, A6, B4) or that hold CSR/IRP/frame-pointer
; copies during the register save and restore sequences (B1, B2, A1).
; -----------------------------------------------------------------------
40 .asg B8, B_x ; load ptr for the x[2m+1]:x[2m] pairs
41 .asg A5, A_w ; load ptr for the si:co coefficient pairs
42 .asg B7, B_x2mp1 ; loaded x[2m+1]
43 .asg B6, B_x2m ; loaded x[2m]
44 .asg A7, A_si ; loaded si
45 .asg A6, A_co ; loaded co (A6 is also read as the length at entry)
46 .asg B0, B_lx2mc ; down-counter for x[2m] loads
47 .asg B4, B_8n2 ; B-side copy of the 8n2 address increment (B4 is also read as the w ptr at entry)
48 .asg B13, B_n2 ; n2
49 .asg A13, A_p1 ; p1=co*x[2m]
50 .asg B11, B_p2 ; p2=co*x[2m+1]
51 .asg B3, B_p4 ; p4=si*x[2m]
52 .asg A15, A_p3 ; p3=si*x[2m+1]
53 .asg B2, B_lx2iac ; down-counter for x[2ia] loads
54 .asg A3, A_x ; load ptr for the x[2ia+1]:x[2ia] pairs
55 .asg A10, A_8n2 ; A-side 8n2 address increment
56 .asg A12, A_rtemp ; rtemp=p1+p3
57 .asg B10, B_itemp ; itemp=p2-p4
58 .asg A1, A_stcnt ; store down-counter
59 .asg A15, A_x2ias ; x[2ia]+rtemp, value stored to x[2ia]
60 .asg B3, B_x2mp1s ; x[2ia+1]-itemp, value stored to x[2m+1]
61 .asg A15, A_x2ms ; x[2ia]-rtemp, value stored to x[2m]
62 .asg B3, B_x2iap1s ; x[2ia+1]+itemp, value stored to x[2ia+1]
63 .asg A9, A_x2iap1 ; loaded x[2ia+1]
64 .asg A8, A_x2ia ; loaded x[2ia]
65 .asg B1, B_hafn2 ; half of n2
66 .asg A2, A_nby2 ; inner loop counter / branch predicate
67 .asg A4, A_wbase ; saved w base ptr (overwrites the x base held in A4 at entry)
; Place the routine in the ".text:optimized" section and export its entry
; point. When __TI_EABI__ is set, the substitution symbol
; _DSPF_sp_cfftr2_dit is replaced by DSPF_sp_cfftr2_dit, so the .global
; and the label below use the name without the leading underscore.
70 .sect ".text:optimized"
71 .if __TI_EABI__
72 .asg DSPF_sp_cfftr2_dit, _DSPF_sp_cfftr2_dit
73 .endif
75 .global _DSPF_sp_cfftr2_dit
; -----------------------------------------------------------------------
; _DSPF_sp_cfftr2_dit -- hand-scheduled, software-pipelined radix-2 DIT
; butterfly routine operating in place on the x[] array.
;
; Registers read at entry (as used by the code below):
;   A4 - base pointer of x[]
;   B4 - pointer to the si:co coefficient table (copied to A_w)
;   A6 - length; n2 = A6 >> 1, address increment "8n2" = A6 << 2
; NOTE(review): presumably the C prototype is
;   void DSPF_sp_cfftr2_dit(float *x, float *w, unsigned short n)
; -- confirm against the DSPLIB header.
;
; Butterfly, as annotated on the individual instructions:
;   rtemp   = co*x[2m]   + si*x[2m+1]
;   itemp   = co*x[2m+1] - si*x[2m]
;   x[2m]   = x[2ia]   - rtemp      x[2m+1]   = x[2ia+1] - itemp
;   x[2ia]  = x[2ia]   + rtemp      x[2ia+1]  = x[2ia+1] + itemp
;
; Stack and interrupt handling:
;   * A 16-word frame is allocated; A10-A15, B3, B10-B14, CSR and IRP
;     are saved in it and restored on exit.
;   * CSR bit 0 is cleared on entry and the saved CSR is written back on
;     exit.
;   * The stack pointer is parked in IRP and B15 is then reused to hold
;     n2 - 6 (the inner loop counter seed); B15 is recovered from IRP
;     before the registers are restored.
; NOTE(review): the n2 - 6 seed and the unconditional prolog loads imply
; a minimum supported length -- confirm against the DSPLIB documentation.
;
; The instruction order, functional-unit assignment and execute-packet
; grouping are part of the algorithm: results are consumed a fixed number
; of packets after issue and branches are kept in flight across the loop
; boundaries. Do not reorder.
; -----------------------------------------------------------------------
76 _DSPF_sp_cfftr2_dit:
78 ; push the saved registers (also CSR, IRP) into a 16-word stack frame
79 SUBAW .D2 B15, 16, B15 ; allocate 16-word frame
80 || B .S1 no_int ; no_int is reached after the five save packets below
81 || MVC .S2 CSR, B1 ; B1 = caller's CSR
83 MV .S1X B15, A1 ; A1 = copy of frame ptr for the A-side stores
84 || STW .D2T1 A10, *B15[0]
86 STW .D2T2 B1, *B15[2] ; save CSR (before bit 0 is cleared)
87 || STW .D1T1 A11, *A1[1]
89 STW .D2T2 B11, *B15[4]
90 || STW .D1T1 A12, *A1[3]
91 || AND .L2 B1, -2, B1 ; clear bit 0 of the CSR copy (disable interrupts)
92 || MVC .S2 IRP, B2 ; B2 = caller's IRP
94 STW .D2T2 B2, *B15[6] ; save IRP
95 || STW .D1T1 A13, *A1[5]
96 || MVC .S2 B1, CSR ; write back CSR with bit 0 cleared
98 STW .D2T2 B13, *B15[8]
99 || STW .D1T1 A14, *A1[7]
101 no_int:
103 STW .D2T2 B14, *B15[10]
104 || STW .D1T1 A15, *A1[9]
106 STW .D2T2 B3, *B15[12] ; save B3 (branched to on exit)
107 || MVC .S2 B15, IRP ; park the stack pointer in IRP; B15 is reused below
; ---- pointer / counter setup ----
109 ADDAW .D1 A4, A6, A3 ; init x[2m] ptr
110 || SHR .S2X A6, 1, B_n2 ; init n2
111 || MV .L1X B4, A_w ; init w ptr
112 || STW .D2T2 B10, *B15[13] ; push b10
114 MV .S2X A3, B_x ; transfer x[2m] ptr
115 || SHL .S1 A6, 2, A_8n2 ; keep 8n2 for addr incr
116 || SUB .L2 B_n2, 6, B15 ; B15 = n2 - 6, inner loop cntr seed (SP is in IRP)
117 || STW .D2T2 B12, *B15[11] ; push b12
119 * ====================== PIPED LOOP PROLOG ======================================= *
121 LDDW .D2 *B_x++, B_x2mp1:B_x2m ; load x[2m+1]:x[2m]
122 || LDDW .D1 *A_w++, A_si:A_co ; load si:co
123 || MV .S2 B_x, B5 ; init x[2m] store ptr
125 MV .L1 A4, A11 ; init x[2ia] store ptr
126 || SHL .S2X A6, 2, B_8n2 ; copy of 8n2 on b-side
127 || MV B_n2, B_lx2mc ; load cntr for x[2m] loads
129 [B_lx2mc]SUB .L2 B_lx2mc, 1, B_lx2mc ; decr x[2m] load cntr
130 || MV .S1 A4, A_x ; init x[2ia] load ptr
131 || MV .S2X A4, B14 ; save base x ptr
132 || MV .D1 A4, A0 ; save base x ptr
133 || SUB .L1 A_w, 8, A_wbase ; save w base ptr (A4 no longer holds x)
135 MV .D2 B_n2, B_lx2iac ; init x[2ia] load cntr
136 || MV .L1X B_n2, A_stcnt ; init store cntr
137 || SHR .S2 B_n2, 1, B_hafn2 ; init half of n2
139 LDDW .D2 *B_x++, B_x2mp1:B_x2m ; load x[2m+1]:x[2m]
141 MPYSP .M1X A_co, B_x2m, A_p1 ; p1=co*x[2m]
142 || MPYSP .M2X A_co, B_x2mp1, B_p2 ; p2=co*x[2m+1]
144 [B_lx2mc]SUB .S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
145 || MV .L1X B15, A_nby2 ; init loop cntr
147 MPYSP .M1X A_si, B_x2mp1, A_p3 ; p3=si*x[2m+1]
148 || MPYSP .M2X A_si, B_x2m, B_p4 ; p4=si*x[2m]
150 LDDW .D2 *B_x++, B_x2mp1:B_x2m ; load x[2m+1]:x[2m]
152 MPYSP .M1X A_co, B_x2m, A_p1 ; p1=co*x[2m]
153 || MPYSP .M2X A_co, B_x2mp1, B_p2 ; p2=co*x[2m+1]
155 [B_lx2mc]SUB.S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
157 LDDW .D1 *A_x++, A_x2iap1:A_x2ia ; load x[2ia+1]:x[2ia]
158 || MPYSP .M1X A_si, B_x2mp1, A_p3 ; p3=si*x[2m+1]
159 || MPYSP .M2X A_si, B_x2m, B_p4 ; p4=si*x[2m]
160 || ADDSP .L1 A_p1, A_p3, A_rtemp ; rtemp=p1+p3
161 || SUBSP .L2 B_p2, B_p4, B_itemp ; itemp=p2-p4
163 LDDW .D2 *B_x++, B_x2mp1:B_x2m ; load x[2m+1]:x[2m]
164 ||[B_lx2iac]SUB.S2 B_lx2iac, 1, B_lx2iac ; decr load cntr
166 MPYSP .M1X A_co, B_x2m, A_p1 ; p1=co*x[2m]
167 || MPYSP .M2X A_co, B_x2mp1, B_p2 ; p2=co*x[2m+1]
169 [B_lx2mc]SUB.S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
171 LDDW .D1 *A_x++, A_x2iap1:A_x2ia ; load x[2ia+1]:x[2ia]
172 || MPYSP .M1X A_si, B_x2mp1, A_p3 ; p3=si*x[2m+1]
173 || MPYSP .M2X A_si, B_x2m, B_p4 ; p4=si*x[2m]
174 || ADDSP .L1 A_p1, A_p3, A_rtemp ; rtemp=p1+p3
175 || SUBSP .L2 B_p2, B_p4, B_itemp ; itemp=p2-p4
177 LDDW .D2 *B_x++, B_x2mp1:B_x2m ; load x[2m+1]:x[2m]
178 ||[B_lx2iac]SUB.S2 B_lx2iac, 1, B_lx2iac ; decr load cntr
179 || ADDSP .L1 A_x2ia, A_rtemp, A_x2ias ; x[2ia]=x[2ia]+rtemp
180 || SUBSP .L2X A_x2iap1, B_itemp, B_x2mp1s ; x[2m+1]=x[2ia+1]-itemp
182 MPYSP .M1X A_co, B_x2m, A_p1 ; p1=co*x[2m]
183 || MPYSP .M2X A_co, B_x2mp1, B_p2 ; p2=co*x[2m+1]
184 || B .S2 loop ; unconditional branch issued ahead of the kernel
186 [B_lx2mc]SUB.S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
187 || SUBSP .L1 A_x2ia, A_rtemp, A_x2ms ; x[2m]=x[2ia]-rtemp
188 || ADDSP .L2X A_x2iap1, B_itemp, B_x2iap1s ; x[2ia+1]=x[2ia+1]+itemp
190 * ================== PIPED LOOP KERNEL ======================================== *
; The kernel is four execute packets. Counters are decremented while
; non-zero; when a counter reads zero the matching pointer is advanced by
; the 8n2 increment and the counter is reloaded from B_n2.
191 loop:
193 LDDW .D1 *A_x++, A_x2iap1:A_x2ia ; load x[2ia+1]:x[2ia]
194 || MPYSP .M1X A_si, B_x2mp1, A_p3 ; p3=si*x[2m+1]
195 || MPYSP .M2X A_si, B_x2m, B_p4 ; p4=si*x[2m]
196 ||[!B_lx2mc]ADD.D2 B_x, B_8n2, B_x ; incr load ptr if required
197 || ADDSP .L1 A_p1, A_p3, A_rtemp ; rtemp=p1+p3
198 || SUBSP .L2 B_p2, B_p4, B_itemp ; itemp=p2-p4
199 ||[!A_stcnt]ADD.S2 B5, B_8n2, B5 ; incr store ptr if required
200 ||[!A_stcnt]ADD.S1 A11, A_8n2, A11 ; incr store ptr if required
202 LDDW .D2 *B_x++, B_x2mp1:B_x2m ; load x[2m+1]:x[2m]
203 ||[!B_lx2mc]LDDW.D1 *A_w++, A_si:A_co ; load si:co
204 ||[!B_lx2mc]MPY .M2 1, B_n2, B_lx2mc ; reset load cntr
205 ||[B_lx2iac]SUB .S2 B_lx2iac, 1, B_lx2iac ; decr load cntr
206 || ADDSP .L1 A_x2ia, A_rtemp, A_x2ias ; x[2ia]=x[2ia]+rtemp
207 || SUBSP .L2X A_x2iap1, B_itemp, B_x2mp1s ; x[2m+1]=x[2ia+1]-itemp
208 ||[A_nby2]SUB .S1 A_nby2, 1, A_nby2 ; decr loop cntr
209 ||[!A_stcnt]MPY.M1X 1, B_n2, A_stcnt ; reset store cntr
211 MPYSP .M1X A_co, B_x2m, A_p1 ; p1=co*x[2m]
212 || MPYSP .M2X A_co, B_x2mp1, B_p2 ; p2=co*x[2m+1]
213 ||[!B_lx2iac]ADD.S1 A_x, A_8n2, A_x ; incr load ptr if required
214 ||[A_nby2]B .S2 loop ; Branch loop
215 || STW .D1T1 A_x2ias, *A11++ ; store x[2ia]
216 || STW .D2T2 B_x2mp1s, *B5[1] ; store x[2m+1]
218 [B_lx2mc]SUB.S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
219 ||[!B_lx2iac]MPY.M2 1, B_n2, B_lx2iac ; reset load cntr
220 || SUBSP .L1 A_x2ia, A_rtemp, A_x2ms ; x[2m]=x[2ia]-rtemp
221 || ADDSP .L2X A_x2iap1, B_itemp, B_x2iap1s ; x[2ia+1]=x[2ia+1]+itemp
222 || STW .D1 B_x2iap1s, *A11++ ; store x[2ia+1]
223 || STW .D2 A_x2ms, *B5++[2] ; store x[2m]
224 ||[A_stcnt]SUB.S1 A_stcnt, 1, A_stcnt ; decr store cntr
226 * ======================= END OF PIPED LOOP KERNEL ========================== *
; Fall-through from the kernel. NOTE(review): the (e)/(p) tags below
; presumably mark work that drains the current pass (epilog) versus work
; that primes the next outer pass (prolog) -- the two are interleaved.
228 LDDW .D1 *A_x++, A_x2iap1:A_x2ia ; (e) load x[2ia+1]:x[2ia]
229 || MPYSP .M1X A_si, B_x2mp1, A_p3 ; (e) p3=si*x[2m+1]
230 || MPYSP .M2X A_si, B_x2m, B_p4 ; (e) p4=si*x[2m]
231 ||[!B_lx2mc]ADDAW.D2 B14, B_n2, B_x ; (p) init B_x for outer loop
232 || ADDSP .L1 A_p1, A_p3, A_rtemp ; (e) rtemp=p1+p3
233 || SUBSP .L2 B_p2, B_p4, B_itemp ; (e) itemp=p2-p4
234 ||[!A_stcnt]ADD.S2 B5, B_8n2, B5 ; (e) incr store ptr if required
235 ||[!A_stcnt]ADD.S1 A11, A_8n2, A11 ; (e) incr store ptr if required
237 ; loads are predicated with B_hafn2 so that the last prolog does not
238 ; perform invalid loads
240 [B_hafn2] LDDW.D2 *B_x++, B_x2mp1:B_x2m ; (p) load x[2m+1]:x[2m]
241 ||[!B_lx2mc]LDDW.D1 *A4, A_si:A_co ; (p) load si:co from the saved w base (A4 = A_wbase)
242 ||[!B_lx2mc]MPY .M2 1, B_hafn2, B_lx2mc ; (p) reset load cntr
243 ||[B_lx2iac]SUB .S2 B_lx2iac, 1, B_lx2iac ; (p) decr load cntr
244 || ADDSP .L1 A_x2ia, A_rtemp, A_x2ias ; (e) x[2ia]=x[2ia]+rtemp
245 || SUBSP .L2X A_x2iap1, B_itemp, B_x2mp1s ; (e) x[2m+1]=x[2ia+1]-itemp
246 || ADD .S1 A4, 8, A_w ; (p) set w ptr = w base + 8
247 ||[!A_stcnt]MPY.M1X 1, B_n2, A_stcnt ; (p) reset store cntr
249 MPYSP .M1X A_co, B_x2m, A_p1 ; (e) p1=co*x[2m]
250 || MPYSP .M2X A_co, B_x2mp1, B_p2 ; (e) p2=co*x[2m+1]
251 ||[!B_lx2iac]ADD.S1 A_x, A_8n2, A_x ; (e) incr load ptr if required
252 || MV .S2 B5, B12 ; preserve store ptr (B12 takes over from B5)
253 || STW .D1T1 A_x2ias, *A11++ ; (e) store x[2ia]
254 || STW .D2T2 B_x2mp1s, *B5[1] ; (e) store x[2m+1]
256 [B_lx2mc]SUB.S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
257 ||[!B_lx2iac]MPY.M2 1, B_n2, B_lx2iac ; reset load cntr
258 || SUBSP .L1 A_x2ia, A_rtemp, A_x2ms ; (e) x[2m]=x[2ia]-rtemp
259 || ADDSP .L2X A_x2iap1, B_itemp, B_x2iap1s ; (e) x[2ia+1]=x[2ia+1]+itemp
260 || STW .D1 B_x2iap1s, *A11++ ; (e) store x[2ia+1]
261 || STW .D2 A_x2ms, *B12++[2] ; (e) store x[2m]
262 ||[A_stcnt] SUB.S1 A_stcnt, 1, A_stcnt ; (e) decr store cntr
263 || MPY .M1X B13, 1, A_nby2 ; A_nby2 = n2 (B13); decremented below: is it last outer loop?
265 LDDW .D1 *A_x++, A_x2iap1:A_x2ia ; (e) load x[2ia+1]:x[2ia]
266 || MPYSP .M1X A_si, B_x2mp1, A_p3 ; (e) p3=si*x[2m+1]
267 || MPYSP .M2X A_si, B_x2m, B_p4 ; (e) p4=si*x[2m]
268 ||[!B_lx2mc]ADDAW.D2 B_x, B_n2, B_x ; (p) incr ptr if required
269 || ADDSP .L1 A_p1, A_p3, A_rtemp ; (e) rtemp=p1+p3
270 || SUBSP .L2 B_p2, B_p4, B_itemp ; (e) itemp=p2-p4
271 ||[!A_stcnt]ADD.S2 B12, B_8n2, B12 ; (e) incr store ptr if required
272 ||[!A_stcnt]ADD.S1 A11, A_8n2, A11 ; (e) incr store ptr if required
274 [B_hafn2] LDDW.D2 *B_x++, B_x2mp1:B_x2m ; (p) load x[2m+1]:x[2m]
275 ||[!B_lx2mc]LDDW.D1 *A_w++, A_si:A_co ; (p) load si:co
276 ||[!B_lx2mc]MPY .M2 1, B_hafn2, B_lx2mc ; (p) reset load cntr
277 ||[B_lx2iac]SUB .S2 B_lx2iac, 1, B_lx2iac ; (p) decr load cntr
278 || ADDSP .L1 A_x2ia, A_rtemp, A_x2ias ; (e) x[2ia]=x[2ia]+rtemp
279 || SUBSP .L2X A_x2iap1, B_itemp, B_x2mp1s ; (e) x[2m+1]=x[2ia+1]-itemp
280 ||[!A_stcnt]MPY.M1X 1, B_n2, A_stcnt ; (e) reset store cntr
281 ||[A_nby2]SUB .S1 A_nby2, 1, A_nby2 ; A_nby2 = n2 - 1: zero means last outer loop
283 MPYSP .M1X A_co, B_x2m, A_p1 ; (p) p1=co*x[2m]
284 || MPYSP .M2X A_co, B_x2mp1, B_p2 ; (p) p2=co*x[2m+1]
285 ||[!B_lx2iac]ADD.S1 A_x, A_8n2, A_x ; (e) incr load ptr if required
286 || MV .S2 B_8n2, B9 ; keep the current B_8n2 increment in B9 (B_8n2 is halved below)
287 || STW .D1T1 A_x2ias, *A11++ ; (e) store x[2ia]
288 || STW .D2T2 B_x2mp1s, *B12[1] ; (e) store x[2m+1]
290 [B_lx2mc]SUB.S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
291 ||[!B_lx2iac]MPY.M2 1, B_n2, B_lx2iac ; reset load cntr
292 || SUBSP .L1 A_x2ia, A_rtemp, A_x2ms ; (e) x[2m]=x[2ia]-rtemp
293 || ADDSP .L2X A_x2iap1, B_itemp, B_x2iap1s ; (e) x[2ia+1]=x[2ia+1]+itemp
294 || STW .D1 B_x2iap1s, *A11++ ; (e) store x[2ia+1]
295 || STW .D2 A_x2ms, *B12++[2] ; (e) store x[2m]
296 ||[A_stcnt] SUB.S1 A_stcnt, 1, A_stcnt ; decr store cntr
298 LDDW .D1 *A_x++, A_x2iap1:A_x2ia ; (e) load x[2ia+1]:x[2ia]
299 || MPYSP .M1X A_si, B_x2mp1, A_p3 ; (e) p3=si*x[2m+1]
300 || MPYSP .M2X A_si, B_x2m, B_p4 ; (e) p4=si*x[2m]
301 ||[!B_lx2mc]ADDAW.D2 B_x, B_n2, B_x ; (p) incr ptr if required
302 || ADDSP .L1 A_p1, A_p3, A_rtemp ; (e) rtemp=p1+p3
303 || SUBSP .L2 B_p2, B_p4, B_itemp ; (e) itemp=p2-p4
304 ||[!A_stcnt]ADD.S2 B12, B9, B12 ; (e) incr store ptr if required
305 ||[!A_stcnt]ADD.S1 A11, A_8n2, A11 ; (e) incr store ptr if required
307 [B_hafn2] LDDW.D2 *B_x++, B_x2mp1:B_x2m ; (p) load x[2m+1]:x[2m]
308 ||[!B_lx2mc]LDDW.D1 *A_w++, A_si:A_co ; (p) load si:co
309 ||[!B_lx2mc]MPY .M2 1, B_hafn2, B_lx2mc ; (p) reset load cntr
310 ||[B_lx2iac]SUB .S2 B_lx2iac, 1, B_lx2iac ; (p) decr load cntr
311 || ADDSP .L1 A_x2ia, A_rtemp, A_x2ias ; (e) x[2ia]=x[2ia]+rtemp
312 || SUBSP .L2X A_x2iap1, B_itemp, B_x2mp1s ; (e) x[2m+1]=x[2ia+1]-itemp
313 ||[!A_stcnt]MPY .M1X 1, B_n2, A_stcnt ; (e) reset store cntr
314 || MV A11, A14 ; preserve store ptr (A14 takes over from A11)
316 MPYSP .M2X A_co, B_x2mp1, B_p2 ; (p) p2=co*x[2m+1]
317 || MV .S1 A0, A_x ; set the x[2ia] ptr
318 || SHR .S2 B_8n2, 1, B_8n2 ; set B_8n2 for next
319 || STW .D1T1 A_x2ias, *A14++ ; (e) store x[2ia]
320 || STW .D2T2 B_x2mp1s, *B12[1] ; (e) store x[2m+1]
321 || INTSP .L2 B_hafn2, B5 ; B5 = (float)hafn2; the SPINT below converts it back into B_n2 (copy routed through the L unit)
322 || SUBSP .L1 A_x2ia, A_rtemp, A11 ; (e) x[2m]=x[2ia]-rtemp, kept in A11 and stored by the later STW of A11
323 || MPYSP .M1X A_co, B_x2m, A_p1 ; (p) co*x[2m]
325 [B_lx2mc]SUB.S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
326 ||[!B_lx2iac]MPY.M2 1, B_hafn2, B_lx2iac ; reset load cntr
327 || ADDSP .L2X A_x2iap1, B_itemp, B_x2iap1s ; x[2ia+1]=x[2ia+1]+itemp
328 || STW .D1 B_x2iap1s, *A14++ ; store x[2ia+1]
329 || STW .D2 A_x2ms, *B12++[2] ; store x[2m]
330 ||[A_stcnt]SUB.S1 A_stcnt, 1, A_stcnt ; decr store cntr
332 [B_hafn2]LDDW.D1 *A_x++, A_x2iap1:A_x2ia ; (e) load x[2ia+1]:x[2ia]
333 || MPYSP .M1X A_si, B_x2mp1, A_p3 ; (p) p3=si*x[2m+1]
334 || MPYSP .M2X A_si, B_x2m, B_p4 ; (p) p4=si*x[2m]
335 ||[!B_lx2mc]ADDAW.D2 B_x, B_n2, B_x ; (p) incr ptr if required
336 || ADDSP .L1 A_p1, A_p3, A_rtemp ; (p) rtemp=p1+p3
337 || SUBSP .L2 B_p2, B_p4, B_itemp ; (p) itemp=p2-p4
338 ||[!A_stcnt]ADD.S2 B12, B9, B12 ; (e) incr store ptr if required
339 ||[!A_stcnt]ADD.S1 A14, A_8n2, A14 ; (e) incr store ptr if required
341 [B_hafn2] LDDW.D2 *B_x++, B_x2mp1:B_x2m ; (p) load x[2m+1]:x[2m]
342 ||[!B_lx2mc]LDDW.D1 *A_w++, A_si:A_co ; (p) load si:co
343 ||[!B_lx2mc]MPY .M2 1, B_hafn2, B_lx2mc ; (p) reset load cntr
344 || ADDSP .L1 A_x2ia, A_rtemp, A_x2ias ; (p) x[2ia]=x[2ia]+rtemp
345 || SUBSP .L2X A_x2iap1, B_itemp, B_x2mp1s ; (p) x[2m+1]=x[2ia+1]-itemp
346 ||[B_lx2iac]SUB.S2 B_lx2iac, 1, B_lx2iac ; decr load cntr
347 ||[!A_stcnt]SHR.S1 A_8n2, 3, A_stcnt ; (e) reset store cntr
348 || MPY .M1X B_n2, 4, A15 ; generate increment: A15 = 4*B_n2, added to A_x below
350 MPYSP .M1X A_co, B_x2m, A_p1 ; p1=co*x[2m]
351 || MPYSP .M2X A_co, B_x2mp1, B_p2 ; p2=co*x[2m+1]
352 ||[A_nby2]B .S1 loop ; Branch next outer loop
353 || STW .D1T1 A_x2ias, *A14++ ; store x[2ia]
354 || STW .D2T2 B_x2mp1s, *B12[1] ; store x[2m+1]
355 || SPINT .L2 B5, B_n2 ; get B_n2 for next iteration (converts the INTSP result back to integer)
356 || ADD .S2 B14, B_8n2, B5 ; get store ptr: B5 = x base + B_8n2
357 ||[A_stcnt]SUB.L1 A_stcnt, 1, A_stcnt ; decr store cntr
359 [B_lx2mc]SUB.S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
360 ||[!B_lx2iac]MPY.M2 1, B_hafn2, B_lx2iac ; reset load cntr
361 || SUBSP .L1 A_x2ia, A_rtemp, A_x2ms ; x[2m]=x[2ia]-rtemp
362 || ADDSP .L2X A_x2iap1, B_itemp, B_x2iap1s ; x[2ia+1]=x[2ia+1]+itemp
363 || STW .D1 B_x2iap1s, *A14++ ; store x[2ia+1]
364 || STW .D2 A11, *B12++[2] ; store x[2m] (value parked in A11 by the earlier SUBSP)
365 ||[A_nby2]MPY .M1X B15, 1, A_nby2 ; set loop counter: reload from B15 (n2 - 6 seed)
366 ||[!B_lx2iac]ADD.S1 A_x, A15, A_x ; incr load ptr
368 [B_hafn2] LDDW.D1 *A_x++, A_x2iap1:A_x2ia ; (p) load x[2ia+1]:x[2ia]
369 || MPYSP .M1X A_si, B_x2mp1, A_p3 ; (p) p3=si*x[2m+1]
370 || MPYSP .M2X A_si, B_x2m, B_p4 ; (p) p4=si*x[2m]
371 ||[!B_lx2mc]ADDAW.D2 B_x, B_n2, B_x ; (p) incr ptr if required
372 || ADDSP .L1 A_p1, A_p3, A_rtemp ; (p) rtemp=p1+p3
373 || SUBSP .L2 B_p2, B_p4, B_itemp ; (p) itemp=p2-p4
374 ||[!A_stcnt]ADD.S2 B12, B9, B12 ; (e) incr store ptr if required
375 ||[!A_stcnt]ADD.S1 A14, A_8n2, A14 ; (e) incr store ptr if required
377 [B_hafn2] LDDW.D2 *B_x++, B_x2mp1:B_x2m ; (p) load x[2m+1]:x[2m]
378 ||[!B_lx2mc]LDDW.D1 *A_w++, A_si:A_co ; (p) load si:co
379 ||[!B_lx2mc]MPY .M2 1, B_hafn2, B_lx2mc ; (p) reset load cntr
380 ||[B_lx2iac]SUB .S2 B_lx2iac, 1, B_lx2iac ; (p) decr load cntr
381 || ADDSP .L1 A_x2ia, A_rtemp, A_x2ias ; (p) x[2ia]=x[2ia]+rtemp
382 || SUBSP .L2X A_x2iap1, B_itemp, B_x2mp1s ; x[2m+1]=x[2ia+1]-itemp
383 || MPY .M1X 4, B_n2, A_8n2 ; set A_8n2
384 || MV .S1 A0, A11 ; reset x[2ia] store ptr to the saved x base
386 MPYSP .M1X A_co, B_x2m, A_p1 ; p1=co*x[2m]
387 || MPYSP .M2X A_co, B_x2mp1, B_p2 ; p2=co*x[2m+1]
388 || SHR .S2 B_hafn2, 1, B_hafn2 ; for next outer loop
389 || STW .D1T1 A_x2ias, *A14++ ; store x[2ia]
390 || STW .D2T2 B_x2mp1s, *B12[1] ; store x[2m+1]
391 ||[A_nby2]B .S1 loop ; branch
393 [B_lx2mc]SUB .S2 B_lx2mc, 1, B_lx2mc ; decr load cntr
394 ||[!B_lx2iac]MPY.M2 1, B_n2, B_lx2iac ; reset load cntr
395 || SUBSP .L1 A_x2ia, A_rtemp, A_x2ms ; x[2m]=x[2ia]-rtemp
396 || ADDSP .L2X A_x2iap1, B_itemp, B_x2iap1s ; x[2ia+1]=x[2ia+1]+itemp
397 || STW .D1 B_x2iap1s, *A14 ; store x[2ia+1]
398 || STW .D2 A_x2ms, *B12 ; store x[2m]
399 || MPY .M1 A_stcnt, 0, A_stcnt ; reset store count (A_stcnt = 0)
400 ||[!B_lx2iac]ADD.S1 A_x, A_8n2, A_x ; incr load ptr
401 ****************************************************************************
; Reached when the two [A_nby2] branches above are not taken. Recover the
; stack pointer, reload the saved registers from the 16-word frame,
; restore IRP and CSR, and return through B3.
403 MVC .S2 IRP, B15 ; recover the stack pointer parked in IRP
405 MV .S1X B15, A1 ; A1 = frame ptr copy for the A-side loads
406 || LDW .D2T2 *B15[12], B3 ; reload B3 (return target) first
408 LDW .D2T1 *B15[0], A10
409 || LDW .D1T2 *A1[13], B10
411 LDW .D2T1 *B15[1], A11
412 || LDW .D1T2 *A1[6], B2 ; B2 = saved IRP
414 LDW .D2T1 *B15[3], A12
415 || LDW .D1T2 *A1[2], B1 ; B1 = saved CSR
417 LDW .D2T1 *B15[5], A13
418 || LDW .D1T2 *A1[8], B13
420 LDW .D2T1 *B15[7], A14
421 || LDW .D1T2 *A1[10], B14
423 LDW .D2T1 *B15[9], A15
424 || LDW .D1T2 *A1[4], B11
425 || B .S2 B3 ; return; the remaining packets execute before control transfers
427 MVC .S2 B2, IRP ; restore caller's IRP
428 || LDW .D1T2 *A1[11], B12
430 NOP 2
432 ADDAW B15, 16, B15 ; release the 16-word frame
434 MVC .S2 B1, CSR ; restore caller's CSR (value saved before bit 0 was cleared)
436 .end
438 * ======================================================================= *
439 * End of file: DSPF_sp_cfftr2_dit.asm *
440 * ----------------------------------------------------------------------- *
441 * Copyright (c) 2011 Texas Instruments, Incorporated. *
442 * All Rights Reserved. *
443 * ======================================================================= *