; Initial code package commit.
; [apps/c55x-digital-mic-decimation.git] / src / pick_bits_cic_f2.asm
;============================================================================
; Copyright (c) 2016 Texas Instruments Incorporated.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;
;    Redistributions of source code must retain the above copyright
;    notice, this list of conditions and the following disclaimer.
;
;    Redistributions in binary form must reproduce the above copyright
;    notice, this list of conditions and the following disclaimer in the
;    documentation and/or other materials provided with the
;    distribution.
;
;    Neither the name of Texas Instruments Incorporated nor the names of
;    its contributors may be used to endorse or promote products derived
;    from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;
;===============================================================================
; Function:    pickBitsCic
; Processor:   C55xx, Rev. 3
; Description:
;   Unpacks the "left" and "right" 32-bit packed DMA buffers that hold
;   the bit-stream output of a digital microphone.
;   Applies a CIC filter to the unpacked bits, with downsampling
;   factor DS = 16 and NS = 4 stages.
;
;   Inner-loop cycle count: not recorded (the original header left this
;   as "x cycle inner loop").
;
;   C-callable.
;   Mnemonic assembly.
;
; Usage:    void pickBitsCic(
;               Uint32 *lData,          -> XAR0
;               Uint32 *rData,          -> XAR1
;               Uint16 inDataLen,       -> T0
;               Int32 *cicState,        -> XAR2
;               Int32 *outSamps,        -> XAR3
;               Uint16 *pNumOutSamps    -> XAR4
;           );
;
;****************************************************************
; Assembler mode assumptions and symbol setup
; -------------------------------------------
                .mmregs                         ; make the memory-mapped register names available
                .CPL_on                         ; assemble assuming CPL = 1
                .ARMS_off                       ; assemble assuming ARMS = 0
                .C54CM_off                      ; assemble assuming C54CM = 0

                .include "pick_bits_cic.inc"    ; DS (used for the inner loop count) is not defined
                                                ; in this file -- presumably it comes from here

                .def    _pickBitsCic            ; export the entry point

; Stack frame layout (sizes in words)
; -----------------------------------
ARG_BLK_SZ      .set    0                       ; argument block
FRAME_SZ        .set    0                       ; local variables
REG_SAVE_SZ     .set    0                       ; save-on-entry registers saved
RET_ADDR_SZ     .set    1                       ; return address
; Offset from SP to the function arguments on the stack
PARAM_OFFSET    .set    ARG_BLK_SZ + FRAME_SZ + REG_SAVE_SZ + RET_ADDR_SZ

; Local variables
; ---------------
; (none)

; Register aliases
; ----------------
                .asg    AR0, lData_ptr          ; "left" input data pointer
                .asg    AR1, rData_ptr          ; "right" input data pointer
                .asg    XAR2, cicState_xptr     ; CIC state (extended pointer)
                .asg    AR3, outSamps_ptr       ; output sample pointer
                .asg    AR4, numOutSamps_ptr    ; pointer to the output sample count
                .asg    BRC0, loop0_cnt         ; outer (loop0) repeat counter
                .asg    BRC1, loop1_cnt         ; inner (loop1) repeat counter

                .text
;----------------------------------------------------------------
; _pickBitsCic
;
; C prototype:
;   void pickBitsCic(Uint32 *lData, Uint32 *rData, Uint16 inDataLen,
;                    Int32 *cicState, Int32 *outSamps,
;                    Uint16 *pNumOutSamps);
;
; In:     XAR0 = lData          "left"  packed input words
;         XAR1 = rData          "right" packed input words
;         T0   = inDataLen      number of 32-bit words in each input buffer
;         XAR2 = cicState       filter state, updated in place:
;                                 4 x 32-bit integrator accumulators acc[0..3]
;                                 followed by
;                                 4 x 32-bit differentiator delays diffDly[0..3]
;         XAR3 = outSamps       receives 32-bit output samples
;         XAR4 = pNumOutSamps   receives the output sample count
;
; Out:    *pNumOutSamps = low 16 bits of (inDataLen << 2).
;         For every input index four 32-bit samples are written to outSamps,
;         one per 16-bit half-word, in the order
;         left MSW, left LSW, right MSW, right LSW.
;         All four half-words are run through the same cicState.
;
; Each 16-bit half-word is consumed one bit at a time, most significant bit
; first; a 0 bit is fed to the filter as -1 and a 1 bit as +1.  The inner
; loops run loop1_cnt+1 = DS times per half-word.
;
; inDataLen == 0 writes 0 to *pNumOutSamps and returns without touching
; lData, rData, cicState or outSamps.
;
; Clobbers: AC0-AC3, T0, T1, AR0, AR1, AR3, XAR4, BRC0, BRC1,
;           ST0_55 fields ACOV0-3, TC1, TC2 and C.
;           ARMS is cleared while the routine runs and set again on return.
;----------------------------------------------------------------
_pickBitsCic:

;
; Save any save-on-entry registers that are used
;----------------------------------------------------------------
; (nothing is saved; REG_SAVE_SZ = 0)

;
; Allocate the local frame and argument block
;----------------------------------------------------------------
                AADD        #-(ARG_BLK_SZ + FRAME_SZ), SP

;
; Configure the status registers as needed
;----------------------------------------------------------------
                AND         #001FFh, mmap(ST0_55)   ; clear ACOV[0-3], TC[1-2], and C

                ; ST1_55 and ST3_55 are left as the caller set them; the
                ; writes below are disabled.
                ;OR          #04540h, mmap(ST1_55)   ; set CPL, M40, SXMD, FRCT
                ;AND         #0FDDFh, mmap(ST1_55)   ; clear SATD, C54CM

                ;AND         #07A00h, mmap(ST2_55)   ; clear ARMS, RDM, CDPLC, AR[0-7]LC
                BCLR        ARMS                    ; clear ARMS

                ;AND         #0FCDDh, mmap(ST3_55)   ; clear SATA, SMUL ; note -- must always write 1100b to bits 11-8, 0b to bit 4

;
; Compute number of output samples
;----------------------------------------------------------------
                MOV         T0, AC0                     ; AC0 = inDataLen
                MOV         AC0 << #2, *numOutSamps_ptr ; *pNumOutSamps = inDataLen<<2 (low 16 bits)

;
; Empty input: nothing to filter
;----------------------------------------------------------------
                ; Without this check inDataLen-1 wraps to 0FFFFh and the
                ; outer loop body would execute 65536 times.
                BCC         pickBitsCic_exit, T0 == #0

;
; Loop setup
;----------------------------------------------------------------
                ; loop0 (outer) runs once per 32-bit input word pair
                ; loop1 (inner) runs once per bit of a 16-bit half-word
                SUB         #1, T0                  ; T0 = inDataLen-1
             || MOV         #(DS-1), loop1_cnt
                MOV         T0, mmap(@loop0_cnt)    ; loop0_cnt = inDataLen-1

;
; Output sample loop
;----------------------------------------------------------------
                RPTB        loop0-1

;
; Unpack current "left" word
;----------------------------------------------------------------
                ;
                ; MSW of 32-bit word
                ;

                ; Load integrator state
                AMOV        cicState_xptr, XAR4
             || MOV         *lData_ptr+, T1     ; T1 = MSW of "left" word
                MOV         dbl(*AR4+), AC0     ; AC0 = acc[0]
                MOV         dbl(*AR4+), AC1     ; AC1 = acc[1]
                MOV         dbl(*AR4+), AC2     ; AC2 = acc[2]
                MOV         dbl(*AR4+), AC3     ; AC3 = acc[3]
             || RPTBLOCAL   loop1_left1-1
                MOV         #0, T0
             || ROL         TC2, T1, TC2, T1    ; TC2 = next input bit (MSB of T1)
                ROL         TC2, T0, TC2, T0    ; LSB of T0 = bit
                XCCPART     T0==#0
             || SUB         #1, T0              ; T0 = input = 2*bit-1: 0->-1, 1->+1

                ; Perform integration for current input
                ADD         T0, AC0             ; integrator 1 output ; AC0 = acc[0]
                ADD         AC0, AC1            ; integrator 2 output ; AC1 = acc[1]
                ADD         AC1, AC2            ; integrator 3 output ; AC2 = acc[2]
                ADD         AC2, AC3            ; integrator 4 output ; AC3 = acc[3]
loop1_left1:

                ; Store integrator state
                AMOV        cicState_xptr, XAR4
                MOV         AC0, dbl(*AR4+)
                MOV         AC1, dbl(*AR4+)
                MOV         AC2, dbl(*AR4+)
                MOV         AC3, dbl(*AR4+)     ; AR4 now points at diffDly[0]

                ; Compute differentiator output
                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[0]
                MOV         AC3, dbl(*AR4+)     ; diffDly[0] = AC3 = acc[3]
             || SUB         AC0, AC3            ; differentiator 1 output
                                                ; AC3 = diff[0] = acc[3] - diffDly[0]

                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[1]
                MOV         AC3, dbl(*AR4+)     ; diffDly[1] = AC3 = diff[0]
             || SUB         AC0, AC3            ; differentiator 2 output
                                                ; AC3 = diff[1] = diff[0] - diffDly[1]

                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[2]
                MOV         AC3, dbl(*AR4+)     ; diffDly[2] = AC3 = diff[1]
             || SUB         AC0, AC3            ; differentiator 3 output
                                                ; AC3 = diff[2] = diff[1] - diffDly[2]

                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[3]
                MOV         AC3, dbl(*AR4+)     ; diffDly[3] = AC3 = diff[2]
             || SUB         AC0, AC3            ; differentiator 4 output
                                                ; AC3 = diff[3] = diff[2] - diffDly[3]

                ; Store output
                MOV         AC3, dbl(*outSamps_ptr+)

                ;
                ; LSW of 32-bit word
                ;

                ; Load integrator state
                AMOV        cicState_xptr, XAR4
             || MOV         *lData_ptr+, T1     ; T1 = LSW of "left" word
                MOV         dbl(*AR4+), AC0     ; AC0 = acc[0]
                MOV         dbl(*AR4+), AC1     ; AC1 = acc[1]
                MOV         dbl(*AR4+), AC2     ; AC2 = acc[2]
                MOV         dbl(*AR4+), AC3     ; AC3 = acc[3]
             || RPTBLOCAL   loop1_left2-1
                MOV         #0, T0
             || ROL         TC2, T1, TC2, T1    ; TC2 = next input bit (MSB of T1)
                ROL         TC2, T0, TC2, T0    ; LSB of T0 = bit
                XCCPART     T0==#0
             || SUB         #1, T0              ; T0 = input = 2*bit-1: 0->-1, 1->+1

                ; Perform integration for current input
                ADD         T0, AC0             ; integrator 1 output ; AC0 = acc[0]
                ADD         AC0, AC1            ; integrator 2 output ; AC1 = acc[1]
                ADD         AC1, AC2            ; integrator 3 output ; AC2 = acc[2]
                ADD         AC2, AC3            ; integrator 4 output ; AC3 = acc[3]
loop1_left2:

                ; Store integrator state
                AMOV        cicState_xptr, XAR4
                MOV         AC0, dbl(*AR4+)
                MOV         AC1, dbl(*AR4+)
                MOV         AC2, dbl(*AR4+)
                MOV         AC3, dbl(*AR4+)     ; AR4 now points at diffDly[0]

                ; Compute differentiator output
                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[0]
                MOV         AC3, dbl(*AR4+)     ; diffDly[0] = AC3 = acc[3]
             || SUB         AC0, AC3            ; differentiator 1 output
                                                ; AC3 = diff[0] = acc[3] - diffDly[0]

                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[1]
                MOV         AC3, dbl(*AR4+)     ; diffDly[1] = AC3 = diff[0]
             || SUB         AC0, AC3            ; differentiator 2 output
                                                ; AC3 = diff[1] = diff[0] - diffDly[1]

                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[2]
                MOV         AC3, dbl(*AR4+)     ; diffDly[2] = AC3 = diff[1]
             || SUB         AC0, AC3            ; differentiator 3 output
                                                ; AC3 = diff[2] = diff[1] - diffDly[2]

                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[3]
                MOV         AC3, dbl(*AR4+)     ; diffDly[3] = AC3 = diff[2]
             || SUB         AC0, AC3            ; differentiator 4 output
                                                ; AC3 = diff[3] = diff[2] - diffDly[3]

                ; Store output
                MOV         AC3, dbl(*outSamps_ptr+)

;
; Unpack current "right" word
;----------------------------------------------------------------
                ;
                ; MSW of 32-bit word
                ;

                ; Load integrator state
                AMOV        cicState_xptr, XAR4
             || MOV         *rData_ptr+, T1     ; T1 = MSW of "right" word
                MOV         dbl(*AR4+), AC0     ; AC0 = acc[0]
                MOV         dbl(*AR4+), AC1     ; AC1 = acc[1]
                MOV         dbl(*AR4+), AC2     ; AC2 = acc[2]
                MOV         dbl(*AR4+), AC3     ; AC3 = acc[3]
             || RPTBLOCAL   loop1_right1-1
                MOV         #0, T0
             || ROL         TC2, T1, TC2, T1    ; TC2 = next input bit (MSB of T1)
                ROL         TC2, T0, TC2, T0    ; LSB of T0 = bit
                XCCPART     T0==#0
             || SUB         #1, T0              ; T0 = input = 2*bit-1: 0->-1, 1->+1

                ; Perform integration for current input
                ADD         T0, AC0             ; integrator 1 output ; AC0 = acc[0]
                ADD         AC0, AC1            ; integrator 2 output ; AC1 = acc[1]
                ADD         AC1, AC2            ; integrator 3 output ; AC2 = acc[2]
                ADD         AC2, AC3            ; integrator 4 output ; AC3 = acc[3]
loop1_right1:

                ; Store integrator state
                AMOV        cicState_xptr, XAR4
                MOV         AC0, dbl(*AR4+)
                MOV         AC1, dbl(*AR4+)
                MOV         AC2, dbl(*AR4+)
                MOV         AC3, dbl(*AR4+)     ; AR4 now points at diffDly[0]

                ; Compute differentiator output
                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[0]
                MOV         AC3, dbl(*AR4+)     ; diffDly[0] = AC3 = acc[3]
             || SUB         AC0, AC3            ; differentiator 1 output
                                                ; AC3 = diff[0] = acc[3] - diffDly[0]

                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[1]
                MOV         AC3, dbl(*AR4+)     ; diffDly[1] = AC3 = diff[0]
             || SUB         AC0, AC3            ; differentiator 2 output
                                                ; AC3 = diff[1] = diff[0] - diffDly[1]

                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[2]
                MOV         AC3, dbl(*AR4+)     ; diffDly[2] = AC3 = diff[1]
             || SUB         AC0, AC3            ; differentiator 3 output
                                                ; AC3 = diff[2] = diff[1] - diffDly[2]

                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[3]
                MOV         AC3, dbl(*AR4+)     ; diffDly[3] = AC3 = diff[2]
             || SUB         AC0, AC3            ; differentiator 4 output
                                                ; AC3 = diff[3] = diff[2] - diffDly[3]

                ; Store output
                MOV         AC3, dbl(*outSamps_ptr+)

                ;
                ; LSW of 32-bit word
                ;

                ; Load integrator state
                AMOV        cicState_xptr, XAR4
             || MOV         *rData_ptr+, T1     ; T1 = LSW of "right" word
                MOV         dbl(*AR4+), AC0     ; AC0 = acc[0]
                MOV         dbl(*AR4+), AC1     ; AC1 = acc[1]
                MOV         dbl(*AR4+), AC2     ; AC2 = acc[2]
                MOV         dbl(*AR4+), AC3     ; AC3 = acc[3]
             || RPTBLOCAL   loop1_right2-1
                MOV         #0, T0
             || ROL         TC2, T1, TC2, T1    ; TC2 = next input bit (MSB of T1)
                ROL         TC2, T0, TC2, T0    ; LSB of T0 = bit
                XCCPART     T0==#0
             || SUB         #1, T0              ; T0 = input = 2*bit-1: 0->-1, 1->+1

                ; Perform integration for current input
                ADD         T0, AC0             ; integrator 1 output ; AC0 = acc[0]
                ADD         AC0, AC1            ; integrator 2 output ; AC1 = acc[1]
                ADD         AC1, AC2            ; integrator 3 output ; AC2 = acc[2]
                ADD         AC2, AC3            ; integrator 4 output ; AC3 = acc[3]
loop1_right2:

                ; Store integrator state
                AMOV        cicState_xptr, XAR4
                MOV         AC0, dbl(*AR4+)
                MOV         AC1, dbl(*AR4+)
                MOV         AC2, dbl(*AR4+)
                MOV         AC3, dbl(*AR4+)     ; AR4 now points at diffDly[0]

                ; Compute differentiator output
                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[0]
                MOV         AC3, dbl(*AR4+)     ; diffDly[0] = AC3 = acc[3]
             || SUB         AC0, AC3            ; differentiator 1 output
                                                ; AC3 = diff[0] = acc[3] - diffDly[0]

                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[1]
                MOV         AC3, dbl(*AR4+)     ; diffDly[1] = AC3 = diff[0]
             || SUB         AC0, AC3            ; differentiator 2 output
                                                ; AC3 = diff[1] = diff[0] - diffDly[1]

                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[2]
                MOV         AC3, dbl(*AR4+)     ; diffDly[2] = AC3 = diff[1]
             || SUB         AC0, AC3            ; differentiator 3 output
                                                ; AC3 = diff[2] = diff[1] - diffDly[2]

                MOV         dbl(*AR4), AC0      ; AC0 = diffDly[3]
                MOV         AC3, dbl(*AR4+)     ; diffDly[3] = AC3 = diff[2]
             || SUB         AC0, AC3            ; differentiator 4 output
                                                ; AC3 = diff[3] = diff[2] - diffDly[3]

                ; Store output
                MOV         AC3, dbl(*outSamps_ptr+)
loop0:

;
; Common exit (also the target of the empty-input check)
;----------------------------------------------------------------
pickBitsCic_exit:

;
; Restore status regs to expected C-convention values as needed
;----------------------------------------------------------------
                ;BCLR        M40                 ; clear M40
                ;BCLR        FRCT                ; clear FRCT

                BSET        ARMS                ; set ARMS

                ;BSET        SMUL                ; set SMUL

;
; Deallocate the local frame and argument block
;----------------------------------------------------------------
                AADD        #(ARG_BLK_SZ + FRAME_SZ), SP

;
; Restore any save-on-entry registers that are used
;----------------------------------------------------------------
; (nothing was saved)

;
; Return to calling function
;----------------------------------------------------------------
                RET                             ; return to calling function