[ep-processor-libraries/dsplib.git] / ti / dsplib / src / DSP_fir_cplx_hM4X4 / c66 / DSP_fir_cplx_hM4X4.c
1 /*======================================================================= */
2 /* TEXAS INSTRUMENTS, INC. */
3 /* */
4 /* DSPLIB DSP Signal Processing Library */
5 /* */
6 /* This library contains proprietary intellectual property of Texas */
7 /* Instruments, Inc. The library and its source code are protected by */
8 /* various copyrights, and portions may also be protected by patents or */
9 /* other legal protections. */
10 /* */
11 /* This software is licensed for use with Texas Instruments TMS320 */
12 /* family DSPs. This license was provided to you prior to installing */
13 /* the software. You may review this license by consulting the file */
14 /* TI_license.PDF which accompanies the files in this library. */
15 /* */
16 /* ----------------------------------------------------------------------- */
17 /* */
18 /* DSP_fir_cplx_hM4X4.c -- Complex FIR Filter */
19 /* Optimized C Implementation (w/ Intrinsics) */
20 /* */
21 /* Usage */
22 /* This routine is C-callable and can be called as: */
23 /* */
24 /* void DSP_fir_cplx_hM4X4 ( */
25 /* const short *restrict x, */
26 /* const short *restrict h, */
27 /* short *restrict r, */
28 /* int nh, */
29 /* int nr */
30 /* ) */
31 /* */
32 /* x[2*(nr+nh-1)] : Complex input data. The x argument must point to element 2*(nh-1) of this array. */
33 /* h[2*nh] : Complex coefficients (in normal order). */
34 /* r[2*nr] : Complex output data. */
35 /* nh : Number of complex coefficients. */
36 /* nr : Number of complex output samples. */
37 /* */
38 /* Description */
39 /* This complex FIR computes nr complex output samples using nh */
40 /* complex coefficients. It operates on 16-bit data with a 32-bit */
41 /* accumulate. Each array consists of an even and odd term with even */
42 /* terms representing the real part of the element and the odd terms */
43 /* the imaginary part. The pointer to input array x must point to the */
44 /* (nh)th complex sample, i.e. element 2*(nh-1), upon entry to the */
45 /* function. The coefficients are expected in normal order. */
46 /* */
47 /* Assumptions */
48 /* Arrays x, h, and r do not overlap */
49 /* nr >= 8; nr % 4 == 0 */
50 /* nh >= 4; nh % 4 == 0 */
51 /* */
52 /* Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/ */
53 /* */
54 /* */
55 /* Redistribution and use in source and binary forms, with or without */
56 /* modification, are permitted provided that the following conditions */
57 /* are met: */
58 /* */
59 /* Redistributions of source code must retain the above copyright */
60 /* notice, this list of conditions and the following disclaimer. */
61 /* */
62 /* Redistributions in binary form must reproduce the above copyright */
63 /* notice, this list of conditions and the following disclaimer in the */
64 /* documentation and/or other materials provided with the */
65 /* distribution. */
66 /* */
67 /* Neither the name of Texas Instruments Incorporated nor the names of */
68 /* its contributors may be used to endorse or promote products derived */
69 /* from this software without specific prior written permission. */
70 /* */
71 /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */
72 /* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT */
73 /* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR */
74 /* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT */
75 /* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
76 /* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT */
77 /* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
78 /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY */
79 /* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
80 /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE */
81 /* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
82 /* */
83 /* ======================================================================= */
85 #pragma CODE_SECTION(DSP_fir_cplx_hM4X4, ".text:optimized");
87 #include "DSP_fir_cplx_hM4X4.h"
88 #ifdef __TI_COMPILER_VERSION__
89 #include "c6x.h"
90 #endif
92 #ifdef _LITTLE_ENDIAN
/*--------------------------------------------------------------------*/
/* Complex FIR filter, little-endian build. */
/* */
/* Writes nr complex results to r. Every pass of the outer loop */
/* produces 4 complex outputs (8 shorts of r); every pass of the */
/* inner loop applies 2 complex coefficients (4 shorts of h) to them. */
/* */
/* x : complex input (real in even, imag in odd shorts), read with */
/* unaligned _mem8 loads at x[i], x[i+4] and x[i-j-4], i.e. also */
/* at negative offsets from the pointer passed in. */
/* h : complex coefficients, read with _amem8. */
/* r : complex output, written with _amem8. */
/* nh : coefficient count; _nassert'ed >= 4 and a multiple of 4. */
/* nr : output count; _nassert'ed >= 8 and a multiple of 4. */
/* */
/* NOTE(review): _amem8 is the aligned double-word access, so h and r */
/* presumably must be 8-byte aligned; confirm with callers. */
/* NOTE(review): the final inner pass loads x[i-2*nh .. i-2*nh+3]; at */
/* i == 0 that starts one complex sample below x[-2*(nh-1)], the */
/* lowest element the file header promises. Only _hill() of that load */
/* reaches a multiply, so the extra word looks unused, but the read */
/* itself still happens. TODO confirm the buffer tolerates it. */
/*--------------------------------------------------------------------*/
93 void DSP_fir_cplx_hM4X4 (
94 const short *restrict x, /* Input array [nr+nh-1 complex elements] */
95 const short *restrict h, /* Coeff array [nh complex elements] */
96 short *restrict r, /* Output array [nr complex elements] */
97 int nh, /* Number of complex coefficients */
98 int nr /* Number of complex output samples */
99 )
100 {
101 int i, j, imag_real_0, imag_real_1, imag_real_2, imag_real_3; /* i, j index shorts; imag_real_n is the packed 16-bit pair of output n */
102 long long h_3210, x_3210,x_7654,x_ba98; /* 64-bit loads: 4 shorts = 2 complex values each */
103 long long real0imag0, real1imag1, real2imag2, real3imag3; /* one 64-bit accumulator per output, updated with _dadd */
104 __x128_t x_54_32_76_54, x_98_76_ba_98, re1im1re0im0, re3im3re2im2; /* 128-bit _cmatmpy operands and results */
106 /*--------------------------------------------------------------------*/
107 /* _nasserts pass the count constraints to the compiler: nr is at */
108 /* least 8 and a multiple of 4, nh is at least 4 and a multiple of 4. */
109 /* No pointer alignment is asserted here. */
110 /*--------------------------------------------------------------------*/
111 _nassert((int)nr >= 8);
112 _nassert((int)nr % 4 == 0);
113 _nassert((int)nh >= 4);
114 _nassert((int)nh % 4 == 0);
116 /*--------------------------------------------------------------------*/
117 /* i advances 8 shorts (4 complex outputs) per pass, so the loop runs */
118 /* nr/4 times; with nr >= 8 that is at least twice. */
119 /*--------------------------------------------------------------------*/
120 #pragma MUST_ITERATE(2,,1)
121 for (i = 0; i < 2*nr; i += 8) {
122 /*----------------------------------------------------------------*/
123 /* Zero out accumulators for 4 complex output samples */
124 /*----------------------------------------------------------------*/
125 real0imag0 = real1imag1=0;
126 real2imag2 = real3imag3=0;
/* Prime the sample window with the 4 complex inputs x[i .. i+7]. */
128 x_ba98 = _mem8((void *)&x[i+4]);
129 x_7654 = _mem8((void *)&x[i]);
131 /*----------------------------------------------------------------*/
132 /* Repeat the count constraints next to the inner loop. j advances */
133 /* 4 shorts (2 coefficients) per pass, so it runs nh/2 times. */
134 /*----------------------------------------------------------------*/
135 _nassert((int)nr >= 8);
136 _nassert((int)nr % 4 == 0);
137 _nassert((int)nh >= 4);
138 _nassert((int)nh % 4 == 0);
/* nh >= 4 and nh % 4 == 0 give a trip count of at least 2 that is even. */
140 #pragma MUST_ITERATE(2,,2)
141 #pragma UNROLL(2)
142 for (j = 0; j < 2*nh; j += 4) {
143 /*------------------------------------------------------------*/
144 /* Load 2 complex coefficients h[j .. j+3] with an aligned load */
145 /*------------------------------------------------------------*/
146 h_3210 = _amem8((void *)&h[j]);
148 /*------------------------------------------------------------*/
149 /* Load the 2 complex inputs just below the current window. */
150 /*------------------------------------------------------------*/
151 x_3210 = _mem8((void *)&x[i - j - 4]);
153 /*------------------------------------------------------------*/
154 /* Build the 128-bit operand for outputs 0 and 1: x_7654 plus */
155 /* the low word of x_7654 paired with the high word of x_3210. */
156 /*------------------------------------------------------------*/
157 x_54_32_76_54 = _llto128(_dmv(_loll(x_7654),_hill(x_3210)),x_7654);
158 re1im1re0im0 = _cmatmpy(h_3210,x_54_32_76_54);
160 /*------------------------------------------------------------*/
161 /* Build the 128-bit operand for outputs 2 and 3: x_ba98 plus */
162 /* the low word of x_ba98 paired with the high word of x_7654. */
163 /*------------------------------------------------------------*/
164 x_98_76_ba_98 = _llto128(_dmv(_loll(x_ba98),_hill(x_7654)),x_ba98);
165 re3im3re2im2 = _cmatmpy(h_3210,x_98_76_ba_98);
167 /*------------------------------------------------------------*/
168 /* Accumulate 4 complex outputs using _dadd() */
169 /*------------------------------------------------------------*/
170 real0imag0 = _dadd(real0imag0,_lo128(re1im1re0im0));
171 real1imag1 = _dadd(real1imag1,_hi128(re1im1re0im0));
172 real2imag2 = _dadd(real2imag2,_lo128(re3im3re2im2));
173 real3imag3 = _dadd(real3imag3,_hi128(re3im3re2im2));
175 /*------------------------------------------------------------*/
176 /* Slide the window down by 2 complex samples for the next pass */
177 /*------------------------------------------------------------*/
178 x_ba98 = x_7654;
179 x_7654 = x_3210;
180 }
182 /*----------------------------------------------------------------*/
183 /* Shift each sum left by 1, keep the upper halfwords, then store */
184 /*----------------------------------------------------------------*/
185 real0imag0 = _dshl(real0imag0,1);
186 real1imag1 = _dshl(real1imag1,1);
187 real2imag2 = _dshl(real2imag2,1);
188 real3imag3 = _dshl(real3imag3,1);
/* _packh2 keeps the upper 16 bits of each word; this code adds no rounding term first. */
190 imag_real_0 = _packh2(_hill(real0imag0), _loll(real0imag0));
191 imag_real_1 = _packh2(_hill(real1imag1), _loll(real1imag1));
192 imag_real_2 = _packh2(_hill(real2imag2), _loll(real2imag2));
193 imag_real_3 = _packh2(_hill(real3imag3), _loll(real3imag3));
/* Two outputs per aligned double-word store, passed through _dcrot270 first. */
/* NOTE(review): presumably the rotation puts each pair into the real, imag memory order; confirm against the intrinsic reference. */
195 _amem8(&r[i]) = _dcrot270(_itoll(imag_real_1, imag_real_0));
196 _amem8(&r[i+4]) = _dcrot270(_itoll(imag_real_3, imag_real_2));
197 }
198 }
200 /*-----------------------------------------------------------*/
201 /* Big Endian version */
202 /*-----------------------------------------------------------*/
203 #else
/*--------------------------------------------------------------------*/
/* Complex FIR filter, big-endian build. */
/* */
/* Writes nr complex results to r. Every pass of the outer loop */
/* produces 4 complex outputs (8 shorts of r); every pass of the */
/* inner loop applies 2 complex coefficients (4 shorts of h) to them. */
/* */
/* x : complex input (real in even, imag in odd shorts), read with */
/* unaligned _mem8 loads at x[i], x[i+4] and x[i-j-4], i.e. also */
/* at negative offsets from the pointer passed in. */
/* h : complex coefficients, read with _amem8. */
/* r : complex output, written with _amem8. */
/* nh : coefficient count; _nassert'ed >= 4 and a multiple of 4. */
/* nr : output count; _nassert'ed >= 8 and a multiple of 4. */
/* */
/* NOTE(review): _amem8 is the aligned double-word access, so h and r */
/* presumably must be 8-byte aligned; confirm with callers. */
/* NOTE(review): the final inner pass loads x[i-2*nh .. i-2*nh+3]; at */
/* i == 0 that starts one complex sample below x[-2*(nh-1)], the */
/* lowest element the file header promises. Only _loll() of that load */
/* reaches a multiply, so the extra word looks unused, but the read */
/* itself still happens. TODO confirm the buffer tolerates it. */
/*--------------------------------------------------------------------*/
204 void DSP_fir_cplx_hM4X4 (
205 const short *restrict x, /* Input array [nr+nh-1 complex elements] */
206 const short *restrict h, /* Coeff array [nh complex elements] */
207 short *restrict r, /* Output array [nr complex elements] */
208 int nh, /* Number of complex coefficients */
209 int nr /* Number of complex output samples */
210 )
211 {
212 int i, j, real_imag_0, real_imag_1, real_imag_2, real_imag_3; /* i, j index shorts; real_imag_n is the packed 16-bit pair of output n */
213 long long h_0123, x_0123 ,x_4567 ,x_89ab; /* 64-bit loads: 4 shorts = 2 complex values each */
214 long long imag0real0, imag1real1, imag2real2, imag3real3; /* one 64-bit accumulator per output, updated with _dadd */
215 __x128_t x_45_67_23_45, x_89_ab_67_89, im0re0im1re1, im2re2im3re3; /* 128-bit _cmatmpy operands and results */
217 /*--------------------------------------------------------------------*/
218 /* _nasserts pass the count constraints to the compiler: nr is at */
219 /* least 8 and a multiple of 4, nh is at least 4 and a multiple of 4. */
220 /* No pointer alignment is asserted here. */
221 /*--------------------------------------------------------------------*/
222 _nassert((int)nr >= 8);
223 _nassert((int)nr % 4 == 0);
224 _nassert((int)nh >= 4);
225 _nassert((int)nh % 4 == 0);
227 /*--------------------------------------------------------------------*/
228 /* i advances 8 shorts (4 complex outputs) per pass, so the loop runs */
229 /* nr/4 times; with nr >= 8 that is at least twice. */
230 /*--------------------------------------------------------------------*/
231 #pragma MUST_ITERATE(2,,1)
232 for (i = 0; i < 2*nr; i += 8) {
233 /*----------------------------------------------------------------*/
234 /* Zero out accumulators for 4 complex output samples */
235 /*----------------------------------------------------------------*/
236 imag0real0 = imag1real1=0;
237 imag2real2 = imag3real3=0;
/* Prime the sample window with the 4 complex inputs x[i .. i+7]. */
239 x_89ab = _mem8((void *)&x[i+4]);
240 x_4567 = _mem8((void *)&x[i]);
242 /*----------------------------------------------------------------*/
243 /* Repeat the count constraints next to the inner loop. j advances */
244 /* 4 shorts (2 coefficients) per pass, so it runs nh/2 times. */
245 /*----------------------------------------------------------------*/
246 _nassert((int)nr >= 8);
247 _nassert((int)nr % 4 == 0);
248 _nassert((int)nh >= 4);
249 _nassert((int)nh % 4 == 0);
/* nh >= 4 and nh % 4 == 0 give a trip count of at least 2 that is even. */
251 #pragma MUST_ITERATE(2,,2)
252 #pragma UNROLL(2)
253 for (j = 0; j < 2*nh; j += 4) {
254 /*------------------------------------------------------------*/
255 /* Load 2 complex coefficients h[j .. j+3] with an aligned load */
256 /*------------------------------------------------------------*/
257 h_0123 = _amem8((void *)&h[j]);
258 /*------------------------------------------------------------*/
259 /* Load the 2 complex inputs just below the current window. */
260 /*------------------------------------------------------------*/
261 x_0123 = _mem8((void *)&x[i - j - 4]);
263 /*------------------------------------------------------------*/
264 /* Build the 128-bit operand for outputs 0 and 1: x_4567 plus */
265 /* the low word of x_0123 paired with the high word of x_4567. */
266 /*------------------------------------------------------------*/
267 x_45_67_23_45 = _llto128(x_4567,_dmv(_loll(x_0123),_hill(x_4567)));
268 im0re0im1re1 = _cmatmpy(h_0123,x_45_67_23_45);
270 /*------------------------------------------------------------*/
271 /* Build the 128-bit operand for outputs 2 and 3: x_89ab plus */
272 /* the low word of x_4567 paired with the high word of x_89ab. */
273 /*------------------------------------------------------------*/
274 x_89_ab_67_89 = _llto128(x_89ab,_dmv(_loll(x_4567),_hill(x_89ab)));
275 im2re2im3re3 = _cmatmpy(h_0123,x_89_ab_67_89);
277 /*------------------------------------------------------------*/
278 /* Accumulate 4 complex outputs using _dadd() */
279 /*------------------------------------------------------------*/
280 imag0real0 = _dadd(imag0real0,_hi128(im0re0im1re1));
281 imag1real1 = _dadd(imag1real1,_lo128(im0re0im1re1));
282 imag2real2 = _dadd(imag2real2,_hi128(im2re2im3re3));
283 imag3real3 = _dadd(imag3real3,_lo128(im2re2im3re3));
285 /*------------------------------------------------------------*/
286 /* Slide the window down by 2 complex samples for the next pass */
287 /*------------------------------------------------------------*/
288 x_89ab = x_4567;
289 x_4567 = x_0123;
290 }
292 /*----------------------------------------------------------------*/
293 /* Shift each sum left by 1, keep the upper halfwords, then store */
294 /*----------------------------------------------------------------*/
295 imag0real0 = _dshl(imag0real0,1);
296 imag1real1 = _dshl(imag1real1,1);
297 imag2real2 = _dshl(imag2real2,1);
298 imag3real3 = _dshl(imag3real3,1);
/* _packh2 keeps the upper 16 bits of each word; this code adds no rounding term first. */
300 real_imag_0 = _packh2(_hill(imag0real0), _loll(imag0real0));
301 real_imag_1 = _packh2(_hill(imag1real1), _loll(imag1real1));
302 real_imag_2 = _packh2(_hill(imag2real2), _loll(imag2real2));
303 real_imag_3 = _packh2(_hill(imag3real3), _loll(imag3real3));
/* Two outputs per aligned double-word store; unlike the little-endian build, no _dcrot270 is applied. */
305 _amem8(&r[i]) = _itoll(real_imag_0, real_imag_1);
306 _amem8(&r[i+4]) = _itoll(real_imag_2, real_imag_3);
307 }
308 }
309 #endif
311 /* ======================================================================= */
312 /* End of file: DSP_fir_cplx_hM4X4.c */
313 /* ----------------------------------------------------------------------- */
314 /* Copyright (c) 2011 Texas Instruments, Incorporated. */
315 /* All Rights Reserved. */
316 /* ======================================================================= */