]> Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - ep-processor-libraries/dsplib.git/blob - ti/dsplib/src/DSP_mat_mul_cplx/c64P/DSP_mat_mul_cplx.c
DSPLIB: optimized signal processing functions for TI DSPs
[ep-processor-libraries/dsplib.git] / ti / dsplib / src / DSP_mat_mul_cplx / c64P / DSP_mat_mul_cplx.c
1 /* ======================================================================= */
2 /*  TEXAS INSTRUMENTS, INC.                                                */
3 /*                                                                         */
4 /*  DSPLIB  DSP Signal Processing Library                                  */
5 /*                                                                         */
6 /*  This library contains proprietary intellectual property of Texas       */
7 /*  Instruments, Inc.  The library and its source code are protected by    */
8 /*  various copyrights, and portions may also be protected by patents or   */
9 /*  other legal protections.                                               */
10 /*                                                                         */
11 /*  This software is licensed for use with Texas Instruments TMS320        */
12 /*  family DSPs.  This license was provided to you prior to installing     */
13 /*  the software.  You may review this license by consulting the file      */
14 /*  TI_license.PDF which accompanies the files in this library.            */
15 /*                                                                         */
16 /* ----------------------------------------------------------------------- */
17 /*                                                                         */
18 /* DSP_mat_mul_cplx.c -- Perform Matrix Multiplication                     */
19 /*                  Intrinsic C Implementation                             */
20 /*                                                                         */
21 /* Rev 0.0.1                                                               */
22 /*                                                                         */
23 /*  USAGE                                                                  */
24 /*      This routine is C-callable and can be called as:                   */
25 /*                                                                         */
26 /*          void DSP_mat_mul_cplx (                                        */
27 /*              const short *restrict x, int r1, int c1,                   */
28 /*              const short *restrict y,         int c2,                   */
29 /*              short       *restrict r,                                   */
30 /*              int                   qs                                   */
31 /*          );                                                             */
32 /*                                                                         */
33 /*      x  == Pointer to r1 by c1 input matrix.                            */
34 /*      y  == Pointer to c1 by c2 input matrix.                            */
35 /*      r  == Pointer to r1 by c2 output matrix.                           */
36 /*                                                                         */
37 /*      r1 == Number of rows in x.                                         */
38 /*      c1 == Number of columns in x.  Also number of rows in y.           */
39 /*      c2 == Number of columns in y.                                      */
40 /*                                                                         */
41 /*      qs == Final right-shift to apply to the result.                    */
42 /*                                                                         */
43 /*  DESCRIPTION                                                            */
44 /*      This function computes the expression "r = x * y" for the          */
45 /*      matrices x and y.  The columnar dimension of x must match          */
46 /*      the row dimension of y.  The resulting matrix has the same         */
47 /*      number of rows as x and the same number of columns as y.           */
48 /*                                                                         */
49 /*      The values stored in the matrices are assumed to be fixed-point    */
50 /*      or integer values.  All intermediate sums are retained to 32-bit   */
51 /*      precision, and no overflow checking is performed.  The results     */
52 /*      are right-shifted by a user-specified amount, and then truncated   */
53 /*      to 16 bits.                                                        */
54 /*                                                                         */
55 /*      This code is suitable for dense matrices.  No optimizations are    */
56 /*      made for sparse matrices.                                          */
57 /*                                                                         */
58 /*  ASSUMPTIONS                                                            */
59 /*      The arrays 'x', 'y', and 'r' are stored in distinct arrays.  That  */
60 /*      is, in-place processing is not allowed.                            */
61 /*                                                                         */
62 /*  Requirements:                                                          */
63 /*  R1   min = 4, Factor 2                                                 */
64 /*  c1   min = 4, Factor 2                                                 */
65 /*  C2   min = 4, Factor 2                                                 */
66 /*                                                                         */
67 /* Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/  */ 
68 /*                                                                         */
69 /*                                                                         */
70 /*  Redistribution and use in source and binary forms, with or without     */
71 /*  modification, are permitted provided that the following conditions     */
72 /*  are met:                                                               */
73 /*                                                                         */
74 /*    Redistributions of source code must retain the above copyright       */
75 /*    notice, this list of conditions and the following disclaimer.        */
76 /*                                                                         */
77 /*    Redistributions in binary form must reproduce the above copyright    */
78 /*    notice, this list of conditions and the following disclaimer in the  */
79 /*    documentation and/or other materials provided with the               */
80 /*    distribution.                                                        */
81 /*                                                                         */
82 /*    Neither the name of Texas Instruments Incorporated nor the names of  */
83 /*    its contributors may be used to endorse or promote products derived  */
84 /*    from this software without specific prior written permission.        */
85 /*                                                                         */
86 /*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS    */
87 /*  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT      */
88 /*  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR  */
89 /*  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT   */
90 /*  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,  */
91 /*  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT       */
92 /*  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  */
93 /*  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  */
94 /*  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT    */
95 /*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  */
96 /*  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.   */
97 /*                                                                         */
98 /* ======================================================================= */
100 #pragma CODE_SECTION(DSP_mat_mul_cplx, ".text:optimized");
102 #include "DSP_mat_mul_cplx.h"
104 /* For fixed-size implementation, use these defines */
105 /* #define FIXED_SIZES */
106 /* #define SIZE (16)   */
108 #ifdef _LITTLE_ENDIAN
109 void DSP_mat_mul_cplx (
110     const short *restrict x, int r1, int c1,
111     const short *restrict y,         int c2,
112     short       *restrict r,
113     int                   qs
116     int i, j, k;
117     int sum_re0, sum_im0, sum_re1, sum_im1, sum_re2, sum_im2, sum_re3, sum_im3;
118     int nbR1, nbC1R2, nbC2;
119   
120 #ifdef FIXED_SIZES
121     nbR1 = SIZE;
122     nbC1R2 = SIZE;
123     nbC2 = SIZE;
124   
125     _nassert(r1 == SIZE);
126     _nassert(c1 == SIZE);
127     _nassert(c2 == SIZE);
128 #else
129     nbR1 = r1;
130     nbC1R2 = c1;
131     nbC2 = c2;
132 #endif
134     /* -------------------------------------------------------------------- */
135     /*  Multiply each row in x by each column in y.  The product of row m   */
136     /*  in x and column n in y is placed in position (m,n) in the result.   */
137     /* -------------------------------------------------------------------- */
138     _nassert((int)x % 8 == 0);
139     _nassert((int)y % 8 == 0);
140     _nassert((int)r % 8 == 0);
141     _nassert(nbR1 % 2 == 0);
142     _nassert(nbR1 >= 4);
144     for (i = 0; i < nbR1*2; i+=4) {
146         _nassert(nbC2 % 2 == 0);
147         _nassert(nbC2 >= 4);
149         for (j = 0; j < nbC2*2; j+=4) {
150             sum_re0 = 0; sum_im0 = 0;
151             sum_re1 = 0; sum_im1 = 0;
152             sum_re2 = 0; sum_im2 = 0;
153             sum_re3 = 0; sum_im3 = 0;
155             _nassert(nbC1R2 % 2 == 0);
156             _nassert(nbC1R2 >= 4);
158             for (k = 0; k < nbC1R2*2; k+=4) {
160                 long long tmp0_x0x1, tmp0_y0y1, tmp1_x0x1, tmp1_y0y1;
161                 long long tmp_p0, tmp_p1, tmp_p2, tmp_p3, tmp_p4, tmp_p5, tmp_p6, tmp_p7;
162                 tmp0_x0x1 = _amem8((void*)&x[k + i*nbC1R2]);
163                 tmp1_x0x1 = _amem8((void*)&x[k + ((i+2)*nbC1R2)]);
164                 tmp0_y0y1 = _amem8((void*)&y[j + k*nbC2]);
165                 tmp1_y0y1 = _amem8((void*)&y[j + ((k+2)*nbC2)]);
166                 
167                 tmp_p0    = _cmpy(_loll(tmp0_x0x1), _loll(tmp0_y0y1));
168                 tmp_p1    = _cmpy(_hill(tmp0_x0x1), _loll(tmp1_y0y1));
169                 tmp_p2    = _cmpy(_loll(tmp0_x0x1), _hill(tmp0_y0y1));
170                 tmp_p3    = _cmpy(_hill(tmp0_x0x1), _hill(tmp1_y0y1)); 
171                 tmp_p4    = _cmpy(_loll(tmp1_x0x1), _loll(tmp0_y0y1));
172                 tmp_p5    = _cmpy(_hill(tmp1_x0x1), _loll(tmp1_y0y1));
173                 tmp_p6    = _cmpy(_loll(tmp1_x0x1), _hill(tmp0_y0y1));
174                 tmp_p7    = _cmpy(_hill(tmp1_x0x1), _hill(tmp1_y0y1));
176                 sum_re0   = sum_re0 + _hill(tmp_p0) + _hill(tmp_p1);
177                 sum_im0   = sum_im0 + _loll(tmp_p0) + _loll(tmp_p1);
178                 sum_re1   = sum_re1 + _hill(tmp_p2) + _hill(tmp_p3);
179                 sum_im1   = sum_im1 + _loll(tmp_p2) + _loll(tmp_p3);
180                 sum_re2   = sum_re2 + _hill(tmp_p4) + _hill(tmp_p5);
181                 sum_im2   = sum_im2 + _loll(tmp_p4) + _loll(tmp_p5);
182                 sum_re3   = sum_re3 + _hill(tmp_p6) + _hill(tmp_p7);
183                 sum_im3   = sum_im3 + _loll(tmp_p6) + _loll(tmp_p7);
185             }
186             _amem8((void*)&r[j + i*nbC2])     = _itoll(_pack2(sum_re1 >> qs, sum_im1 >> qs), 
187                                                        _pack2(sum_re0 >> qs, sum_im0 >> qs));
188             _amem8((void*)&r[j + (i+2)*nbC2]) = _itoll(_pack2(sum_re3 >> qs, sum_im3 >> qs),
189                                                        _pack2(sum_re2 >> qs, sum_im2 >> qs));
190         }
191     }
193 /*-----------------------------------------------------------*/
194 /*  Big Endian version                                       */
195 /*-----------------------------------------------------------*/
196 #else
197 void DSP_mat_mul_cplx (
198     const short *restrict x, int r1, int c1,
199     const short *restrict y,         int c2,
200     short       *restrict r,
201     int                   qs
204     int i, j, k;
205     int sum_re0, sum_im0, sum_re1, sum_im1, sum_re2, sum_im2, sum_re3, sum_im3;
206     int nbR1, nbC1R2, nbC2;
207   
208 #ifdef FIXED_SIZES
209     nbR1 = SIZE;
210     nbC1R2 = SIZE;
211     nbC2 = SIZE;
212   
213     _nassert(r1 == SIZE);
214     _nassert(c1 == SIZE);
215     _nassert(c2 == SIZE);
216 #else
217     nbR1 = r1;
218     nbC1R2 = c1;
219     nbC2 = c2;
220 #endif
222     /* -------------------------------------------------------------------- */
223     /*  Multiply each row in x by each column in y.  The product of row m   */
224     /*  in x and column n in y is placed in position (m,n) in the result.   */
225     /* -------------------------------------------------------------------- */
226     _nassert((int)x % 8 == 0);
227     _nassert((int)y % 8 == 0);
228     _nassert((int)r % 8 == 0);
229     _nassert(nbR1 % 2 == 0);
230     _nassert(nbR1 >= 4);
232     for (i = 0; i < nbR1*2; i+=4) {
234         _nassert(nbC2 % 2 == 0);
235         _nassert(nbC2 >= 4);
237         for (j = 0; j < nbC2*2; j+=4) {
238             sum_re0 = 0; sum_im0 = 0;
239             sum_re1 = 0; sum_im1 = 0;
240             sum_re2 = 0; sum_im2 = 0;
241             sum_re3 = 0; sum_im3 = 0;
243             _nassert(nbC1R2 % 2 == 0);
244             _nassert(nbC1R2 >= 4);
246             for (k = 0; k < nbC1R2*2; k+=4) {
248                 long long tmp0_x1x0, tmp0_y1y0, tmp1_x1x0, tmp1_y1y0;
249                 long long tmp_p0, tmp_p1, tmp_p2, tmp_p3, tmp_p4, tmp_p5, tmp_p6, tmp_p7;
251                 tmp0_x1x0   = _amem8((void*)&x[k + i*nbC1R2]);
252                 tmp1_x1x0   = _amem8((void*)&x[k + ((i+2)*nbC1R2)]);
253                 tmp0_y1y0   = _amem8((void*)&y[j + k*nbC2]);
254                 tmp1_y1y0   = _amem8((void*)&y[j + ((k+2)*nbC2)]);
255                 
256                 tmp_p0      = _cmpy(_loll(tmp0_x1x0), _hill(tmp1_y1y0));
257                 tmp_p1      = _cmpy(_hill(tmp0_x1x0), _hill(tmp0_y1y0));
258                 tmp_p2      = _cmpy(_loll(tmp0_x1x0), _loll(tmp1_y1y0));
259                 tmp_p3      = _cmpy(_hill(tmp0_x1x0), _loll(tmp0_y1y0)); 
260                 tmp_p4      = _cmpy(_loll(tmp1_x1x0), _hill(tmp1_y1y0));
261                 tmp_p5      = _cmpy(_hill(tmp1_x1x0), _hill(tmp0_y1y0));
262                 tmp_p6      = _cmpy(_loll(tmp1_x1x0), _loll(tmp1_y1y0));
263                 tmp_p7      = _cmpy(_hill(tmp1_x1x0), _loll(tmp0_y1y0));
265                 sum_im0     =  sum_im0 + _hill(tmp_p0) + _hill(tmp_p1);
266                 sum_re0     =  sum_re0 + _loll(tmp_p0) + _loll(tmp_p1);
267                 sum_im1     =  sum_im1 + _hill(tmp_p2) + _hill(tmp_p3);
268                 sum_re1     =  sum_re1 + _loll(tmp_p2) + _loll(tmp_p3);
269                 sum_im2     =  sum_im2 + _hill(tmp_p4) + _hill(tmp_p5);
270                 sum_re2     =  sum_re2 + _loll(tmp_p4) + _loll(tmp_p5);
271                 sum_im3     =  sum_im3 + _hill(tmp_p6) + _hill(tmp_p7);
272                 sum_re3     =  sum_re3 + _loll(tmp_p6) + _loll(tmp_p7);
274             }
275             _amem8((void*)&r[j + i*nbC2])     = _itoll(_pack2(sum_re0 >> qs, (-sum_im0) >> qs), 
276                                                        _pack2(sum_re1 >> qs, (-sum_im1) >> qs));
277             _amem8((void*)&r[j + (i+2)*nbC2]) = _itoll(_pack2(sum_re2 >> qs, (-sum_im2) >> qs),
278                                                        _pack2(sum_re3 >> qs, (-sum_im3) >> qs));
279         }
280     }
282 #endif
283 /* ======================================================================== */
284 /*  End of file:  DSP_mat_mul_cplx.c                                        */
285 /* ------------------------------------------------------------------------ */
286 /*            Copyright (c) 2011 Texas Instruments, Incorporated.           */
287 /*                           All Rights Reserved.                           */
288 /* ======================================================================== */