ti/dsplib/src/DSP_mat_mul_cplx/c64P/DSP_mat_mul_cplx.c

   1 /* ======================================================================= */
   2 /*  TEXAS INSTRUMENTS, INC.                                                */
   3 /*                                                                         */
   4 /*  DSPLIB  DSP Signal Processing Library                                  */
   5 /*                                                                         */
   6 /*  This library contains proprietary intellectual property of Texas       */
   7 /*  Instruments, Inc.  The library and its source code are protected by    */
   8 /*  various copyrights, and portions may also be protected by patents or   */
   9 /*  other legal protections.                                               */
  10 /*                                                                         */
  11 /*  This software is licensed for use with Texas Instruments TMS320        */
  12 /*  family DSPs.  This license was provided to you prior to installing     */
  13 /*  the software.  You may review this license by consulting the file      */
  14 /*  TI_license.PDF which accompanies the files in this library.            */
  15 /*                                                                         */
  16 /* ----------------------------------------------------------------------- */
  17 /*                                                                         */
  18 /* DSP_mat_mul_cplx.c -- Perform Matrix Multiplication                     */
  19 /*                  Intrinsic C Implementation                             */
  20 /*                                                                         */
  21 /* Rev 0.0.1                                                               */
  22 /*                                                                         */
  23 /*  USAGE                                                                  */
  24 /*      This routine is C-callable and can be called as:                   */
  25 /*                                                                         */
  26 /*          void DSP_mat_mul_cplx (                                        */
  27 /*              const short *restrict x, int r1, int c1,                   */
  28 /*              const short *restrict y,         int c2,                   */
  29 /*              short       *restrict r,                                   */
  30 /*              int                   qs                                   */
  31 /*          );                                                             */
  32 /*                                                                         */
  33 /*      x  == Pointer to r1 by c1 input matrix.                            */
  34 /*      y  == Pointer to c1 by c2 input matrix.                            */
  35 /*      r  == Pointer to r1 by c2 output matrix.                           */
  36 /*                                                                         */
  37 /*      r1 == Number of rows in x.                                         */
  38 /*      c1 == Number of columns in x.  Also number of rows in y.           */
  39 /*      c2 == Number of columns in y.                                      */
  40 /*                                                                         */
  41 /*      qs == Final right-shift to apply to the result.                    */
  42 /*                                                                         */
  43 /*  DESCRIPTION                                                            */
  44 /*      This function computes the expression "r = x * y" for the          */
  45 /*      matrices x and y.  The columnar dimension of x must match          */
  46 /*      the row dimension of y.  The resulting matrix has the same         */
  47 /*      number of rows as x and the same number of columns as y.           */
  48 /*                                                                         */
  49 /*      The values stored in the matrices are assumed to be fixed-point    */
  50 /*      or integer values.  All intermediate sums are retained to 32-bit   */
  51 /*      precision, and no overflow checking is performed.  The results     */
  52 /*      are right-shifted by a user-specified amount, and then truncated   */
  53 /*      to 16 bits.                                                        */
  54 /*                                                                         */
  55 /*      This code is suitable for dense matrices.  No optimizations are    */
  56 /*      made for sparse matrices.                                          */
  57 /*                                                                         */
  58 /*  ASSUMPTIONS                                                            */
  59 /*      The arrays 'x', 'y', and 'r' are stored in distinct arrays.  That  */
  60 /*      is, in-place processing is not allowed.                            */
  61 /*                                                                         */
  62 /*  Requirements:                                                          */
  63 /*  R1   min = 4, Factor 2                                                 */
  64 /*  c1   min = 4, Factor 2                                                 */
  65 /*  C2   min = 4, Factor 2                                                 */
  66 /*                                                                         */
  67 /* Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/  */
  68 /*                                                                         */
  69 /*                                                                         */
  70 /*  Redistribution and use in source and binary forms, with or without     */
  71 /*  modification, are permitted provided that the following conditions     */
  72 /*  are met:                                                               */
  73 /*                                                                         */
  74 /*    Redistributions of source code must retain the above copyright       */
  75 /*    notice, this list of conditions and the following disclaimer.        */
  76 /*                                                                         */
  77 /*    Redistributions in binary form must reproduce the above copyright    */
  78 /*    notice, this list of conditions and the following disclaimer in the  */
  79 /*    documentation and/or other materials provided with the               */
  80 /*    distribution.                                                        */
  81 /*                                                                         */
  82 /*    Neither the name of Texas Instruments Incorporated nor the names of  */
  83 /*    its contributors may be used to endorse or promote products derived  */
  84 /*    from this software without specific prior written permission.        */
  85 /*                                                                         */
  86 /*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS    */
  87 /*  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT      */
  88 /*  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR  */
  89 /*  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT   */
  90 /*  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,  */
  91 /*  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT       */
  92 /*  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  */
  93 /*  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  */
  94 /*  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT    */
  95 /*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  */
  96 /*  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.   */
  97 /*                                                                         */
  98 /* ======================================================================= */
  99
 100 #pragma CODE_SECTION(DSP_mat_mul_cplx, ".text:optimized");
 101
 102 #include "DSP_mat_mul_cplx.h"
 103
 104 /* For fixed-size implementation, use these defines */
 105 /* #define FIXED_SIZES */
 106 /* #define SIZE (16)   */
 107
 108 #ifdef _LITTLE_ENDIAN
 109 void DSP_mat_mul_cplx (
 110     const short *restrict x, int r1, int c1,
 111     const short *restrict y,         int c2,
 112     short       *restrict r,
 113     int                   qs
 114 )
 115 {
 116     int i, j, k;
 117     int sum_re0, sum_im0, sum_re1, sum_im1, sum_re2, sum_im2, sum_re3, sum_im3;
 118     int nbR1, nbC1R2, nbC2;
 119
 120 #ifdef FIXED_SIZES
 121     nbR1 = SIZE;
 122     nbC1R2 = SIZE;
 123     nbC2 = SIZE;
 124
 125     _nassert(r1 == SIZE);
 126     _nassert(c1 == SIZE);
 127     _nassert(c2 == SIZE);
 128 #else
 129     nbR1 = r1;
 130     nbC1R2 = c1;
 131     nbC2 = c2;
 132 #endif
 133
 134     /* -------------------------------------------------------------------- */
 135     /*  Multiply each row in x by each column in y.  The product of row m   */
 136     /*  in x and column n in y is placed in position (m,n) in the result.   */
 137     /* -------------------------------------------------------------------- */
 138     _nassert((int)x % 8 == 0);
 139     _nassert((int)y % 8 == 0);
 140     _nassert((int)r % 8 == 0);
 141     _nassert(nbR1 % 2 == 0);
 142     _nassert(nbR1 >= 4);
 143
 144     for (i = 0; i < nbR1*2; i+=4) {
 145
 146         _nassert(nbC2 % 2 == 0);
 147         _nassert(nbC2 >= 4);
 148
 149         for (j = 0; j < nbC2*2; j+=4) {
 150             sum_re0 = 0; sum_im0 = 0;
 151             sum_re1 = 0; sum_im1 = 0;
 152             sum_re2 = 0; sum_im2 = 0;
 153             sum_re3 = 0; sum_im3 = 0;
 154
 155             _nassert(nbC1R2 % 2 == 0);
 156             _nassert(nbC1R2 >= 4);
 157
 158             for (k = 0; k < nbC1R2*2; k+=4) {
 159
 160                 long long tmp0_x0x1, tmp0_y0y1, tmp1_x0x1, tmp1_y0y1;
 161                 long long tmp_p0, tmp_p1, tmp_p2, tmp_p3, tmp_p4, tmp_p5, tmp_p6, tmp_p7;
 162                 tmp0_x0x1 = _amem8((void*)&x[k + i*nbC1R2]);
 163                 tmp1_x0x1 = _amem8((void*)&x[k + ((i+2)*nbC1R2)]);
 164                 tmp0_y0y1 = _amem8((void*)&y[j + k*nbC2]);
 165                 tmp1_y0y1 = _amem8((void*)&y[j + ((k+2)*nbC2)]);
 166
 167                 tmp_p0    = _cmpy(_loll(tmp0_x0x1), _loll(tmp0_y0y1));
 168                 tmp_p1    = _cmpy(_hill(tmp0_x0x1), _loll(tmp1_y0y1));
 169                 tmp_p2    = _cmpy(_loll(tmp0_x0x1), _hill(tmp0_y0y1));
 170                 tmp_p3    = _cmpy(_hill(tmp0_x0x1), _hill(tmp1_y0y1));
 171                 tmp_p4    = _cmpy(_loll(tmp1_x0x1), _loll(tmp0_y0y1));
 172                 tmp_p5    = _cmpy(_hill(tmp1_x0x1), _loll(tmp1_y0y1));
 173                 tmp_p6    = _cmpy(_loll(tmp1_x0x1), _hill(tmp0_y0y1));
 174                 tmp_p7    = _cmpy(_hill(tmp1_x0x1), _hill(tmp1_y0y1));
 175
 176                 sum_re0   = sum_re0 + _hill(tmp_p0) + _hill(tmp_p1);
 177                 sum_im0   = sum_im0 + _loll(tmp_p0) + _loll(tmp_p1);
 178                 sum_re1   = sum_re1 + _hill(tmp_p2) + _hill(tmp_p3);
 179                 sum_im1   = sum_im1 + _loll(tmp_p2) + _loll(tmp_p3);
 180                 sum_re2   = sum_re2 + _hill(tmp_p4) + _hill(tmp_p5);
 181                 sum_im2   = sum_im2 + _loll(tmp_p4) + _loll(tmp_p5);
 182                 sum_re3   = sum_re3 + _hill(tmp_p6) + _hill(tmp_p7);
 183                 sum_im3   = sum_im3 + _loll(tmp_p6) + _loll(tmp_p7);
 184
 185             }
 186             _amem8((void*)&r[j + i*nbC2])     = _itoll(_pack2(sum_re1 >> qs, sum_im1 >> qs),
 187                                                        _pack2(sum_re0 >> qs, sum_im0 >> qs));
 188             _amem8((void*)&r[j + (i+2)*nbC2]) = _itoll(_pack2(sum_re3 >> qs, sum_im3 >> qs),
 189                                                        _pack2(sum_re2 >> qs, sum_im2 >> qs));
 190         }
 191     }
 192 }
 193 /*-----------------------------------------------------------*/
 194 /*  Big Endian version                                       */
 195 /*-----------------------------------------------------------*/
 196 #else
 197 void DSP_mat_mul_cplx (
 198     const short *restrict x, int r1, int c1,
 199     const short *restrict y,         int c2,
 200     short       *restrict r,
 201     int                   qs
 202 )
 203 {
 204     int i, j, k;
 205     int sum_re0, sum_im0, sum_re1, sum_im1, sum_re2, sum_im2, sum_re3, sum_im3;
 206     int nbR1, nbC1R2, nbC2;
 207
 208 #ifdef FIXED_SIZES
 209     nbR1 = SIZE;
 210     nbC1R2 = SIZE;
 211     nbC2 = SIZE;
 212
 213     _nassert(r1 == SIZE);
 214     _nassert(c1 == SIZE);
 215     _nassert(c2 == SIZE);
 216 #else
 217     nbR1 = r1;
 218     nbC1R2 = c1;
 219     nbC2 = c2;
 220 #endif
 221
 222     /* -------------------------------------------------------------------- */
 223     /*  Multiply each row in x by each column in y.  The product of row m   */
 224     /*  in x and column n in y is placed in position (m,n) in the result.   */
 225     /* -------------------------------------------------------------------- */
 226     _nassert((int)x % 8 == 0);
 227     _nassert((int)y % 8 == 0);
 228     _nassert((int)r % 8 == 0);
 229     _nassert(nbR1 % 2 == 0);
 230     _nassert(nbR1 >= 4);
 231
 232     for (i = 0; i < nbR1*2; i+=4) {
 233
 234         _nassert(nbC2 % 2 == 0);
 235         _nassert(nbC2 >= 4);
 236
 237         for (j = 0; j < nbC2*2; j+=4) {
 238             sum_re0 = 0; sum_im0 = 0;
 239             sum_re1 = 0; sum_im1 = 0;
 240             sum_re2 = 0; sum_im2 = 0;
 241             sum_re3 = 0; sum_im3 = 0;
 242
 243             _nassert(nbC1R2 % 2 == 0);
 244             _nassert(nbC1R2 >= 4);
 245
 246             for (k = 0; k < nbC1R2*2; k+=4) {
 247
 248                 long long tmp0_x1x0, tmp0_y1y0, tmp1_x1x0, tmp1_y1y0;
 249                 long long tmp_p0, tmp_p1, tmp_p2, tmp_p3, tmp_p4, tmp_p5, tmp_p6, tmp_p7;
 250
 251                 tmp0_x1x0   = _amem8((void*)&x[k + i*nbC1R2]);
 252                 tmp1_x1x0   = _amem8((void*)&x[k + ((i+2)*nbC1R2)]);
 253                 tmp0_y1y0   = _amem8((void*)&y[j + k*nbC2]);
 254                 tmp1_y1y0   = _amem8((void*)&y[j + ((k+2)*nbC2)]);
 255
 256                 tmp_p0      = _cmpy(_loll(tmp0_x1x0), _hill(tmp1_y1y0));
 257                 tmp_p1      = _cmpy(_hill(tmp0_x1x0), _hill(tmp0_y1y0));
 258                 tmp_p2      = _cmpy(_loll(tmp0_x1x0), _loll(tmp1_y1y0));
 259                 tmp_p3      = _cmpy(_hill(tmp0_x1x0), _loll(tmp0_y1y0));
 260                 tmp_p4      = _cmpy(_loll(tmp1_x1x0), _hill(tmp1_y1y0));
 261                 tmp_p5      = _cmpy(_hill(tmp1_x1x0), _hill(tmp0_y1y0));
 262                 tmp_p6      = _cmpy(_loll(tmp1_x1x0), _loll(tmp1_y1y0));
 263                 tmp_p7      = _cmpy(_hill(tmp1_x1x0), _loll(tmp0_y1y0));
 264
 265                 sum_im0     =  sum_im0 + _hill(tmp_p0) + _hill(tmp_p1);
 266                 sum_re0     =  sum_re0 + _loll(tmp_p0) + _loll(tmp_p1);
 267                 sum_im1     =  sum_im1 + _hill(tmp_p2) + _hill(tmp_p3);
 268                 sum_re1     =  sum_re1 + _loll(tmp_p2) + _loll(tmp_p3);
 269                 sum_im2     =  sum_im2 + _hill(tmp_p4) + _hill(tmp_p5);
 270                 sum_re2     =  sum_re2 + _loll(tmp_p4) + _loll(tmp_p5);
 271                 sum_im3     =  sum_im3 + _hill(tmp_p6) + _hill(tmp_p7);
 272                 sum_re3     =  sum_re3 + _loll(tmp_p6) + _loll(tmp_p7);
 273
 274             }
 275             _amem8((void*)&r[j + i*nbC2])     = _itoll(_pack2(sum_re0 >> qs, (-sum_im0) >> qs),
 276                                                        _pack2(sum_re1 >> qs, (-sum_im1) >> qs));
 277             _amem8((void*)&r[j + (i+2)*nbC2]) = _itoll(_pack2(sum_re2 >> qs, (-sum_im2) >> qs),
 278                                                        _pack2(sum_re3 >> qs, (-sum_im3) >> qs));
 279         }
 280     }
 281 }
 282 #endif
 283 /* ======================================================================== */
 284 /*  End of file:  DSP_mat_mul_cplx.c                                        */
 285 /* ------------------------------------------------------------------------ */
 286 /*            Copyright (c) 2011 Texas Instruments, Incorporated.           */
 287 /*                           All Rights Reserved.                           */
 288 /* ======================================================================== */
 289