Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - ep-processor-libraries/dsplib.git/blob - ti/dsplib/src/DSP_minerror/c66/DSP_minerror.c
DSPLIB: optimized signal processing functions for TI DSPs
[ep-processor-libraries/dsplib.git] / ti / dsplib / src / DSP_minerror / c66 / DSP_minerror.c
1 /* ======================================================================= */
2 /*  TEXAS INSTRUMENTS, INC.                                                */
3 /*                                                                         */
4 /*  DSPLIB  DSP Signal Processing Library                                  */
5 /*                                                                         */
6 /*  This library contains proprietary intellectual property of Texas       */
7 /*  Instruments, Inc.  The library and its source code are protected by    */
8 /*  various copyrights, and portions may also be protected by patents or   */
9 /*  other legal protections.                                               */
10 /*                                                                         */
11 /*  This software is licensed for use with Texas Instruments TMS320        */
12 /*  family DSPs.  This license was provided to you prior to installing     */
13 /*  the software.  You may review this license by consulting the file      */
14 /*  TI_license.PDF which accompanies the files in this library.            */
15 /*                                                                         */
16 /* ----------------------------------------------------------------------- */
17 /*                                                                         */
18 /* DSP_minerror.c -- Minimum Energy Error Search                           */
19 /*                   Optimized C Implementation (w/ Intrinsics)            */
20 /*                                                                         */
21 /* Rev 0.0.1                                                               */
22 /*                                                                         */
23 /*  Usage                                                                  */
24 /*     This routine is C-callable and can be called as:                    */
25 /*                                                                         */
26 /*     int DSP_minerror (                                                  */
27 /*         const short *restrict GSP0_TABLE,                               */
28 /*         const short *restrict errCoefs,                                 */
29 /*         int         *restrict max_index                                 */
30 /*     )                                                                   */
31 /*                                                                         */
32 /*     GSP0_TABLE[256*9] :  Pointer to GSP0 terms array.                   */
33 /*                          Must be double-word aligned.                   */
34 /*     errCoefs[9]       :  Array of error coefficients.                   */
35 /*     max_index         :  Index to GSP0_TABLE[max_index], the first      */
36 /*                          element of the 9-element vector that resulted  */
37 /*                          in the maximum dot product.                    */
38 /*     return int        :  Maximum dot product result.                    */
39 /*                                                                         */
40 /*  Description                                                            */
41 /*      Performs a dot product on 256 pairs of 9 element vectors and       */
42 /*      searches for the pair of vectors which produces the maximum dot    */
43 /*      product result and returns the value of the highest dot product.   */
44 /*      This is a large part of the VSELP vocoder codebook search.         */
45 /*                                                                         */
46 /*  Assumptions                                                            */
47 /*     Arrays GSP0_TABLE and errCoefs must be double-word aligned.         */
48 /*                                                                         */
49 /* Copyright (C) 2011 Texas Instruments Incorporated - http://www.ti.com/  */ 
50 /*                                                                         */
51 /*                                                                         */
52 /*  Redistribution and use in source and binary forms, with or without     */
53 /*  modification, are permitted provided that the following conditions     */
54 /*  are met:                                                               */
55 /*                                                                         */
56 /*    Redistributions of source code must retain the above copyright       */
57 /*    notice, this list of conditions and the following disclaimer.        */
58 /*                                                                         */
59 /*    Redistributions in binary form must reproduce the above copyright    */
60 /*    notice, this list of conditions and the following disclaimer in the  */
61 /*    documentation and/or other materials provided with the               */
62 /*    distribution.                                                        */
63 /*                                                                         */
64 /*    Neither the name of Texas Instruments Incorporated nor the names of  */
65 /*    its contributors may be used to endorse or promote products derived  */
66 /*    from this software without specific prior written permission.        */
67 /*                                                                         */
68 /*  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS    */
69 /*  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT      */
70 /*  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR  */
71 /*  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT   */
72 /*  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,  */
73 /*  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT       */
74 /*  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,  */
75 /*  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY  */
76 /*  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT    */
77 /*  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE  */
78 /*  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.   */
79 /*                                                                         */
80 /* ======================================================================= */
82 #pragma CODE_SECTION(DSP_minerror, ".text:optimized");
84 #include "DSP_minerror.h"
86 #define GSP0_TERMS 9    /* shorts per table row (one 9-element vector)   */
87 #define GSP0_NUM 256    /* number of rows searched in GSP0_TABLE         */
89 #ifdef _LITTLE_ENDIAN
/*
 * DSP_minerror (variant compiled when _LITTLE_ENDIAN is defined).
 *
 * Forms the dot product of errCoefs[0..8] with each of the GSP0_NUM
 * consecutive 9-element rows of GSP0_TABLE, four rows per loop pass, and
 * returns the largest result.  *max_index receives the offset, in shorts,
 * of the first element of the winning row (a multiple of GSP0_TERMS).
 * A row only wins if its result is strictly greater than the running
 * maximum, so the earliest row is kept on ties.  The running maximum
 * starts at -32767: if no row exceeds that floor the function returns
 * -32767 and stores 0 in *max_index.
 *
 * NOTE(review): both arrays are read with _amemd8_const, presumably the
 * aligned 8-byte load intrinsic, so errCoefs would need double-word
 * alignment as well as GSP0_TABLE -- confirm against the compiler manual.
 */
90 int DSP_minerror (
91     const short *restrict GSP0_TABLE, /* GSP0 terms, GSP0_TERMS per row       */
92     const short *restrict errCoefs,   /* Nine error coefficients c0..c8       */
93     int *restrict max_index           /* Out: offset in shorts of the row     */
94                                       /* that gave the maximum dot product    */
95 )
96 {
97     const short *Table_ptr_a, *Table_ptr_b;
99     int i, maxVal;
100     int cnt, c, dist, val0, val1, val2, val3;
101     int it_i, save;
103     unsigned int c76, c54, c32, c10;
104     unsigned int c87, c65, c43, c21;
105     unsigned int c08;
107     unsigned int tb0_76, tb0_54, tb0_32, tb0_10;
108     unsigned int tb1_87, tb1_65, tb1_43, tb1_21;
109     unsigned int tb2_76, tb2_54, tb2_32, tb2_10;
110     unsigned int tb3_87, tb3_65, tb3_43, tb3_21;
111     unsigned int tb32_08, tb10_08;
113     double table_dword0, table_dword1, table_dword2;
114     double table_dword3, table_dword4, table_dword5;
115     double table_dword6, table_dword7, table_dword8;
116     double tbdwd0, tbdwd1;
117     double coef_7654, coef_3210;
119     /*------------------------------------------------------------*/
120     /* Load the nine coefficients as packed 16-bit pairs: two     */
121     /* double-word loads cover c0..c7 (split by _lo/_hi), and a   */
122     /* 4-byte load at &errCoefs[7] supplies the c8/c7 pair.       */
123     /*------------------------------------------------------------*/
124     coef_3210 = _amemd8_const(&errCoefs[0]);
125     coef_7654 = _amemd8_const(&errCoefs[4]);
126     c87 = _mem4_const(&errCoefs[7]);
128     c10 = _lo(coef_3210);
129     c32 = _hi(coef_3210);
130     c54 = _lo(coef_7654);
131     c76 = _hi(coef_7654);
    /* Pairs shifted by one element, used for the rows (i+1, i+3)   */
    /* whose first element does not start a 32-bit word.            */
133     c21 = _packlh2(c32, c10);
134     c43 = _packlh2(c54, c32);
135     c65 = _packlh2(c76, c54);
137     /*-----------------------------------------------------------*/
138     /* Walk GSP0_TABLE as a flat 1-D array (rows are stored      */
139     /* back to back) using two pointers that start four shorts,  */
140     /* i.e. one double word, apart.                              */
141     /*-----------------------------------------------------------*/
142     Table_ptr_a = &GSP0_TABLE[0];
143     Table_ptr_b = Table_ptr_a + 4;
145     /*------------------------------------------------------------*/
146     /* cnt  : offset in shorts of the current row, begun one row  */
147     /*        early so that the first increment yields 0.         */
148     /* dist : shorts between rows.  c08 : c0 (hi) / c8 (lo) pair. */
149     /*------------------------------------------------------------*/
150     cnt = -GSP0_TERMS;
151     c = 0;
152     dist = GSP0_TERMS;
153     c08 = _packlh2(c10, c87);
155     /*------------------------------------------------------------*/
156     /* Four rows are handled per pass, so the loop below runs     */
157     /* GSP0_NUM / 4 = 64 times.  maxVal starts at a floor of      */
158     /* -32767 (not INT_MIN): only larger results can win.  save   */
159     /* starts at 0, the index reported when nothing wins.         */
160     /*------------------------------------------------------------*/
161     it_i = GSP0_NUM;
162     maxVal = -32767;    /* search floor, not the minimum int */
163     save = 0;
165     for (i = 0; i < it_i; i += 4) {
166         /*-----------------------------------------------------------*/
167         /* The twin pointers are meant to let the loads proceed in   */
168         /* parallel.  Four rows are 36 shorts = 9 double words;      */
169         /* load all nine here and split them into 32-bit pairs with  */
170         /* _lo and _hi below.                                        */
171         /*-----------------------------------------------------------*/
172         table_dword0 = _amemd8_const(Table_ptr_a);
173         Table_ptr_a += 8;
175         table_dword1 = _amemd8_const(Table_ptr_b);
176         Table_ptr_b += 8;
178         table_dword2 = _amemd8_const(Table_ptr_a);
179         Table_ptr_a += 8;
181         table_dword3 = _amemd8_const(Table_ptr_b);
182         Table_ptr_b += 8;
184         table_dword4 = _amemd8_const(Table_ptr_a);
185         Table_ptr_a += 8;
187         table_dword5 = _amemd8_const(Table_ptr_b);
188         Table_ptr_b += 8;
190         table_dword6 = _amemd8_const(Table_ptr_a);
191         Table_ptr_a += 8;
193         table_dword7 = _amemd8_const(Table_ptr_b);
194         Table_ptr_b += 8;
196         table_dword8 = _amemd8_const(Table_ptr_a);
198         /*-----------------------------------------------------------*/
199         /* Nine loads is an odd count, so the pointers swap roles:   */
200         /* a resumes where b stopped (start of the next four rows)   */
201         /* and b again leads it by one double word.                  */
202         /*-----------------------------------------------------------*/
202         Table_ptr_a = Table_ptr_b;
203         Table_ptr_b = Table_ptr_a + 4;
205         /*-----------------------------------------------------------*/
206         /* GSP0_TABLE entries 0..7 of row "i"                        */
207         /*-----------------------------------------------------------*/
208         tb0_10 = _lo(table_dword0);
209         tb0_32 = _hi(table_dword0);
210         tb0_54 = _lo(table_dword1);
211         tb0_76 = _hi(table_dword1);
213         /*-----------------------------------------------------------*/
214         /* Row "i + 1"; tb10_08 also carries entry 8 of row "i"      */
215         /*-----------------------------------------------------------*/
216         tb10_08 = _lo(table_dword2);
217         tb1_21 = _hi(table_dword2);
218         tb1_43 = _lo(table_dword3);
219         tb1_65 = _hi(table_dword3);
220         tb1_87 = _lo(table_dword4);
222         /*-----------------------------------------------------------*/
223         /* GSP0_TABLE entries 0..7 of row "i + 2"                    */
224         /*-----------------------------------------------------------*/
225         tb2_10 = _hi(table_dword4);
226         tb2_32 = _lo(table_dword5);
227         tb2_54 = _hi(table_dword5);
228         tb2_76 = _lo(table_dword6);
230         /*-----------------------------------------------------------*/
231         /* Row "i + 3"; tb32_08 also carries entry 8 of row "i + 2"  */
232         /*-----------------------------------------------------------*/
233         tb32_08 = _hi(table_dword6);
234         tb3_21 = _lo(table_dword7);
235         tb3_43 = _hi(table_dword7);
236         tb3_65 = _lo(table_dword8);
237         tb3_87 = _hi(table_dword8);
239         /*---------------------------------------------------------*/
240         /* Eight of each row's nine products come from four        */
241         /* _dotp2 calls.  The ninth shares its 32-bit word with    */
242         /* an entry of the neighbouring row, so that word is       */
243         /* multiplied with _mpy2, which keeps the two products     */
244         /* separate instead of summing them.  Here the low half    */
245         /* of the result belongs to row i (entry 8 * c8) and the   */
246         /* high half to row i + 1 (entry 0 * c0).  All four rows   */
247         /* are accumulated in the same pass.                       */
248         /*---------------------------------------------------------*/
249         tbdwd0 = _mpy2(tb10_08, c08);
251         val0 = _dotp2(tb0_10, c10);
252         val0 += _dotp2(tb0_32, c32);
253         val0 += _dotp2(tb0_54, c54);
254         val0 += _dotp2(tb0_76, c76);
255         val0 += _lo(tbdwd0);
257         val1 = _hi(tbdwd0);
258         val1 += _dotp2(tb1_21, c21);
259         val1 += _dotp2(tb1_43, c43);
260         val1 += _dotp2(tb1_65, c65);
261         val1 += _dotp2(tb1_87, c87);
263         /*--------------------------------------------------------*/
264         /* Same sequence for rows i + 2 and i + 3                 */
265         /*--------------------------------------------------------*/
266         tbdwd1 = _mpy2(tb32_08, c08);
268         val2 = _dotp2(tb2_10, c10);
269         val2 += _dotp2(tb2_32, c32);
270         val2 += _dotp2(tb2_54, c54);
271         val2 += _dotp2(tb2_76, c76);
272         val2 += _lo(tbdwd1);
274         val3 = _hi(tbdwd1);
275         val3 += _dotp2(tb3_21, c21);
276         val3 += _dotp2(tb3_43, c43);
277         val3 += _dotp2(tb3_65, c65);
278         val3 += _dotp2(tb3_87, c87);
280         /*--------------------------------------------------------*/
281         /* Compare each row against the running maximum in row    */
282         /* order; cnt advances one row per comparison and a       */
283         /* strictly larger value replaces maxVal and save.        */
283         /*--------------------------------------------------------*/
284         c = (val0 > maxVal) ? 1 : 0;
285         cnt = cnt + dist;
286         if (c) maxVal = val0;
287         if (c) save   = cnt;
289         c = (val1 > maxVal) ? 1 : 0;
290         cnt = cnt + dist;
291         if (c) maxVal = val1;
292         if (c) save   = cnt;
294         c = (val2 > maxVal) ? 1 : 0;
295         cnt = cnt + dist;
296         if (c) maxVal = val2;
297         if (c) save   = cnt;
299         c = (val3 > maxVal) ? 1 : 0;
300         cnt = cnt + dist;
301         if (c) maxVal = val3;
302         if (c) save   = cnt;
303     }
304     /*------------------------------------------------------------*/
305     /* Store the saved row offset and return the maximum dot      */
306     /* product found (described upstream as the least error).     */
307     /*------------------------------------------------------------*/
308     *max_index = save;
309     return (maxVal);
311 #else
/*
 * DSP_minerror (variant compiled when _LITTLE_ENDIAN is not defined).
 *
 * Same search as the little-endian variant: the dot product of
 * errCoefs[0..8] with each of the GSP0_NUM consecutive 9-element rows of
 * GSP0_TABLE is formed, four rows per loop pass, and the largest result
 * is returned.  *max_index receives the offset, in shorts, of the first
 * element of the winning row.  Only a strictly greater result replaces
 * the running maximum, which starts at -32767; if no row exceeds that
 * floor the function returns -32767 and stores 0 in *max_index.
 * The only differences here are which half of each loaded word holds
 * which element, reflected in the reversed pair names (c01, tb0_01, ...).
 *
 * NOTE(review): both arrays are read with _amemd8_const, presumably the
 * aligned 8-byte load intrinsic, so errCoefs would need double-word
 * alignment as well as GSP0_TABLE -- confirm against the compiler manual.
 */
312 int DSP_minerror (
313     const short *restrict GSP0_TABLE, /* GSP0 terms, GSP0_TERMS per row       */
314     const short *restrict errCoefs,   /* Nine error coefficients c0..c8       */
315     int *restrict max_index           /* Out: offset in shorts of the row     */
316                                       /* that gave the maximum dot product    */
319     const short *Table_ptr_a, *Table_ptr_b;
321     int i, maxVal;
322     int cnt, c, dist, val0, val1, val2, val3;
323     int it_i, save;
325     unsigned int c67, c45, c23, c01;
326     unsigned int c78, c56, c34, c12;
327     unsigned int c80;
329     unsigned int tb0_67, tb0_45, tb0_23, tb0_01;
330     unsigned int tb1_78, tb1_56, tb1_34, tb1_12;
331     unsigned int tb2_67, tb2_45, tb2_23, tb2_01;
332     unsigned int tb3_78, tb3_56, tb3_34, tb3_12;
333     unsigned int tb23_80, tb01_80;
335     double table_dword0, table_dword1, table_dword2;
336     double table_dword3, table_dword4, table_dword5;
337     double table_dword6, table_dword7, table_dword8;
338     double tbdwd0, tbdwd1;
339     double coef_4567, coef_0123;
341     /*------------------------------------------------------------*/
342     /* Load the nine coefficients as packed 16-bit pairs: two     */
343     /* double-word loads cover c0..c7 (split by _lo/_hi), and a   */
344     /* 4-byte load at &errCoefs[7] supplies the c7/c8 pair.       */
345     /*------------------------------------------------------------*/
346     coef_0123 = _amemd8_const(&errCoefs[0]);
347     coef_4567 = _amemd8_const(&errCoefs[4]);
348     c78 = _mem4_const(&errCoefs[7]);
350     c23 = _lo(coef_0123);
351     c01 = _hi(coef_0123);
352     c67 = _lo(coef_4567);
353     c45 = _hi(coef_4567);
    /* Pairs shifted by one element, used for the rows (i+1, i+3)   */
    /* whose first element does not start a 32-bit word.            */
355     c12 = _packlh2(c01, c23);
356     c34 = _packlh2(c23, c45);
357     c56 = _packlh2(c45, c67);
359     /*-----------------------------------------------------------*/
360     /* Walk GSP0_TABLE as a flat 1-D array (rows are stored      */
361     /* back to back) using two pointers that start four shorts,  */
362     /* i.e. one double word, apart.                              */
363     /*-----------------------------------------------------------*/
364     Table_ptr_a = &GSP0_TABLE[0];
365     Table_ptr_b = Table_ptr_a + 4;
367     /*------------------------------------------------------------*/
368     /* cnt  : offset in shorts of the current row, begun one row  */
369     /*        early so that the first increment yields 0.         */
370     /* dist : shorts between rows.  c80 : c8 (hi) / c0 (lo) pair. */
371     /*------------------------------------------------------------*/
372     cnt = -GSP0_TERMS;
373     c = 0;
374     dist = GSP0_TERMS;
375     c80 = _packlh2(c78, c01);
377     /*------------------------------------------------------------*/
378     /* Four rows are handled per pass, so the loop below runs     */
379     /* GSP0_NUM / 4 = 64 times.  maxVal starts at a floor of      */
380     /* -32767 (not INT_MIN): only larger results can win.  save   */
381     /* starts at 0, the index reported when nothing wins.         */
382     /*------------------------------------------------------------*/
383     it_i = GSP0_NUM;
384     maxVal = -32767;    /* search floor, not the minimum int */
385     save = 0;
387     for (i = 0; i < it_i; i += 4) {
388         /*-----------------------------------------------------------*/
389         /* The twin pointers are meant to let the loads proceed in   */
390         /* parallel.  Four rows are 36 shorts = 9 double words;      */
391         /* load all nine here and split them into 32-bit pairs with  */
392         /* _lo and _hi below.                                        */
393         /*-----------------------------------------------------------*/
394         table_dword0 = _amemd8_const(Table_ptr_a);
395         Table_ptr_a += 8;
397         table_dword1 = _amemd8_const(Table_ptr_b);
398         Table_ptr_b += 8;
400         table_dword2 = _amemd8_const(Table_ptr_a);
401         Table_ptr_a += 8;
403         table_dword3 = _amemd8_const(Table_ptr_b);
404         Table_ptr_b += 8;
406         table_dword4 = _amemd8_const(Table_ptr_a);
407         Table_ptr_a += 8;
409         table_dword5 = _amemd8_const(Table_ptr_b);
410         Table_ptr_b += 8;
412         table_dword6 = _amemd8_const(Table_ptr_a);
413         Table_ptr_a += 8;
415         table_dword7 = _amemd8_const(Table_ptr_b);
416         Table_ptr_b += 8;
418         table_dword8 = _amemd8_const(Table_ptr_a);
420         /*-----------------------------------------------------------*/
421         /* Nine loads is an odd count, so the pointers swap roles:   */
422         /* a resumes where b stopped (start of the next four rows)   */
423         /* and b again leads it by one double word.                  */
423         /*-----------------------------------------------------------*/
424         Table_ptr_a = Table_ptr_b;
425         Table_ptr_b = Table_ptr_a + 4;
427         /*-----------------------------------------------------------*/
428         /* GSP0_TABLE entries 0..7 of row "i"                        */
429         /*-----------------------------------------------------------*/
430         tb0_01 = _hi(table_dword0);
431         tb0_23 = _lo(table_dword0);
432         tb0_45 = _hi(table_dword1);
433         tb0_67 = _lo(table_dword1);
434         /*-----------------------------------------------------------*/
435         /* Row "i + 1"; tb01_80 also carries entry 8 of row "i"      */
436         /*-----------------------------------------------------------*/
437         tb01_80 = _hi(table_dword2);
438         tb1_12 = _lo(table_dword2);
439         tb1_34 = _hi(table_dword3);
440         tb1_56 = _lo(table_dword3);
441         tb1_78 = _hi(table_dword4);
443         /*-----------------------------------------------------------*/
444         /* GSP0_TABLE entries 0..7 of row "i + 2"                    */
445         /*-----------------------------------------------------------*/
446         tb2_01 = _lo(table_dword4);
447         tb2_23 = _hi(table_dword5);
448         tb2_45 = _lo(table_dword5);
449         tb2_67 = _hi(table_dword6);
451         /*-----------------------------------------------------------*/
452         /* Row "i + 3"; tb23_80 also carries entry 8 of row "i + 2"  */
453         /*-----------------------------------------------------------*/
454         tb23_80 = _lo(table_dword6);
455         tb3_12 = _hi(table_dword7);
456         tb3_34 = _lo(table_dword7);
457         tb3_56 = _hi(table_dword8);
458         tb3_78 = _lo(table_dword8);
460         /*---------------------------------------------------------*/
461         /* Eight of each row's nine products come from four        */
462         /* _dotp2 calls.  The ninth shares its 32-bit word with    */
463         /* an entry of the neighbouring row, so that word is       */
464         /* multiplied with _mpy2, which keeps the two products     */
465         /* separate instead of summing them.  Here the high half   */
466         /* of the result belongs to row i (entry 8 * c8) and the   */
467         /* low half to row i + 1 (entry 0 * c0).  All four rows    */
468         /* are accumulated in the same pass.                       */
469         /*---------------------------------------------------------*/
470         tbdwd0 = _mpy2(tb01_80, c80);
472         val0 = _dotp2(tb0_01, c01);
473         val0 += _dotp2(tb0_23, c23);
474         val0 += _dotp2(tb0_45, c45);
475         val0 += _dotp2(tb0_67, c67);
476         val0 += _hi(tbdwd0);
478         val1 = _lo(tbdwd0);
479         val1 += _dotp2(tb1_12, c12);
480         val1 += _dotp2(tb1_34, c34);
481         val1 += _dotp2(tb1_56, c56);
482         val1 += _dotp2(tb1_78, c78);
484         /*--------------------------------------------------------*/
485         /* Same sequence for rows i + 2 and i + 3                 */
486         /*--------------------------------------------------------*/
487         tbdwd1 = _mpy2(tb23_80, c80);
489         val2 = _dotp2(tb2_01, c01);
490         val2 += _dotp2(tb2_23, c23);
491         val2 += _dotp2(tb2_45, c45);
492         val2 += _dotp2(tb2_67, c67);
493         val2 += _hi(tbdwd1);
495         val3 = _lo(tbdwd1);
496         val3 += _dotp2(tb3_12, c12);
497         val3 += _dotp2(tb3_34, c34);
498         val3 += _dotp2(tb3_56, c56);
499         val3 += _dotp2(tb3_78, c78);
501         /*--------------------------------------------------------*/
502         /* Compare each row against the running maximum in row    */
503         /* order; cnt advances one row per comparison and a       */
504         /* strictly larger value replaces maxVal and save.        */
504         /*--------------------------------------------------------*/
505         c = (val0 > maxVal) ? 1 : 0;
506         cnt = cnt + dist;
507         if (c) maxVal = val0;
508         if (c) save   = cnt;
510         c = (val1 > maxVal) ? 1 : 0;
511         cnt = cnt + dist;
512         if (c) maxVal = val1;
513         if (c) save   = cnt;
515         c = (val2 > maxVal) ? 1 : 0;
516         cnt = cnt + dist;
517         if (c) maxVal = val2;
518         if (c) save   = cnt;
520         c = (val3 > maxVal) ? 1 : 0;
521         cnt = cnt + dist;
522         if (c) maxVal = val3;
523         if (c) save   = cnt;
524     }
525     /*------------------------------------------------------------*/
526     /* Store the saved row offset and return the maximum dot      */
527     /* product found (described upstream as the least error).     */
528     /*------------------------------------------------------------*/
529     *max_index = save;
530     return (maxVal);
532 #endif
533 /* ======================================================================= */
534 /*  End of file:  DSP_minerror.c                                           */
535 /* ----------------------------------------------------------------------- */
536 /*            Copyright (c) 2011 Texas Instruments, Incorporated.          */
537 /*                           All Rights Reserved.                          */
538 /* ======================================================================= */