]> Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - dense-linear-algebra-libraries/linalg.git/blob - blis/frame/1m/subm/bli_subm_unb_var1.c
Consolidate all git repos of linalg into one.
[dense-linear-algebra-libraries/linalg.git] / blis / frame / 1m / subm / bli_subm_unb_var1.c
1 /*
3    BLIS    
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
7    Copyright (C) 2014, The University of Texas at Austin
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name of The University of Texas at Austin nor the names
18       of its contributors may be used to endorse or promote products
19       derived from this software without specific prior written permission.
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
37 #define FUNCPTR_T subm_fp
39 typedef void (*FUNCPTR_T)(
40                            doff_t  diagoffx,
41                            diag_t  diagx,
42                            uplo_t  uplox,
43                            trans_t transx,
44                            dim_t   m,
45                            dim_t   n,
46                            void*   x, inc_t rs_x, inc_t cs_x,
47                            void*   y, inc_t rs_y, inc_t cs_y
48                          );
50 // If some mixed datatype functions will not be compiled, we initialize
51 // the corresponding elements of the function array to NULL.
52 #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
53 static FUNCPTR_T GENARRAY2_ALL(ftypes,subm_unb_var1);
54 #else
55 #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
56 static FUNCPTR_T GENARRAY2_EXT(ftypes,subm_unb_var1);
57 #else
58 static FUNCPTR_T GENARRAY2_MIN(ftypes,subm_unb_var1);
59 #endif
60 #endif
63 void bli_subm_unb_var1( obj_t*  x,
64                          obj_t*  y )
65 {
66         num_t     dt_x      = bli_obj_datatype( *x );
67         num_t     dt_y      = bli_obj_datatype( *y );
69         doff_t    diagoffx  = bli_obj_diag_offset( *x );
70         diag_t    diagx     = bli_obj_diag( *x );
71         uplo_t    uplox     = bli_obj_uplo( *x );
72         trans_t   transx    = bli_obj_conjtrans_status( *x );
74         dim_t     m         = bli_obj_length( *y );
75         dim_t     n         = bli_obj_width( *y );
77         inc_t     rs_x      = bli_obj_row_stride( *x );
78         inc_t     cs_x      = bli_obj_col_stride( *x );
79         void*     buf_x     = bli_obj_buffer_at_off( *x );
81         inc_t     rs_y      = bli_obj_row_stride( *y );
82         inc_t     cs_y      = bli_obj_col_stride( *y );
83         void*     buf_y     = bli_obj_buffer_at_off( *y );
85         FUNCPTR_T f;
87         // Index into the type combination array to extract the correct
88         // function pointer.
89         f = ftypes[dt_x][dt_y];
91         // Invoke the function.
92         f( diagoffx,
93            diagx,
94            uplox,
95            transx,
96            m,
97            n,
98            buf_x, rs_x, cs_x,
99            buf_y, rs_y, cs_y );
// Template for the datatype-specific implementations of subm_unb_var1.
//
// For a given pair of element types (ctype_x, ctype_y) with type characters
// (chx, chy), the macro defines a function named PASTEMAC2(chx,chy,varname)
// that applies the vector kernel PASTEMAC2(chx,chy,kername) to x and y one
// vector at a time. The number of vectors (n_iter), their maximum length
// (n_elem_max) and the strides used to walk them are computed by
// bli_set_dims_incs_uplo_2m. When x is stored as upper or lower and has an
// implicit unit diagonal, the diagonal of y is updated separately at the
// end with subd.
#undef  GENTFUNC2
#define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname, kername ) \
\
void PASTEMAC2(chx,chy,varname)( \
                                 doff_t  diagoffx, \
                                 diag_t  diagx, \
                                 uplo_t  uplox, \
                                 trans_t transx, \
                                 dim_t   m, \
                                 dim_t   n, \
                                 void*   x, inc_t rs_x, inc_t cs_x, \
                                 void*   y, inc_t rs_y, inc_t cs_y \
                               ) \
{ \
    ctype_x* x_cast     = x; \
    ctype_y* y_cast     = y; \
    ctype_x* x1; \
    ctype_y* y1; \
    uplo_t   uplox_eff; \
    conj_t   conjx; \
    dim_t    n_iter; \
    dim_t    n_elem, n_elem_max; \
    inc_t    ldx, incx; \
    inc_t    ldy, incy; \
    dim_t    j, i; \
    dim_t    ij0, n_shift; \
\
    /* Nothing to do when either dimension is zero. */ \
    if ( bli_zero_dim2( m, n ) ) return; \
\
    /* When the diagonal of x is implicitly unit, we first update only the */ \
    /* region strictly above or below the diagonal of y, and then update */ \
    /* the diagonal of y. */ \
\
    /* Set various loop parameters. */ \
    bli_set_dims_incs_uplo_2m( diagoffx, diagx, transx, \
                               uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
                               uplox_eff, n_elem_max, n_iter, incx, ldx, incy, ldy, \
                               ij0, n_shift ); \
\
    /* An effective uplo of "zeros" means there is no stored region of x */ \
    /* to process. */ \
    if ( bli_is_zeros( uplox_eff ) ) return; \
\
    conjx = bli_extract_conj( transx ); \
\
    /* Handle dense and upper/lower storage cases separately. */ \
    if ( bli_is_dense( uplox_eff ) ) \
    { \
        /* Dense: every vector has the full length n_elem_max. */ \
        for ( j = 0; j < n_iter; ++j ) \
        { \
            n_elem = n_elem_max; \
\
            x1     = x_cast + (j  )*ldx + (0  )*incx; \
            y1     = y_cast + (j  )*ldy + (0  )*incy; \
\
            PASTEMAC2(chx,chy,kername)( conjx, \
                                        n_elem, \
                                        x1, incx, \
                                        y1, incy ); \
        } \
    } \
    else \
    { \
        if ( bli_is_upper( uplox_eff ) ) \
        { \
            /* Upper: vector j starts at element 0 and grows by one */ \
            /* element per iteration, capped at n_elem_max. */ \
            for ( j = 0; j < n_iter; ++j ) \
            { \
                n_elem = bli_min( n_shift + j + 1, n_elem_max ); \
\
                x1     = x_cast + (ij0+j  )*ldx + (0  )*incx; \
                y1     = y_cast + (ij0+j  )*ldy + (0  )*incy; \
\
                PASTEMAC2(chx,chy,kername)( conjx, \
                                            n_elem, \
                                            x1, incx, \
                                            y1, incy ); \
            } \
        } \
        else if ( bli_is_lower( uplox_eff ) ) \
        { \
            /* Lower: vector j skips its first i elements and runs to */ \
            /* the end of the vector. */ \
            for ( j = 0; j < n_iter; ++j ) \
            { \
                i      = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \
                n_elem = n_elem_max - i; \
\
                x1     = x_cast + (j  )*ldx + (ij0+i  )*incx; \
                y1     = y_cast + (j  )*ldy + (ij0+i  )*incy; \
\
                PASTEMAC2(chx,chy,kername)( conjx, \
                                            n_elem, \
                                            x1, incx, \
                                            y1, incy ); \
            } \
        } \
\
        /* When the diagonal is unit, we handle it separately. Because */ \
        /* this is a subtraction operation, the implicit unit diagonal */ \
        /* of x must be subtracted from (not added to) the diagonal of */ \
        /* y, so subd is used. The (chx,chy) type pair matches the */ \
        /* types of x_cast and y_cast, as in the kernel calls above. */ \
        if ( bli_is_unit_diag( diagx ) ) \
        { \
            PASTEMAC2(chx,chy,subd)( diagoffx, \
                                     diagx, \
                                     transx, \
                                     m, \
                                     n, \
                                     x_cast, rs_x, cs_x, \
                                     y_cast, rs_y, cs_y ); \
        } \
    } \
}
210 // Define the basic set of functions unconditionally, and then also some
211 // mixed datatype functions if requested.
212 INSERT_GENTFUNC2_BASIC( subm_unb_var1, SUBV_KERNEL )
214 #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
215 INSERT_GENTFUNC2_MIX_D( subm_unb_var1, SUBV_KERNEL )
216 #endif
218 #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
219 INSERT_GENTFUNC2_MIX_P( subm_unb_var1, SUBV_KERNEL )
220 #endif