Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - dense-linear-algebra-libraries/linalg.git/blob - blis/frame/3/trsm/4m/ukernels/bli_gemmtrsm4m_l_ukr_ref.c
TI Linear Algebra Library (LINALG) Release 1.0.0
[dense-linear-algebra-libraries/linalg.git] / blis / frame / 3 / trsm / 4m / ukernels / bli_gemmtrsm4m_l_ukr_ref.c
1 /*
3    BLIS    
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
7    Copyright (C) 2014, The University of Texas at Austin
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name of The University of Texas at Austin nor the names
18       of its contributors may be used to endorse or promote products
19       derived from this software without specific prior written permission.
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
37 #undef  GENTFUNCCO
38 #define GENTFUNCCO( ctype, ctype_r, ch, chr, varname, gemmukr, trsmukr ) \
39 \
40 void PASTEMAC(ch,varname)( \
41                            dim_t           k, \
42                            ctype* restrict alpha, \
43                            ctype* restrict a10, \
44                            ctype* restrict a11, \
45                            ctype* restrict b01, \
46                            ctype* restrict b11, \
47                            ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
48                            auxinfo_t*      data  \
49                          ) \
50 { \
51         const dim_t       m           = PASTEMAC(chr,mr); \
52         const dim_t       n           = PASTEMAC(chr,nr); \
53 \
54         const inc_t       is_a        = bli_auxinfo_is_a( data ); \
55         const inc_t       is_b        = bli_auxinfo_is_b( data ); \
56 \
57         ctype_r* restrict a10_r       = ( ctype_r* )a10; \
58         ctype_r* restrict a10_i       = ( ctype_r* )a10 + is_a; \
59 \
60         ctype_r* restrict a11_r       = ( ctype_r* )a11; \
61 \
62         ctype_r* restrict b01_r       = ( ctype_r* )b01; \
63         ctype_r* restrict b01_i       = ( ctype_r* )b01 + is_b; \
64 \
65         ctype_r* restrict b11_r       = ( ctype_r* )b11; \
66         ctype_r* restrict b11_i       = ( ctype_r* )b11 + is_b; \
67 \
68         const inc_t       rs_b        = PASTEMAC(chr,packnr); \
69         const inc_t       cs_b        = 1; \
70 \
71         ctype_r* restrict one_r       = PASTEMAC(chr,1); \
72         ctype_r* restrict minus_one_r = PASTEMAC(chr,m1); \
73 \
74         ctype_r           alpha_r     = PASTEMAC(ch,real)( *alpha ); \
75         ctype_r           alpha_i     = PASTEMAC(ch,imag)( *alpha ); \
76 \
77         void*             a_next      = bli_auxinfo_next_a( data ); \
78         void*             b_next      = bli_auxinfo_next_b( data ); \
79 \
80         dim_t             i, j; \
81 \
82 \
83         /* Copy the contents of c to a temporary buffer ct. */ \
84         if ( !PASTEMAC(chr,eq0)( alpha_i ) ) \
85         { \
86                 /* We can handle a non-zero imaginary component on alpha, but to do
87                    so we have to manually scale b and then use alpha == 1 for the
88                    micro-kernel calls. */ \
89                 for ( i = 0; i < m; ++i ) \
90                 for ( j = 0; j < n; ++j ) \
91                 PASTEMAC(ch,scalris)( alpha_r, \
92                                       alpha_i, \
93                                       *(b11_r + i*rs_b + j*cs_b), \
94                                       *(b11_i + i*rs_b + j*cs_b) ); \
95 \
96                 /* Use alpha.r == 1.0. */ \
97                 alpha_r = *one_r; \
98         } \
99 \
101         /* b11.r = alpha.r * b11.r - ( a10.r * b01.r - a10.i * b01.i );
102            b11.i = alpha.r * b11.r - ( a10.r * b01.i + a10.i * b01.r ); */ \
104         bli_auxinfo_set_next_ab( a10_r, b01_i, *data ); \
106         /* b11.r = alpha.r * b11.r - a10.r * b01.r; */ \
107         PASTEMAC(chr,gemmukr)( k, \
108                                minus_one_r, \
109                                a10_r, \
110                                b01_r, \
111                                &alpha_r, \
112                                b11_r, rs_b, cs_b, \
113                                data ); \
115         bli_auxinfo_set_next_ab( a10_i, b01_r, *data ); \
117         /* b11.i = alpha.r * b11.i - a10.r * b01.i; */ \
118         PASTEMAC(chr,gemmukr)( k, \
119                                minus_one_r, \
120                                a10_r, \
121                                b01_i, \
122                                &alpha_r, \
123                                b11_i, rs_b, cs_b, \
124                                data ); \
126         bli_auxinfo_set_next_ab( a10_i, b01_i, *data ); \
128         /* b11.i =     1.0 * b11.i - a10.i * b01.r; */ \
129         PASTEMAC(chr,gemmukr)( k, \
130                                minus_one_r, \
131                                a10_i, \
132                                b01_r, \
133                                one_r, \
134                                b11_i, rs_b, cs_b, \
135                                data ); \
137         bli_auxinfo_set_next_ab( a_next, b_next, *data ); \
139         /* b11.r =     1.0 * b11.r + a10.i * b01.i; */ \
140         PASTEMAC(chr,gemmukr)( k, \
141                                one_r, \
142                                a10_i, \
143                                b01_i, \
144                                one_r, \
145                                b11_r, rs_b, cs_b, \
146                                data ); \
149         /* b11 = inv(a11) * b11;
150            c11 = b11; */ \
151         PASTEMAC(ch,trsmukr)( a11_r, \
152                               b11_r, \
153                               c11, rs_c, cs_c, \
154                               data ); \
157 INSERT_GENTFUNCCO_BASIC2( gemmtrsm4m_l_ukr_ref, GEMM_UKERNEL, TRSM4M_L_UKERNEL )