]> Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - dense-linear-algebra-libraries/linalg.git/blob - blis/frame/3/trmm/3m/old/bli_trmm3m_cntl.c
TI Linear Algebra Library (LINALG) Release 1.0.0
[dense-linear-algebra-libraries/linalg.git] / blis / frame / 3 / trmm / 3m / old / bli_trmm3m_cntl.c
1 /*
3    BLIS    
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
7    Copyright (C) 2014, The University of Texas at Austin
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name of The University of Texas at Austin nor the names
18       of its contributors may be used to endorse or promote products
19       derived from this software without specific prior written permission.
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
37 extern scalm_t*   scalm_cntl;
39 extern blksz_t*   gemm3m_mc;
40 extern blksz_t*   gemm3m_nc;
41 extern blksz_t*   gemm3m_kc;
42 extern blksz_t*   gemm3m_mr;
43 extern blksz_t*   gemm3m_nr;
44 extern blksz_t*   gemm3m_kr;
46 extern func_t*    gemm3m_ukrs;
48 extern gemm_t*    gemm3m_cntl_bp_ke;
50 packm_t*          trmm3m_l_packa_cntl;
51 packm_t*          trmm3m_l_packb_cntl;
53 packm_t*          trmm3m_r_packa_cntl;
54 packm_t*          trmm3m_r_packb_cntl;
56 gemm_t*           trmm3m_cntl_bp_ke;
58 gemm_t*           trmm3m_l_cntl_op_bp;
59 gemm_t*           trmm3m_l_cntl_mm_op;
60 gemm_t*           trmm3m_l_cntl_vl_mm;
62 gemm_t*           trmm3m_r_cntl_op_bp;
63 gemm_t*           trmm3m_r_cntl_mm_op;
64 gemm_t*           trmm3m_r_cntl_vl_mm;
66 gemm_t*           trmm3m_l_cntl;
67 gemm_t*           trmm3m_r_cntl;
70 void bli_trmm3m_cntl_init()
71 {
72         // Create control tree objects for packm operations (left side).
73         trmm3m_l_packa_cntl
74         =
75         bli_packm_cntl_obj_create( BLIS_BLOCKED,
76                                    BLIS_VARIANT2,
77                                    // IMPORTANT: for consistency with trsm, "k" dim
78                                    // multiple is set to mr.
79                                    gemm3m_mr,
80                                    gemm3m_kr,
81                                    FALSE, // do NOT invert diagonal
82                                    FALSE, // reverse iteration if upper?
83                                    FALSE, // reverse iteration if lower?
84                                    BLIS_PACKED_ROW_PANELS_3M,
85                                    BLIS_BUFFER_FOR_A_BLOCK );
87         trmm3m_l_packb_cntl
88         =
89         bli_packm_cntl_obj_create( BLIS_BLOCKED,
90                                    BLIS_VARIANT2,
91                                    // IMPORTANT: m dim multiple here must be mr
92                                    // since "k" dim multiple is set to mr above.
93                                    gemm3m_kr,
94                                    gemm3m_nr,
95                                    FALSE, // do NOT invert diagonal
96                                    FALSE, // reverse iteration if upper?
97                                    FALSE, // reverse iteration if lower?
98                                    BLIS_PACKED_COL_PANELS_3M,
99                                    BLIS_BUFFER_FOR_B_PANEL );
101         // Create control tree objects for packm operations (right side).
102         trmm3m_r_packa_cntl
103         =
104         bli_packm_cntl_obj_create( BLIS_BLOCKED,
105                                    BLIS_VARIANT2,
106                                    // IMPORTANT: for consistency with trsm, "k" dim
107                                    // multiple is set to nr.
108                                    gemm3m_mr,
109                                    gemm3m_nr,
110                                    FALSE, // do NOT invert diagonal
111                                    FALSE, // reverse iteration if upper?
112                                    FALSE, // reverse iteration if lower?
113                                    BLIS_PACKED_ROW_PANELS_3M,
114                                    BLIS_BUFFER_FOR_A_BLOCK );
116         trmm3m_r_packb_cntl
117         =
118         bli_packm_cntl_obj_create( BLIS_BLOCKED,
119                                    BLIS_VARIANT2,
120                                    // IMPORTANT: m dim multiple here must be nr
121                                    // since "k" dim multiple is set to nr above.
122                                    gemm3m_nr,
123                                    gemm3m_nr,
124                                    FALSE, // do NOT invert diagonal
125                                    FALSE, // reverse iteration if upper?
126                                    FALSE, // reverse iteration if lower?
127                                    BLIS_PACKED_COL_PANELS_3M,
128                                    BLIS_BUFFER_FOR_B_PANEL );
131         // Create control tree object for lowest-level block-panel kernel.
132         trmm3m_cntl_bp_ke
133         =
134         bli_trmm_cntl_obj_create( BLIS_UNB_OPT,
135                                   BLIS_VARIANT2,
136                                   NULL,
137                                   gemm3m_ukrs,
138                                   NULL, NULL, NULL, NULL,
139                                   NULL, NULL, NULL );
141         // Create control tree object for outer panel (to block-panel)
142         // problem (left side).
143         trmm3m_l_cntl_op_bp
144         =
145         bli_trmm_cntl_obj_create( BLIS_BLOCKED,
146                                   BLIS_VARIANT1,
147                                   gemm3m_mc,
148                                   gemm3m_ukrs,
149                                   NULL,
150                                   trmm3m_l_packa_cntl,
151                                   trmm3m_l_packb_cntl,
152                                   NULL,
153                                   trmm3m_cntl_bp_ke,
154                                   gemm3m_cntl_bp_ke,
155                                   NULL );
157         // Create control tree object for general problem via multiple
158         // rank-k (outer panel) updates (left side).
159         trmm3m_l_cntl_mm_op
160         =
161         bli_trmm_cntl_obj_create( BLIS_BLOCKED,
162                                   BLIS_VARIANT3,
163                                   gemm3m_kc,
164                                   gemm3m_ukrs,
165                                   NULL,
166                                   NULL, 
167                                   NULL,
168                                   NULL,
169                                   trmm3m_l_cntl_op_bp,
170                                   NULL,
171                                   NULL );
173         // Create control tree object for very large problem via multiple
174         // general problems (left side).
175         trmm3m_l_cntl_vl_mm
176         =
177         bli_trmm_cntl_obj_create( BLIS_BLOCKED,
178                                   BLIS_VARIANT2,
179                                   gemm3m_nc,
180                                   gemm3m_ukrs,
181                                   NULL,
182                                   NULL,
183                                   NULL,
184                                   NULL,
185                                   trmm3m_l_cntl_mm_op,
186                                   NULL,
187                                   NULL );
189         // Create control tree object for outer panel (to block-panel)
190         // problem (right side).
191         trmm3m_r_cntl_op_bp
192         =
193         bli_trmm_cntl_obj_create( BLIS_BLOCKED,
194                                   BLIS_VARIANT1,
195                                   gemm3m_mc,
196                                   gemm3m_ukrs,
197                                   NULL,
198                                   trmm3m_r_packa_cntl,
199                                   trmm3m_r_packb_cntl,
200                                   NULL,
201                                   trmm3m_cntl_bp_ke,
202                                   gemm3m_cntl_bp_ke,
203                                   NULL );
205         // Create control tree object for general problem via multiple
206         // rank-k (outer panel) updates (right side).
207         trmm3m_r_cntl_mm_op
208         =
209         bli_trmm_cntl_obj_create( BLIS_BLOCKED,
210                                   BLIS_VARIANT3,
211                                   gemm3m_kc,
212                                   gemm3m_ukrs,
213                                   NULL,
214                                   NULL, 
215                                   NULL,
216                                   NULL,
217                                   trmm3m_r_cntl_op_bp,
218                                   NULL,
219                                   NULL );
221         // Create control tree object for very large problem via multiple
222         // general problems (right side).
223         trmm3m_r_cntl_vl_mm
224         =
225         bli_trmm_cntl_obj_create( BLIS_BLOCKED,
226                                   BLIS_VARIANT2,
227                                   gemm3m_nc,
228                                   gemm3m_ukrs,
229                                   NULL,
230                                   NULL,
231                                   NULL,
232                                   NULL,
233                                   trmm3m_r_cntl_mm_op,
234                                   NULL,
235                                   NULL );
237         // Alias the "master" trmm control trees to shorter names.
238         trmm3m_l_cntl = trmm3m_l_cntl_vl_mm;
239         trmm3m_r_cntl = trmm3m_r_cntl_vl_mm;
242 void bli_trmm3m_cntl_finalize()
244         bli_cntl_obj_free( trmm3m_l_packa_cntl );
245         bli_cntl_obj_free( trmm3m_l_packb_cntl );
246         bli_cntl_obj_free( trmm3m_r_packa_cntl );
247         bli_cntl_obj_free( trmm3m_r_packb_cntl );
249         bli_cntl_obj_free( trmm3m_cntl_bp_ke );
251         bli_cntl_obj_free( trmm3m_l_cntl_op_bp );
252         bli_cntl_obj_free( trmm3m_l_cntl_mm_op );
253         bli_cntl_obj_free( trmm3m_l_cntl_vl_mm );
254         bli_cntl_obj_free( trmm3m_r_cntl_op_bp );
255         bli_cntl_obj_free( trmm3m_r_cntl_mm_op );
256         bli_cntl_obj_free( trmm3m_r_cntl_vl_mm );