[dense-linear-algebra-libraries/linalg.git] / blis / frame / 3 / trmm / 3m / old / bli_trmm3m_cntl.c
1 /*
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
7 Copyright (C) 2014, The University of Texas at Austin
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name of The University of Texas at Austin nor the names
18 of its contributors may be used to endorse or promote products
19 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
37 extern scalm_t* scalm_cntl;
39 extern blksz_t* gemm3m_mc;
40 extern blksz_t* gemm3m_nc;
41 extern blksz_t* gemm3m_kc;
42 extern blksz_t* gemm3m_mr;
43 extern blksz_t* gemm3m_nr;
44 extern blksz_t* gemm3m_kr;
46 extern func_t* gemm3m_ukrs;
48 extern gemm_t* gemm3m_cntl_bp_ke;
50 packm_t* trmm3m_l_packa_cntl;
51 packm_t* trmm3m_l_packb_cntl;
53 packm_t* trmm3m_r_packa_cntl;
54 packm_t* trmm3m_r_packb_cntl;
56 gemm_t* trmm3m_cntl_bp_ke;
58 gemm_t* trmm3m_l_cntl_op_bp;
59 gemm_t* trmm3m_l_cntl_mm_op;
60 gemm_t* trmm3m_l_cntl_vl_mm;
62 gemm_t* trmm3m_r_cntl_op_bp;
63 gemm_t* trmm3m_r_cntl_mm_op;
64 gemm_t* trmm3m_r_cntl_vl_mm;
66 gemm_t* trmm3m_l_cntl;
67 gemm_t* trmm3m_r_cntl;
70 void bli_trmm3m_cntl_init()
71 {
72 // Create control tree objects for packm operations (left side).
73 trmm3m_l_packa_cntl
74 =
75 bli_packm_cntl_obj_create( BLIS_BLOCKED,
76 BLIS_VARIANT2,
77 // IMPORTANT: for consistency with trsm, "k" dim
78 // multiple is set to mr.
79 gemm3m_mr,
80 gemm3m_kr,
81 FALSE, // do NOT invert diagonal
82 FALSE, // reverse iteration if upper?
83 FALSE, // reverse iteration if lower?
84 BLIS_PACKED_ROW_PANELS_3M,
85 BLIS_BUFFER_FOR_A_BLOCK );
87 trmm3m_l_packb_cntl
88 =
89 bli_packm_cntl_obj_create( BLIS_BLOCKED,
90 BLIS_VARIANT2,
91 // IMPORTANT: m dim multiple here must be mr
92 // since "k" dim multiple is set to mr above.
93 gemm3m_kr,
94 gemm3m_nr,
95 FALSE, // do NOT invert diagonal
96 FALSE, // reverse iteration if upper?
97 FALSE, // reverse iteration if lower?
98 BLIS_PACKED_COL_PANELS_3M,
99 BLIS_BUFFER_FOR_B_PANEL );
101 // Create control tree objects for packm operations (right side).
102 trmm3m_r_packa_cntl
103 =
104 bli_packm_cntl_obj_create( BLIS_BLOCKED,
105 BLIS_VARIANT2,
106 // IMPORTANT: for consistency with trsm, "k" dim
107 // multiple is set to nr.
108 gemm3m_mr,
109 gemm3m_nr,
110 FALSE, // do NOT invert diagonal
111 FALSE, // reverse iteration if upper?
112 FALSE, // reverse iteration if lower?
113 BLIS_PACKED_ROW_PANELS_3M,
114 BLIS_BUFFER_FOR_A_BLOCK );
116 trmm3m_r_packb_cntl
117 =
118 bli_packm_cntl_obj_create( BLIS_BLOCKED,
119 BLIS_VARIANT2,
120 // IMPORTANT: m dim multiple here must be nr
121 // since "k" dim multiple is set to nr above.
122 gemm3m_nr,
123 gemm3m_nr,
124 FALSE, // do NOT invert diagonal
125 FALSE, // reverse iteration if upper?
126 FALSE, // reverse iteration if lower?
127 BLIS_PACKED_COL_PANELS_3M,
128 BLIS_BUFFER_FOR_B_PANEL );
131 // Create control tree object for lowest-level block-panel kernel.
132 trmm3m_cntl_bp_ke
133 =
134 bli_trmm_cntl_obj_create( BLIS_UNB_OPT,
135 BLIS_VARIANT2,
136 NULL,
137 gemm3m_ukrs,
138 NULL, NULL, NULL, NULL,
139 NULL, NULL, NULL );
141 // Create control tree object for outer panel (to block-panel)
142 // problem (left side).
143 trmm3m_l_cntl_op_bp
144 =
145 bli_trmm_cntl_obj_create( BLIS_BLOCKED,
146 BLIS_VARIANT1,
147 gemm3m_mc,
148 gemm3m_ukrs,
149 NULL,
150 trmm3m_l_packa_cntl,
151 trmm3m_l_packb_cntl,
152 NULL,
153 trmm3m_cntl_bp_ke,
154 gemm3m_cntl_bp_ke,
155 NULL );
157 // Create control tree object for general problem via multiple
158 // rank-k (outer panel) updates (left side).
159 trmm3m_l_cntl_mm_op
160 =
161 bli_trmm_cntl_obj_create( BLIS_BLOCKED,
162 BLIS_VARIANT3,
163 gemm3m_kc,
164 gemm3m_ukrs,
165 NULL,
166 NULL,
167 NULL,
168 NULL,
169 trmm3m_l_cntl_op_bp,
170 NULL,
171 NULL );
173 // Create control tree object for very large problem via multiple
174 // general problems (left side).
175 trmm3m_l_cntl_vl_mm
176 =
177 bli_trmm_cntl_obj_create( BLIS_BLOCKED,
178 BLIS_VARIANT2,
179 gemm3m_nc,
180 gemm3m_ukrs,
181 NULL,
182 NULL,
183 NULL,
184 NULL,
185 trmm3m_l_cntl_mm_op,
186 NULL,
187 NULL );
189 // Create control tree object for outer panel (to block-panel)
190 // problem (right side).
191 trmm3m_r_cntl_op_bp
192 =
193 bli_trmm_cntl_obj_create( BLIS_BLOCKED,
194 BLIS_VARIANT1,
195 gemm3m_mc,
196 gemm3m_ukrs,
197 NULL,
198 trmm3m_r_packa_cntl,
199 trmm3m_r_packb_cntl,
200 NULL,
201 trmm3m_cntl_bp_ke,
202 gemm3m_cntl_bp_ke,
203 NULL );
205 // Create control tree object for general problem via multiple
206 // rank-k (outer panel) updates (right side).
207 trmm3m_r_cntl_mm_op
208 =
209 bli_trmm_cntl_obj_create( BLIS_BLOCKED,
210 BLIS_VARIANT3,
211 gemm3m_kc,
212 gemm3m_ukrs,
213 NULL,
214 NULL,
215 NULL,
216 NULL,
217 trmm3m_r_cntl_op_bp,
218 NULL,
219 NULL );
221 // Create control tree object for very large problem via multiple
222 // general problems (right side).
223 trmm3m_r_cntl_vl_mm
224 =
225 bli_trmm_cntl_obj_create( BLIS_BLOCKED,
226 BLIS_VARIANT2,
227 gemm3m_nc,
228 gemm3m_ukrs,
229 NULL,
230 NULL,
231 NULL,
232 NULL,
233 trmm3m_r_cntl_mm_op,
234 NULL,
235 NULL );
237 // Alias the "master" trmm control trees to shorter names.
238 trmm3m_l_cntl = trmm3m_l_cntl_vl_mm;
239 trmm3m_r_cntl = trmm3m_r_cntl_vl_mm;
240 }
242 void bli_trmm3m_cntl_finalize()
243 {
244 bli_cntl_obj_free( trmm3m_l_packa_cntl );
245 bli_cntl_obj_free( trmm3m_l_packb_cntl );
246 bli_cntl_obj_free( trmm3m_r_packa_cntl );
247 bli_cntl_obj_free( trmm3m_r_packb_cntl );
249 bli_cntl_obj_free( trmm3m_cntl_bp_ke );
251 bli_cntl_obj_free( trmm3m_l_cntl_op_bp );
252 bli_cntl_obj_free( trmm3m_l_cntl_mm_op );
253 bli_cntl_obj_free( trmm3m_l_cntl_vl_mm );
254 bli_cntl_obj_free( trmm3m_r_cntl_op_bp );
255 bli_cntl_obj_free( trmm3m_r_cntl_mm_op );
256 bli_cntl_obj_free( trmm3m_r_cntl_vl_mm );
257 }