[dense-linear-algebra-libraries/linalg.git] / blis / frame / 3 / herk / 3m / old / bli_herk3m_cntl.c
1 /*
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
7 Copyright (C) 2014, The University of Texas at Austin
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name of The University of Texas at Austin nor the names
18 of its contributors may be used to endorse or promote products
19 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
37 extern scalm_t* scalm_cntl;
39 extern blksz_t* gemm3m_mc;
40 extern blksz_t* gemm3m_nc;
41 extern blksz_t* gemm3m_kc;
42 extern blksz_t* gemm3m_mr;
43 extern blksz_t* gemm3m_nr;
44 extern blksz_t* gemm3m_kr;
46 extern func_t* gemm3m_ukrs;
48 packm_t* herk3m_packa_cntl;
49 packm_t* herk3m_packb_cntl;
51 herk_t* herk3m_cntl_bp_ke;
52 herk_t* herk3m_cntl_op_bp;
53 herk_t* herk3m_cntl_mm_op;
54 herk_t* herk3m_cntl_vl_mm;
56 herk_t* herk3m_cntl;
59 void bli_herk3m_cntl_init()
60 {
61 // Create control tree objects for packm operations.
62 herk3m_packa_cntl
63 =
64 bli_packm_cntl_obj_create( BLIS_BLOCKED,
65 BLIS_VARIANT2,
66 gemm3m_mr,
67 gemm3m_kr,
68 FALSE, // do NOT invert diagonal
69 FALSE, // reverse iteration if upper?
70 FALSE, // reverse iteration if lower?
71 BLIS_PACKED_ROW_PANELS_3M,
72 BLIS_BUFFER_FOR_A_BLOCK );
74 herk3m_packb_cntl
75 =
76 bli_packm_cntl_obj_create( BLIS_BLOCKED,
77 BLIS_VARIANT2,
78 gemm3m_kr,
79 gemm3m_nr,
80 FALSE, // do NOT invert diagonal
81 FALSE, // reverse iteration if upper?
82 FALSE, // reverse iteration if lower?
83 BLIS_PACKED_COL_PANELS_3M,
84 BLIS_BUFFER_FOR_B_PANEL );
87 // Create control tree object for lowest-level block-panel kernel.
88 herk3m_cntl_bp_ke
89 =
90 bli_herk_cntl_obj_create( BLIS_UNB_OPT,
91 BLIS_VARIANT2,
92 NULL,
93 gemm3m_ukrs,
94 NULL, NULL, NULL,
95 NULL, NULL, NULL );
97 // Create control tree object for outer panel (to block-panel)
98 // problem.
99 herk3m_cntl_op_bp
100 =
101 bli_herk_cntl_obj_create( BLIS_BLOCKED,
102 BLIS_VARIANT1,
103 gemm3m_mc,
104 gemm3m_ukrs,
105 NULL,
106 herk3m_packa_cntl,
107 herk3m_packb_cntl,
108 NULL,
109 herk3m_cntl_bp_ke,
110 NULL );
112 // Create control tree object for general problem via multiple
113 // rank-k (outer panel) updates.
114 herk3m_cntl_mm_op
115 =
116 bli_herk_cntl_obj_create( BLIS_BLOCKED,
117 BLIS_VARIANT3,
118 gemm3m_kc,
119 gemm3m_ukrs,
120 NULL,
121 NULL,
122 NULL,
123 NULL,
124 herk3m_cntl_op_bp,
125 NULL );
127 // Create control tree object for very large problem via multiple
128 // general problems.
129 herk3m_cntl_vl_mm
130 =
131 bli_herk_cntl_obj_create( BLIS_BLOCKED,
132 BLIS_VARIANT2,
133 gemm3m_nc,
134 gemm3m_ukrs,
135 NULL,
136 NULL,
137 NULL,
138 NULL,
139 herk3m_cntl_mm_op,
140 NULL );
142 // Alias the "master" herk control tree to a shorter name.
143 herk3m_cntl = herk3m_cntl_vl_mm;
144 }
146 void bli_herk3m_cntl_finalize()
147 {
148 bli_cntl_obj_free( herk3m_packa_cntl );
149 bli_cntl_obj_free( herk3m_packb_cntl );
151 bli_cntl_obj_free( herk3m_cntl_bp_ke );
152 bli_cntl_obj_free( herk3m_cntl_op_bp );
153 bli_cntl_obj_free( herk3m_cntl_mm_op );
154 bli_cntl_obj_free( herk3m_cntl_vl_mm );
155 }