1 /*
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
7 Copyright (C) 2014, The University of Texas at Austin
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name of The University of Texas at Austin nor the names
18 of its contributors may be used to endorse or promote products
19 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
37 extern scalv_t* scalv_cntl;
38 extern packm_t* packm_cntl;
39 extern packv_t* packv_cntl;
40 extern unpackv_t* unpackv_cntl;
42 extern gemv_t* gemv_cntl_rp_bs_dot;
43 extern gemv_t* gemv_cntl_rp_bs_axpy;
44 extern gemv_t* gemv_cntl_cp_bs_dot;
45 extern gemv_t* gemv_cntl_cp_bs_axpy;
47 extern blksz_t* gemv_mc;
49 hemv_t* hemv_cntl_bs_ke_lrow_ucol;
50 hemv_t* hemv_cntl_bs_ke_lcol_urow;
51 hemv_t* hemv_cntl_ge_lrow_ucol;
52 hemv_t* hemv_cntl_ge_lcol_urow;
55 void bli_hemv_cntl_init()
56 {
57 // Create control trees for the lowest-level kernels. These trees induce
58 // operations on (presumably) relatively small block-subvector problems.
59 hemv_cntl_bs_ke_lrow_ucol
60 =
61 bli_hemv_cntl_obj_create( BLIS_UNB_FUSED,
62 BLIS_VARIANT1,
63 NULL, NULL, NULL, NULL,
64 NULL, NULL, NULL, NULL,
65 NULL, NULL, NULL );
66 hemv_cntl_bs_ke_lcol_urow
67 =
68 bli_hemv_cntl_obj_create( BLIS_UNB_FUSED,
69 BLIS_VARIANT3,
70 NULL, NULL, NULL, NULL,
71 NULL, NULL, NULL, NULL,
72 NULL, NULL, NULL );
75 // Create control trees for generally large problems. Here, we choose a
76 // variant that prioritizes keeping a subvector of y in cache.
77 hemv_cntl_ge_lrow_ucol
78 =
79 bli_hemv_cntl_obj_create( BLIS_BLOCKED,
80 BLIS_VARIANT2,
81 gemv_mc,
82 scalv_cntl, // scale y up-front
83 packm_cntl, // pack A11 (if needed)
84 packv_cntl, // pack x1 (if needed)
85 packv_cntl, // pack y1 (if needed)
86 gemv_cntl_rp_bs_dot, // gemv_n_rp needed by var2
87 NULL, // gemv_n_cp not used by var2
88 NULL, // gemv_t_rp not used by var2
89 gemv_cntl_rp_bs_axpy, // gemv_t_cp needed by var2
90 hemv_cntl_bs_ke_lrow_ucol,
91 unpackv_cntl ); // unpack y1 (if packed)
92 hemv_cntl_ge_lcol_urow
93 =
94 bli_hemv_cntl_obj_create( BLIS_BLOCKED,
95 BLIS_VARIANT2,
96 gemv_mc,
97 scalv_cntl, // scale y up-front
98 packm_cntl, // pack A11 (if needed)
99 packv_cntl, // pack x1 (if needed)
100 packv_cntl, // pack y1 (if needed)
101 gemv_cntl_rp_bs_axpy, // gemv_n_rp needed by var2
102 NULL, // gemv_n_cp not used by var2
103 NULL, // gemv_t_rp not used by var2
104 gemv_cntl_rp_bs_dot, // gemv_t_cp needed by var2
105 hemv_cntl_bs_ke_lcol_urow,
106 unpackv_cntl ); // unpack y1 (if packed)
107 }
109 void bli_hemv_cntl_finalize()
110 {
111 bli_cntl_obj_free( hemv_cntl_bs_ke_lrow_ucol );
112 bli_cntl_obj_free( hemv_cntl_bs_ke_lcol_urow );
113 bli_cntl_obj_free( hemv_cntl_ge_lrow_ucol );
114 bli_cntl_obj_free( hemv_cntl_ge_lcol_urow );
115 }
118 hemv_t* bli_hemv_cntl_obj_create( impl_t impl_type,
119 varnum_t var_num,
120 blksz_t* b,
121 scalv_t* sub_scalv,
122 packm_t* sub_packm_a11,
123 packv_t* sub_packv_x1,
124 packv_t* sub_packv_y1,
125 gemv_t* sub_gemv_n_rp,
126 gemv_t* sub_gemv_n_cp,
127 gemv_t* sub_gemv_t_rp,
128 gemv_t* sub_gemv_t_cp,
129 hemv_t* sub_hemv,
130 unpackv_t* sub_unpackv_y1 )
131 {
132 hemv_t* cntl;
134 cntl = ( hemv_t* ) bli_malloc( sizeof(hemv_t) );
136 cntl->impl_type = impl_type;
137 cntl->var_num = var_num;
138 cntl->b = b;
139 cntl->sub_scalv = sub_scalv;
140 cntl->sub_packm_a11 = sub_packm_a11;
141 cntl->sub_packv_x1 = sub_packv_x1;
142 cntl->sub_packv_y1 = sub_packv_y1;
143 cntl->sub_gemv_n_rp = sub_gemv_n_rp;
144 cntl->sub_gemv_n_cp = sub_gemv_n_cp;
145 cntl->sub_gemv_t_rp = sub_gemv_t_rp;
146 cntl->sub_gemv_t_cp = sub_gemv_t_cp;
147 cntl->sub_hemv = sub_hemv;
148 cntl->sub_unpackv_y1 = sub_unpackv_y1;
150 return cntl;
151 }
153 void bli_hemv_cntl_obj_init( hemv_t* cntl,
154 impl_t impl_type,
155 varnum_t var_num,
156 blksz_t* b,
157 scalv_t* sub_scalv,
158 packm_t* sub_packm_a11,
159 packv_t* sub_packv_x1,
160 packv_t* sub_packv_y1,
161 gemv_t* sub_gemv_n_rp,
162 gemv_t* sub_gemv_n_cp,
163 gemv_t* sub_gemv_t_rp,
164 gemv_t* sub_gemv_t_cp,
165 hemv_t* sub_hemv,
166 unpackv_t* sub_unpackv_y1 )
167 {
168 cntl->impl_type = impl_type;
169 cntl->var_num = var_num;
170 cntl->b = b;
171 cntl->sub_scalv = sub_scalv;
172 cntl->sub_packm_a11 = sub_packm_a11;
173 cntl->sub_packv_x1 = sub_packv_x1;
174 cntl->sub_packv_y1 = sub_packv_y1;
175 cntl->sub_gemv_n_rp = sub_gemv_n_rp;
176 cntl->sub_gemv_n_cp = sub_gemv_n_cp;
177 cntl->sub_gemv_t_rp = sub_gemv_t_rp;
178 cntl->sub_gemv_t_cp = sub_gemv_t_cp;
179 cntl->sub_hemv = sub_hemv;
180 cntl->sub_unpackv_y1 = sub_unpackv_y1;
181 }