1 /*
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
7 Copyright (C) 2014, The University of Texas at Austin
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name of The University of Texas at Austin nor the names
18 of its contributors may be used to endorse or promote products
19 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
37 #define FUNCPTR_T unpackm_cxk_fp
39 typedef void (*FUNCPTR_T)(
40 conj_t conjp,
41 dim_t n,
42 void* beta,
43 void* p,
44 void* a, inc_t inca, inc_t lda
45 );
47 #undef FUNCPTR_ARRAY_LENGTH
48 #define FUNCPTR_ARRAY_LENGTH 18
50 static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] =
51 {
52 /* panel width = 0 */
53 {
54 NULL, NULL, NULL, NULL,
55 },
56 /* panel width = 1 */
57 {
58 NULL, NULL, NULL, NULL,
59 },
60 /* panel width = 2 */
61 {
62 BLIS_SUNPACKM_2XK_KERNEL,
63 BLIS_CUNPACKM_2XK_KERNEL,
64 BLIS_DUNPACKM_2XK_KERNEL,
65 BLIS_ZUNPACKM_2XK_KERNEL,
66 },
67 /* panel width = 3 */
68 {
69 NULL, NULL, NULL, NULL,
70 },
71 /* panel width = 4 */
72 {
73 BLIS_SUNPACKM_4XK_KERNEL,
74 BLIS_CUNPACKM_4XK_KERNEL,
75 BLIS_DUNPACKM_4XK_KERNEL,
76 BLIS_ZUNPACKM_4XK_KERNEL,
77 },
78 /* panel width = 5 */
79 {
80 NULL, NULL, NULL, NULL,
81 },
82 /* panel width = 6 */
83 {
84 BLIS_SUNPACKM_6XK_KERNEL,
85 BLIS_CUNPACKM_6XK_KERNEL,
86 BLIS_DUNPACKM_6XK_KERNEL,
87 BLIS_ZUNPACKM_6XK_KERNEL,
88 },
89 /* panel width = 7 */
90 {
91 NULL, NULL, NULL, NULL,
92 },
93 /* panel width = 8 */
94 {
95 BLIS_SUNPACKM_8XK_KERNEL,
96 BLIS_CUNPACKM_8XK_KERNEL,
97 BLIS_DUNPACKM_8XK_KERNEL,
98 BLIS_ZUNPACKM_8XK_KERNEL,
99 },
100 #ifdef BLIS_ENABLE_C66X_BUILD // these packs are not used in c66x; allows fatser compilation
101 /* panel width = 9 */
102 {
103 NULL, NULL, NULL, NULL,
104 },
105 /* panel width = 10 */
106 {
107 NULL, NULL, NULL, NULL,
108 },
109 /* panel width = 11 */
110 {
111 NULL, NULL, NULL, NULL,
112 },
113 /* panel width = 12 */
114 {
115 NULL, NULL, NULL, NULL,
116 },
117 /* panel width = 13 */
118 {
119 NULL, NULL, NULL, NULL,
120 },
121 /* panel width = 14 */
122 {
123 NULL, NULL, NULL, NULL,
124 },
125 /* panel width = 15 */
126 {
127 NULL, NULL, NULL, NULL,
128 },
129 /* panel width = 16 */
130 {
131 NULL, NULL, NULL, NULL,
132 },
133 /* panel width = 17 */
134 {
135 NULL, NULL, NULL, NULL,
136 },
137 #else
138 /* panel width = 9 */
139 {
140 NULL, NULL, NULL, NULL,
141 },
142 /* panel width = 10 */
143 {
144 BLIS_SUNPACKM_10XK_KERNEL,
145 BLIS_CUNPACKM_10XK_KERNEL,
146 BLIS_DUNPACKM_10XK_KERNEL,
147 BLIS_ZUNPACKM_10XK_KERNEL,
148 },
149 /* panel width = 11 */
150 {
151 NULL, NULL, NULL, NULL,
152 },
153 /* panel width = 12 */
154 {
155 BLIS_SUNPACKM_12XK_KERNEL,
156 BLIS_CUNPACKM_12XK_KERNEL,
157 BLIS_DUNPACKM_12XK_KERNEL,
158 BLIS_ZUNPACKM_12XK_KERNEL,
159 },
160 /* panel width = 13 */
161 {
162 NULL, NULL, NULL, NULL,
163 },
164 /* panel width = 14 */
165 {
166 BLIS_SUNPACKM_14XK_KERNEL,
167 BLIS_CUNPACKM_14XK_KERNEL,
168 BLIS_DUNPACKM_14XK_KERNEL,
169 BLIS_ZUNPACKM_14XK_KERNEL,
170 },
171 /* panel width = 15 */
172 {
173 NULL, NULL, NULL, NULL,
174 },
175 /* panel width = 16 */
176 {
177 BLIS_SUNPACKM_16XK_KERNEL,
178 BLIS_CUNPACKM_16XK_KERNEL,
179 BLIS_DUNPACKM_16XK_KERNEL,
180 BLIS_ZUNPACKM_16XK_KERNEL,
181 },
182 /* panel width = 17 */
183 {
184 NULL, NULL, NULL, NULL,
185 },
186 #endif
187 };
191 #undef GENTFUNC
192 #define GENTFUNC( ctype, ch, opname, copyvker ) \
193 \
194 void PASTEMAC(ch,opname)( \
195 conj_t conjp, \
196 dim_t m, \
197 dim_t n, \
198 void* beta, \
199 void* p, inc_t ldp, \
200 void* a, inc_t inca, inc_t lda \
201 ) \
202 { \
203 dim_t panel_dim; \
204 num_t dt; \
205 FUNCPTR_T f; \
206 \
207 /* If the panel dimension is unit, then we recognize that this allows
208 the kernel to reduce to a copyv, so we call that kernel directly. */ \
209 if ( m == 1 ) \
210 { \
211 PASTEMAC2(ch,ch,copyvker)( conjp, \
212 n, \
213 p, 1, \
214 a, lda ); \
215 return; \
216 } \
217 \
218 /* The panel dimension is always equal to the leading dimension of p. */ \
219 panel_dim = ldp; \
220 \
221 /* Acquire the datatype for the current function. */ \
222 dt = PASTEMAC(ch,type); \
223 \
224 /* Index into the array to extract the correct function pointer.
225 If the panel dimension is too big to be within the array of
226 explicitly handled kernels, then we treat that kernel the same
227 as if it were in range but unimplemented. */ \
228 if ( panel_dim < FUNCPTR_ARRAY_LENGTH ) f = ftypes[panel_dim][dt]; \
229 else f = NULL; \
230 \
231 /* If there exists a kernel implementation for the panel dimension
232 provided, and the "width" of the panel is equal to the leading
233 dimension, we invoke the implementation. Otherwise, we use scal2m.
234 By using scal2m to handle edge cases (where m < panel_dim), we
235 allow the kernel implementations to remain very simple. */ \
236 if ( f != NULL && m == panel_dim ) \
237 { \
238 f( conjp, \
239 n, \
240 beta, \
241 p, \
242 a, inca, lda ); \
243 } \
244 else \
245 { \
246 /* Treat the panel as m x n and column-stored (unit row stride). */ \
247 PASTEMAC3(ch,ch,ch,scal2m)( 0, \
248 BLIS_NONUNIT_DIAG, \
249 BLIS_DENSE, \
250 (trans_t) conjp, \
251 m, \
252 n, \
253 beta, \
254 p, 1, ldp, \
255 a, inca, lda ); \
256 } \
257 }
259 INSERT_GENTFUNC_BASIC( unpackm_cxk, COPYV_KERNEL )