[dense-linear-algebra-libraries/linalg.git] / blis / frame / 1m / unpackm / ukernels / bli_unpackm_ref_cxk.c
1 /*
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
7 Copyright (C) 2014, The University of Texas at Austin
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name of The University of Texas at Austin nor the names
18 of its contributors may be used to endorse or promote products
19 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
37 #undef GENTFUNC
38 #define GENTFUNC( ctype, ch, opname, varname ) \
39 \
40 void PASTEMAC(ch,varname)( \
41 conj_t conjp, \
42 dim_t n, \
43 void* beta, \
44 void* p, \
45 void* a, inc_t inca, inc_t lda \
46 ) \
47 { \
48 const inc_t ldp = 2; \
49 \
50 ctype* restrict beta_cast = beta; \
51 ctype* restrict pi1 = p; \
52 ctype* restrict alpha1 = a; \
53 \
54 if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
55 { \
56 if ( bli_is_conj( conjp ) ) \
57 { \
58 for ( ; n != 0; --n ) \
59 { \
60 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
61 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
62 \
63 pi1 += ldp; \
64 alpha1 += lda; \
65 } \
66 } \
67 else \
68 { \
69 for ( ; n != 0; --n ) \
70 { \
71 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
72 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
73 \
74 pi1 += ldp; \
75 alpha1 += lda; \
76 } \
77 } \
78 } \
79 else \
80 { \
81 if ( bli_is_conj( conjp ) ) \
82 { \
83 for ( ; n != 0; --n ) \
84 { \
85 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
86 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
87 \
88 pi1 += ldp; \
89 alpha1 += lda; \
90 } \
91 } \
92 else \
93 { \
94 for ( ; n != 0; --n ) \
95 { \
96 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
97 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
98 \
99 pi1 += ldp; \
100 alpha1 += lda; \
101 } \
102 } \
103 } \
104 }
106 INSERT_GENTFUNC_BASIC( unpackm_ref_2xk, unpackm_ref_2xk )
111 #undef GENTFUNC
112 #define GENTFUNC( ctype, ch, opname, varname ) \
113 \
114 void PASTEMAC(ch,varname)( \
115 conj_t conjp, \
116 dim_t n, \
117 void* beta, \
118 void* p, \
119 void* a, inc_t inca, inc_t lda \
120 ) \
121 { \
122 const inc_t ldp = 4; \
123 \
124 ctype* restrict beta_cast = beta; \
125 ctype* restrict pi1 = p; \
126 ctype* restrict alpha1 = a; \
127 \
128 if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
129 { \
130 if ( bli_is_conj( conjp ) ) \
131 { \
132 for ( ; n != 0; --n ) \
133 { \
134 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
135 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
136 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
137 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
138 \
139 pi1 += ldp; \
140 alpha1 += lda; \
141 } \
142 } \
143 else \
144 { \
145 for ( ; n != 0; --n ) \
146 { \
147 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
148 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
149 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
150 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
151 \
152 pi1 += ldp; \
153 alpha1 += lda; \
154 } \
155 } \
156 } \
157 else \
158 { \
159 if ( bli_is_conj( conjp ) ) \
160 { \
161 for ( ; n != 0; --n ) \
162 { \
163 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
164 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
165 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
166 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
167 \
168 pi1 += ldp; \
169 alpha1 += lda; \
170 } \
171 } \
172 else \
173 { \
174 for ( ; n != 0; --n ) \
175 { \
176 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
177 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
178 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
179 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
180 \
181 pi1 += ldp; \
182 alpha1 += lda; \
183 } \
184 } \
185 } \
186 }
188 INSERT_GENTFUNC_BASIC( unpackm_ref_4xk, unpackm_ref_4xk )
193 #undef GENTFUNC
194 #define GENTFUNC( ctype, ch, opname, varname ) \
195 \
196 void PASTEMAC(ch,varname)( \
197 conj_t conjp, \
198 dim_t n, \
199 void* beta, \
200 void* p, \
201 void* a, inc_t inca, inc_t lda \
202 ) \
203 { \
204 const inc_t ldp = 6; \
205 \
206 ctype* restrict beta_cast = beta; \
207 ctype* restrict pi1 = p; \
208 ctype* restrict alpha1 = a; \
209 \
210 if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
211 { \
212 if ( bli_is_conj( conjp ) ) \
213 { \
214 for ( ; n != 0; --n ) \
215 { \
216 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
217 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
218 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
219 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
220 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
221 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
222 \
223 pi1 += ldp; \
224 alpha1 += lda; \
225 } \
226 } \
227 else \
228 { \
229 for ( ; n != 0; --n ) \
230 { \
231 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
232 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
233 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
234 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
235 PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
236 PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
237 \
238 pi1 += ldp; \
239 alpha1 += lda; \
240 } \
241 } \
242 } \
243 else \
244 { \
245 if ( bli_is_conj( conjp ) ) \
246 { \
247 for ( ; n != 0; --n ) \
248 { \
249 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
250 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
251 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
252 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
253 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
254 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
255 \
256 pi1 += ldp; \
257 alpha1 += lda; \
258 } \
259 } \
260 else \
261 { \
262 for ( ; n != 0; --n ) \
263 { \
264 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
265 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
266 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
267 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
268 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
269 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
270 \
271 pi1 += ldp; \
272 alpha1 += lda; \
273 } \
274 } \
275 } \
276 }
278 INSERT_GENTFUNC_BASIC( unpackm_ref_6xk, unpackm_ref_6xk )
283 #undef GENTFUNC
284 #define GENTFUNC( ctype, ch, opname, varname ) \
285 \
286 void PASTEMAC(ch,varname)( \
287 conj_t conjp, \
288 dim_t n, \
289 void* beta, \
290 void* p, \
291 void* a, inc_t inca, inc_t lda \
292 ) \
293 { \
294 const inc_t ldp = 8; \
295 \
296 ctype* restrict beta_cast = beta; \
297 ctype* restrict pi1 = p; \
298 ctype* restrict alpha1 = a; \
299 \
300 if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
301 { \
302 if ( bli_is_conj( conjp ) ) \
303 { \
304 for ( ; n != 0; --n ) \
305 { \
306 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
307 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
308 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
309 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
310 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
311 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
312 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
313 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
314 \
315 pi1 += ldp; \
316 alpha1 += lda; \
317 } \
318 } \
319 else \
320 { \
321 for ( ; n != 0; --n ) \
322 { \
323 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
324 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
325 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
326 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
327 PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
328 PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
329 PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
330 PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
331 \
332 pi1 += ldp; \
333 alpha1 += lda; \
334 } \
335 } \
336 } \
337 else \
338 { \
339 if ( bli_is_conj( conjp ) ) \
340 { \
341 for ( ; n != 0; --n ) \
342 { \
343 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
344 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
345 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
346 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
347 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
348 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
349 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
350 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
351 \
352 pi1 += ldp; \
353 alpha1 += lda; \
354 } \
355 } \
356 else \
357 { \
358 for ( ; n != 0; --n ) \
359 { \
360 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
361 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
362 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
363 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
364 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
365 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
366 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
367 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
368 \
369 pi1 += ldp; \
370 alpha1 += lda; \
371 } \
372 } \
373 } \
374 }
376 INSERT_GENTFUNC_BASIC( unpackm_ref_8xk, unpackm_ref_8xk )
#ifndef BLIS_ENABLE_C66X_BUILD // these packs are not used in c66x; allows faster compilation
381 #undef GENTFUNC
382 #define GENTFUNC( ctype, ch, opname, varname ) \
383 \
384 void PASTEMAC(ch,varname)( \
385 conj_t conjp, \
386 dim_t n, \
387 void* beta, \
388 void* p, \
389 void* a, inc_t inca, inc_t lda \
390 ) \
391 { \
392 const inc_t ldp = 10; \
393 \
394 ctype* restrict beta_cast = beta; \
395 ctype* restrict pi1 = p; \
396 ctype* restrict alpha1 = a; \
397 \
398 if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
399 { \
400 if ( bli_is_conj( conjp ) ) \
401 { \
402 for ( ; n != 0; --n ) \
403 { \
404 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
405 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
406 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
407 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
408 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
409 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
410 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
411 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
412 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
413 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
414 \
415 pi1 += ldp; \
416 alpha1 += lda; \
417 } \
418 } \
419 else \
420 { \
421 for ( ; n != 0; --n ) \
422 { \
423 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
424 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
425 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
426 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
427 PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
428 PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
429 PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
430 PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
431 PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
432 PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
433 \
434 pi1 += ldp; \
435 alpha1 += lda; \
436 } \
437 } \
438 } \
439 else \
440 { \
441 if ( bli_is_conj( conjp ) ) \
442 { \
443 for ( ; n != 0; --n ) \
444 { \
445 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
446 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
447 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
448 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
449 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
450 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
451 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
452 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
453 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
454 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
455 \
456 pi1 += ldp; \
457 alpha1 += lda; \
458 } \
459 } \
460 else \
461 { \
462 for ( ; n != 0; --n ) \
463 { \
464 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
465 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
466 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
467 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
468 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
469 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
470 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
471 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
472 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
473 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
474 \
475 pi1 += ldp; \
476 alpha1 += lda; \
477 } \
478 } \
479 } \
480 }
482 INSERT_GENTFUNC_BASIC( unpackm_ref_10xk, unpackm_ref_10xk )
487 #undef GENTFUNC
488 #define GENTFUNC( ctype, ch, opname, varname ) \
489 \
490 void PASTEMAC(ch,varname)( \
491 conj_t conjp, \
492 dim_t n, \
493 void* beta, \
494 void* p, \
495 void* a, inc_t inca, inc_t lda \
496 ) \
497 { \
498 const inc_t ldp = 12; \
499 \
500 ctype* restrict beta_cast = beta; \
501 ctype* restrict pi1 = p; \
502 ctype* restrict alpha1 = a; \
503 \
504 if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
505 { \
506 if ( bli_is_conj( conjp ) ) \
507 { \
508 for ( ; n != 0; --n ) \
509 { \
510 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
511 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
512 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
513 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
514 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
515 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
516 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
517 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
518 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
519 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
520 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
521 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
522 \
523 pi1 += ldp; \
524 alpha1 += lda; \
525 } \
526 } \
527 else \
528 { \
529 for ( ; n != 0; --n ) \
530 { \
531 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
532 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
533 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
534 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
535 PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
536 PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
537 PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
538 PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
539 PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
540 PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
541 PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
542 PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
543 \
544 pi1 += ldp; \
545 alpha1 += lda; \
546 } \
547 } \
548 } \
549 else \
550 { \
551 if ( bli_is_conj( conjp ) ) \
552 { \
553 for ( ; n != 0; --n ) \
554 { \
555 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
556 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
557 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
558 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
559 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
560 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
561 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
562 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
563 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
564 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
565 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
566 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
567 \
568 pi1 += ldp; \
569 alpha1 += lda; \
570 } \
571 } \
572 else \
573 { \
574 for ( ; n != 0; --n ) \
575 { \
576 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
577 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
578 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
579 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
580 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
581 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
582 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
583 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
584 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
585 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
586 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
587 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
588 \
589 pi1 += ldp; \
590 alpha1 += lda; \
591 } \
592 } \
593 } \
594 }
596 INSERT_GENTFUNC_BASIC( unpackm_ref_12xk, unpackm_ref_12xk )
601 #undef GENTFUNC
602 #define GENTFUNC( ctype, ch, opname, varname ) \
603 \
604 void PASTEMAC(ch,varname)( \
605 conj_t conjp, \
606 dim_t n, \
607 void* beta, \
608 void* p, \
609 void* a, inc_t inca, inc_t lda \
610 ) \
611 { \
612 const inc_t ldp = 14; \
613 \
614 ctype* restrict beta_cast = beta; \
615 ctype* restrict pi1 = p; \
616 ctype* restrict alpha1 = a; \
617 \
618 if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
619 { \
620 if ( bli_is_conj( conjp ) ) \
621 { \
622 for ( ; n != 0; --n ) \
623 { \
624 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
625 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
626 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
627 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
628 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
629 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
630 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
631 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
632 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
633 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
634 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
635 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
636 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 12), *(alpha1 + 12*inca) ); \
637 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 13), *(alpha1 + 13*inca) ); \
638 \
639 pi1 += ldp; \
640 alpha1 += lda; \
641 } \
642 } \
643 else \
644 { \
645 for ( ; n != 0; --n ) \
646 { \
647 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
648 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
649 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
650 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
651 PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
652 PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
653 PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
654 PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
655 PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
656 PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
657 PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
658 PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
659 PASTEMAC2(ch,ch,copys)( *(pi1 + 12), *(alpha1 + 12*inca) ); \
660 PASTEMAC2(ch,ch,copys)( *(pi1 + 13), *(alpha1 + 13*inca) ); \
661 \
662 pi1 += ldp; \
663 alpha1 += lda; \
664 } \
665 } \
666 } \
667 else \
668 { \
669 if ( bli_is_conj( conjp ) ) \
670 { \
671 for ( ; n != 0; --n ) \
672 { \
673 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
674 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
675 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
676 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
677 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
678 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
679 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
680 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
681 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
682 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
683 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
684 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
685 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
686 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
687 \
688 pi1 += ldp; \
689 alpha1 += lda; \
690 } \
691 } \
692 else \
693 { \
694 for ( ; n != 0; --n ) \
695 { \
696 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
697 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
698 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
699 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
700 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
701 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
702 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
703 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
704 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
705 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
706 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
707 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
708 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
709 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
710 \
711 pi1 += ldp; \
712 alpha1 += lda; \
713 } \
714 } \
715 } \
716 }
718 INSERT_GENTFUNC_BASIC( unpackm_ref_14xk, unpackm_ref_14xk )
723 #undef GENTFUNC
724 #define GENTFUNC( ctype, ch, opname, varname ) \
725 \
726 void PASTEMAC(ch,varname)( \
727 conj_t conjp, \
728 dim_t n, \
729 void* beta, \
730 void* p, \
731 void* a, inc_t inca, inc_t lda \
732 ) \
733 { \
734 const inc_t ldp = 16; \
735 \
736 ctype* restrict beta_cast = beta; \
737 ctype* restrict pi1 = p; \
738 ctype* restrict alpha1 = a; \
739 \
740 if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
741 { \
742 if ( bli_is_conj( conjp ) ) \
743 { \
744 for ( ; n != 0; --n ) \
745 { \
746 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
747 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
748 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
749 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
750 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
751 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
752 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
753 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
754 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
755 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
756 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
757 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
758 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 12), *(alpha1 + 12*inca) ); \
759 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 13), *(alpha1 + 13*inca) ); \
760 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 14), *(alpha1 + 14*inca) ); \
761 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 15), *(alpha1 + 15*inca) ); \
762 \
763 pi1 += ldp; \
764 alpha1 += lda; \
765 } \
766 } \
767 else \
768 { \
769 for ( ; n != 0; --n ) \
770 { \
771 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
772 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
773 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
774 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
775 PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
776 PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
777 PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
778 PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
779 PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
780 PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
781 PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
782 PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
783 PASTEMAC2(ch,ch,copys)( *(pi1 + 12), *(alpha1 + 12*inca) ); \
784 PASTEMAC2(ch,ch,copys)( *(pi1 + 13), *(alpha1 + 13*inca) ); \
785 PASTEMAC2(ch,ch,copys)( *(pi1 + 14), *(alpha1 + 14*inca) ); \
786 PASTEMAC2(ch,ch,copys)( *(pi1 + 15), *(alpha1 + 15*inca) ); \
787 \
788 pi1 += ldp; \
789 alpha1 += lda; \
790 } \
791 } \
792 } \
793 else \
794 { \
795 if ( bli_is_conj( conjp ) ) \
796 { \
797 for ( ; n != 0; --n ) \
798 { \
799 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
800 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
801 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
802 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
803 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
804 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
805 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
806 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
807 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
808 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
809 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
810 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
811 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
812 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
813 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
814 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
815 \
816 pi1 += ldp; \
817 alpha1 += lda; \
818 } \
819 } \
820 else \
821 { \
822 for ( ; n != 0; --n ) \
823 { \
824 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
825 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
826 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
827 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
828 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
829 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
830 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
831 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
832 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
833 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
834 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
835 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
836 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
837 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
838 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
839 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
840 \
841 pi1 += ldp; \
842 alpha1 += lda; \
843 } \
844 } \
845 } \
846 }
848 INSERT_GENTFUNC_BASIC( unpackm_ref_16xk, unpackm_ref_16xk )
849 #endif