]> Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - dense-linear-algebra-libraries/linalg.git/blob - blis/frame/1m/unpackm/ukernels/bli_unpackm_ref_cxk.c
TI Linear Algebra Library (LINALG) Release 1.0.0
[dense-linear-algebra-libraries/linalg.git] / blis / frame / 1m / unpackm / ukernels / bli_unpackm_ref_cxk.c
1 /*
3    BLIS    
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
7    Copyright (C) 2014, The University of Texas at Austin
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name of The University of Texas at Austin nor the names
18       of its contributors may be used to endorse or promote products
19       derived from this software without specific prior written permission.
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
37 #undef  GENTFUNC
38 #define GENTFUNC( ctype, ch, opname, varname ) \
39 \
40 void PASTEMAC(ch,varname)( \
41                            conj_t  conjp, \
42                            dim_t   n, \
43                            void*   beta, \
44                            void*   p, \
45                            void*   a, inc_t inca, inc_t lda  \
46                          ) \
47 { \
48         const inc_t     ldp       = 2; \
49 \
50         ctype* restrict beta_cast = beta; \
51         ctype* restrict pi1       = p; \
52         ctype* restrict alpha1    = a; \
53 \
54         if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
55         { \
56                 if ( bli_is_conj( conjp ) ) \
57                 { \
58                         for ( ; n != 0; --n ) \
59                         { \
60                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
61                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
62 \
63                                 pi1    += ldp; \
64                                 alpha1 += lda; \
65                         } \
66                 } \
67                 else \
68                 { \
69                         for ( ; n != 0; --n ) \
70                         { \
71                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
72                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
73 \
74                                 pi1    += ldp; \
75                                 alpha1 += lda; \
76                         } \
77                 } \
78         } \
79         else \
80         { \
81                 if ( bli_is_conj( conjp ) ) \
82                 { \
83                         for ( ; n != 0; --n ) \
84                         { \
85                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
86                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
87 \
88                                 pi1    += ldp; \
89                                 alpha1 += lda; \
90                         } \
91                 } \
92                 else \
93                 { \
94                         for ( ; n != 0; --n ) \
95                         { \
96                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
97                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
98 \
99                                 pi1    += ldp; \
100                                 alpha1 += lda; \
101                         } \
102                 } \
103         } \
106 INSERT_GENTFUNC_BASIC( unpackm_ref_2xk, unpackm_ref_2xk )
111 #undef  GENTFUNC
112 #define GENTFUNC( ctype, ch, opname, varname ) \
114 void PASTEMAC(ch,varname)( \
115                            conj_t  conjp, \
116                            dim_t   n, \
117                            void*   beta, \
118                            void*   p, \
119                            void*   a, inc_t inca, inc_t lda  \
120                          ) \
121 { \
122         const inc_t     ldp       = 4; \
124         ctype* restrict beta_cast = beta; \
125         ctype* restrict pi1       = p; \
126         ctype* restrict alpha1    = a; \
128         if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
129         { \
130                 if ( bli_is_conj( conjp ) ) \
131                 { \
132                         for ( ; n != 0; --n ) \
133                         { \
134                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
135                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
136                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
137                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
139                                 pi1    += ldp; \
140                                 alpha1 += lda; \
141                         } \
142                 } \
143                 else \
144                 { \
145                         for ( ; n != 0; --n ) \
146                         { \
147                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
148                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
149                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
150                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
152                                 pi1    += ldp; \
153                                 alpha1 += lda; \
154                         } \
155                 } \
156         } \
157         else \
158         { \
159                 if ( bli_is_conj( conjp ) ) \
160                 { \
161                         for ( ; n != 0; --n ) \
162                         { \
163                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
164                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
165                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
166                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
168                                 pi1    += ldp; \
169                                 alpha1 += lda; \
170                         } \
171                 } \
172                 else \
173                 { \
174                         for ( ; n != 0; --n ) \
175                         { \
176                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
177                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
178                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
179                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
181                                 pi1    += ldp; \
182                                 alpha1 += lda; \
183                         } \
184                 } \
185         } \
188 INSERT_GENTFUNC_BASIC( unpackm_ref_4xk, unpackm_ref_4xk )
193 #undef  GENTFUNC
194 #define GENTFUNC( ctype, ch, opname, varname ) \
196 void PASTEMAC(ch,varname)( \
197                            conj_t  conjp, \
198                            dim_t   n, \
199                            void*   beta, \
200                            void*   p, \
201                            void*   a, inc_t inca, inc_t lda  \
202                          ) \
203 { \
204         const inc_t     ldp       = 6; \
206         ctype* restrict beta_cast = beta; \
207         ctype* restrict pi1       = p; \
208         ctype* restrict alpha1    = a; \
210         if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
211         { \
212                 if ( bli_is_conj( conjp ) ) \
213                 { \
214                         for ( ; n != 0; --n ) \
215                         { \
216                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
217                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
218                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
219                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
220                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
221                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
223                                 pi1    += ldp; \
224                                 alpha1 += lda; \
225                         } \
226                 } \
227                 else \
228                 { \
229                         for ( ; n != 0; --n ) \
230                         { \
231                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
232                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
233                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
234                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
235                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
236                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
238                                 pi1    += ldp; \
239                                 alpha1 += lda; \
240                         } \
241                 } \
242         } \
243         else \
244         { \
245                 if ( bli_is_conj( conjp ) ) \
246                 { \
247                         for ( ; n != 0; --n ) \
248                         { \
249                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
250                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
251                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
252                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
253                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
254                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
256                                 pi1    += ldp; \
257                                 alpha1 += lda; \
258                         } \
259                 } \
260                 else \
261                 { \
262                         for ( ; n != 0; --n ) \
263                         { \
264                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
265                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
266                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
267                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
268                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
269                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
271                                 pi1    += ldp; \
272                                 alpha1 += lda; \
273                         } \
274                 } \
275         } \
278 INSERT_GENTFUNC_BASIC( unpackm_ref_6xk, unpackm_ref_6xk )
283 #undef  GENTFUNC
284 #define GENTFUNC( ctype, ch, opname, varname ) \
286 void PASTEMAC(ch,varname)( \
287                            conj_t  conjp, \
288                            dim_t   n, \
289                            void*   beta, \
290                            void*   p, \
291                            void*   a, inc_t inca, inc_t lda  \
292                          ) \
293 { \
294         const inc_t     ldp       = 8; \
296         ctype* restrict beta_cast = beta; \
297         ctype* restrict pi1       = p; \
298         ctype* restrict alpha1    = a; \
300         if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
301         { \
302                 if ( bli_is_conj( conjp ) ) \
303                 { \
304                         for ( ; n != 0; --n ) \
305                         { \
306                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
307                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
308                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
309                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
310                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
311                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
312                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
313                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
315                                 pi1    += ldp; \
316                                 alpha1 += lda; \
317                         } \
318                 } \
319                 else \
320                 { \
321                         for ( ; n != 0; --n ) \
322                         { \
323                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
324                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
325                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
326                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
327                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
328                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
329                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
330                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
332                                 pi1    += ldp; \
333                                 alpha1 += lda; \
334                         } \
335                 } \
336         } \
337         else \
338         { \
339                 if ( bli_is_conj( conjp ) ) \
340                 { \
341                         for ( ; n != 0; --n ) \
342                         { \
343                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
344                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
345                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
346                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
347                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
348                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
349                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
350                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
352                                 pi1    += ldp; \
353                                 alpha1 += lda; \
354                         } \
355                 } \
356                 else \
357                 { \
358                         for ( ; n != 0; --n ) \
359                         { \
360                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
361                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
362                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
363                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
364                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
365                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
366                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
367                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
369                                 pi1    += ldp; \
370                                 alpha1 += lda; \
371                         } \
372                 } \
373         } \
376 INSERT_GENTFUNC_BASIC( unpackm_ref_8xk, unpackm_ref_8xk )
379 #ifndef BLIS_ENABLE_C66X_BUILD // these packs are not used in c66x; allows faster compilation
381 #undef  GENTFUNC
382 #define GENTFUNC( ctype, ch, opname, varname ) \
384 void PASTEMAC(ch,varname)( \
385                            conj_t  conjp, \
386                            dim_t   n, \
387                            void*   beta, \
388                            void*   p, \
389                            void*   a, inc_t inca, inc_t lda  \
390                          ) \
391 { \
392         const inc_t     ldp       = 10; \
394         ctype* restrict beta_cast = beta; \
395         ctype* restrict pi1       = p; \
396         ctype* restrict alpha1    = a; \
398         if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
399         { \
400                 if ( bli_is_conj( conjp ) ) \
401                 { \
402                         for ( ; n != 0; --n ) \
403                         { \
404                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
405                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
406                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
407                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
408                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
409                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
410                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
411                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
412                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
413                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
415                                 pi1    += ldp; \
416                                 alpha1 += lda; \
417                         } \
418                 } \
419                 else \
420                 { \
421                         for ( ; n != 0; --n ) \
422                         { \
423                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
424                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
425                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
426                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
427                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
428                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
429                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
430                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
431                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
432                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
434                                 pi1    += ldp; \
435                                 alpha1 += lda; \
436                         } \
437                 } \
438         } \
439         else \
440         { \
441                 if ( bli_is_conj( conjp ) ) \
442                 { \
443                         for ( ; n != 0; --n ) \
444                         { \
445                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
446                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
447                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
448                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
449                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
450                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
451                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
452                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
453                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
454                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
456                                 pi1    += ldp; \
457                                 alpha1 += lda; \
458                         } \
459                 } \
460                 else \
461                 { \
462                         for ( ; n != 0; --n ) \
463                         { \
464                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
465                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
466                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
467                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
468                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
469                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
470                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
471                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
472                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
473                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
475                                 pi1    += ldp; \
476                                 alpha1 += lda; \
477                         } \
478                 } \
479         } \
482 INSERT_GENTFUNC_BASIC( unpackm_ref_10xk, unpackm_ref_10xk )
487 #undef  GENTFUNC
488 #define GENTFUNC( ctype, ch, opname, varname ) \
490 void PASTEMAC(ch,varname)( \
491                            conj_t  conjp, \
492                            dim_t   n, \
493                            void*   beta, \
494                            void*   p, \
495                            void*   a, inc_t inca, inc_t lda  \
496                          ) \
497 { \
498         const inc_t     ldp       = 12; \
500         ctype* restrict beta_cast = beta; \
501         ctype* restrict pi1       = p; \
502         ctype* restrict alpha1    = a; \
504         if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
505         { \
506                 if ( bli_is_conj( conjp ) ) \
507                 { \
508                         for ( ; n != 0; --n ) \
509                         { \
510                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
511                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
512                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
513                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
514                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
515                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
516                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
517                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
518                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
519                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
520                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
521                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
523                                 pi1    += ldp; \
524                                 alpha1 += lda; \
525                         } \
526                 } \
527                 else \
528                 { \
529                         for ( ; n != 0; --n ) \
530                         { \
531                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
532                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
533                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
534                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
535                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
536                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
537                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
538                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
539                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
540                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
541                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
542                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
544                                 pi1    += ldp; \
545                                 alpha1 += lda; \
546                         } \
547                 } \
548         } \
549         else \
550         { \
551                 if ( bli_is_conj( conjp ) ) \
552                 { \
553                         for ( ; n != 0; --n ) \
554                         { \
555                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
556                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
557                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
558                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
559                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
560                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
561                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
562                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
563                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
564                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
565                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
566                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
568                                 pi1    += ldp; \
569                                 alpha1 += lda; \
570                         } \
571                 } \
572                 else \
573                 { \
574                         for ( ; n != 0; --n ) \
575                         { \
576                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
577                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
578                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
579                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
580                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
581                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
582                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
583                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
584                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
585                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
586                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
587                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
589                                 pi1    += ldp; \
590                                 alpha1 += lda; \
591                         } \
592                 } \
593         } \
596 INSERT_GENTFUNC_BASIC( unpackm_ref_12xk, unpackm_ref_12xk )
601 #undef  GENTFUNC
602 #define GENTFUNC( ctype, ch, opname, varname ) \
604 void PASTEMAC(ch,varname)( \
605                            conj_t  conjp, \
606                            dim_t   n, \
607                            void*   beta, \
608                            void*   p, \
609                            void*   a, inc_t inca, inc_t lda  \
610                          ) \
611 { \
612         const inc_t     ldp       = 14; \
614         ctype* restrict beta_cast = beta; \
615         ctype* restrict pi1       = p; \
616         ctype* restrict alpha1    = a; \
618         if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
619         { \
620                 if ( bli_is_conj( conjp ) ) \
621                 { \
622                         for ( ; n != 0; --n ) \
623                         { \
624                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
625                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
626                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
627                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
628                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
629                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
630                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
631                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
632                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
633                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
634                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
635                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
636                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 12), *(alpha1 + 12*inca) ); \
637                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 13), *(alpha1 + 13*inca) ); \
639                                 pi1    += ldp; \
640                                 alpha1 += lda; \
641                         } \
642                 } \
643                 else \
644                 { \
645                         for ( ; n != 0; --n ) \
646                         { \
647                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
648                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
649                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
650                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
651                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
652                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
653                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
654                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
655                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
656                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
657                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
658                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
659                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 12), *(alpha1 + 12*inca) ); \
660                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 13), *(alpha1 + 13*inca) ); \
662                                 pi1    += ldp; \
663                                 alpha1 += lda; \
664                         } \
665                 } \
666         } \
667         else \
668         { \
669                 if ( bli_is_conj( conjp ) ) \
670                 { \
671                         for ( ; n != 0; --n ) \
672                         { \
673                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
674                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
675                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
676                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
677                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
678                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
679                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
680                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
681                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
682                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
683                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
684                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
685                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
686                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
688                                 pi1    += ldp; \
689                                 alpha1 += lda; \
690                         } \
691                 } \
692                 else \
693                 { \
694                         for ( ; n != 0; --n ) \
695                         { \
696                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
697                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
698                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
699                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
700                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
701                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
702                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
703                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
704                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
705                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
706                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
707                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
708                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
709                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
711                                 pi1    += ldp; \
712                                 alpha1 += lda; \
713                         } \
714                 } \
715         } \
718 INSERT_GENTFUNC_BASIC( unpackm_ref_14xk, unpackm_ref_14xk )
723 #undef  GENTFUNC
724 #define GENTFUNC( ctype, ch, opname, varname ) \
726 void PASTEMAC(ch,varname)( \
727                            conj_t  conjp, \
728                            dim_t   n, \
729                            void*   beta, \
730                            void*   p, \
731                            void*   a, inc_t inca, inc_t lda  \
732                          ) \
733 { \
734         const inc_t     ldp       = 16; \
736         ctype* restrict beta_cast = beta; \
737         ctype* restrict pi1       = p; \
738         ctype* restrict alpha1    = a; \
740         if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \
741         { \
742                 if ( bli_is_conj( conjp ) ) \
743                 { \
744                         for ( ; n != 0; --n ) \
745                         { \
746                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
747                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
748                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
749                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
750                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
751                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
752                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
753                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
754                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
755                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
756                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
757                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
758                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 12), *(alpha1 + 12*inca) ); \
759                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 13), *(alpha1 + 13*inca) ); \
760                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 14), *(alpha1 + 14*inca) ); \
761                                 PASTEMAC2(ch,ch,copyjs)( *(pi1 + 15), *(alpha1 + 15*inca) ); \
763                                 pi1    += ldp; \
764                                 alpha1 += lda; \
765                         } \
766                 } \
767                 else \
768                 { \
769                         for ( ; n != 0; --n ) \
770                         { \
771                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
772                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
773                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
774                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
775                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
776                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
777                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
778                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
779                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \
780                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \
781                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \
782                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \
783                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 12), *(alpha1 + 12*inca) ); \
784                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 13), *(alpha1 + 13*inca) ); \
785                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 14), *(alpha1 + 14*inca) ); \
786                                 PASTEMAC2(ch,ch,copys)( *(pi1 + 15), *(alpha1 + 15*inca) ); \
788                                 pi1    += ldp; \
789                                 alpha1 += lda; \
790                         } \
791                 } \
792         } \
793         else \
794         { \
795                 if ( bli_is_conj( conjp ) ) \
796                 { \
797                         for ( ; n != 0; --n ) \
798                         { \
799                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
800                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
801                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
802                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
803                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
804                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
805                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
806                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
807                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
808                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
809                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
810                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
811                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
812                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
813                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
814                                 PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
816                                 pi1    += ldp; \
817                                 alpha1 += lda; \
818                         } \
819                 } \
820                 else \
821                 { \
822                         for ( ; n != 0; --n ) \
823                         { \
824                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
825                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
826                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
827                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
828                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
829                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
830                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
831                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
832                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \
833                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \
834                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \
835                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \
836                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \
837                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \
838                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \
839                                 PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \
841                                 pi1    += ldp; \
842                                 alpha1 += lda; \
843                         } \
844                 } \
845         } \
848 INSERT_GENTFUNC_BASIC( unpackm_ref_16xk, unpackm_ref_16xk )
849 #endif