2 /******************************************************************************
3 * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * * Neither the name of Texas Instruments Incorporated nor the
14 * names of its contributors may be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27 * THE POSSIBILITY OF SUCH DAMAGE.
28 *****************************************************************************/
#ifndef TI_CBLAS_H
#define TI_CBLAS_H

#ifdef __cplusplus
#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <cstring>
#include <ctime>
#include <iostream>
#include <fstream>
#include <pthread.h>
#define __CL_ENABLE_EXCEPTIONS
#include <CL/cl.hpp>
using namespace std;
using namespace cl;
/* Both cl and std namespace define size_t, so we must be explicit */
#define size_t ::size_t
#ifndef TI_CBLAS_FAT_BINARY
#include "ocl_util.h"
#endif
#else
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <time.h>
#include <math.h>
#include <assert.h>
#include <memory.h>
#include <pthread.h>
#include <CL/cl.h>
#include <CL/cl_ext.h>
#endif
/* Run-time switch read by TI_CBLAS_DEBUG_PRINT: a non-zero value suppresses debug output. */
extern int ti_cblas_disable_debug;

/* useful macros */

/*
 * TI_CBLAS_DEBUG_PRINT(fmt, ...)
 *   printf-style message written to stderr with a "TI_CBLAS DEBUG: " prefix.
 *   Expands to nothing unless TI_CBLAS_DEBUG is defined at compile time.
 */
#ifdef TI_CBLAS_DEBUG
#define TI_CBLAS_DEBUG_PRINT(...) { if (! ti_cblas_disable_debug) { fprintf(stderr,"TI_CBLAS DEBUG: "); fprintf(stderr, __VA_ARGS__); } }
#else
#define TI_CBLAS_DEBUG_PRINT(...)
#endif

/* TI_CBLAS_ERROR_PRINT(fmt, ...): printf-style message on stderr with a "TI_CBLAS ERROR: " prefix. */
#define TI_CBLAS_ERROR_PRINT(...) { fprintf(stderr,"TI_CBLAS ERROR: "); fprintf(stderr, __VA_ARGS__); }

/* TI_CBLAS_ERROR_EXIT(fmt, ...): same as TI_CBLAS_ERROR_PRINT, then terminates the process with status 1. */
#define TI_CBLAS_ERROR_EXIT(...) { fprintf(stderr,"TI_CBLAS ERROR: "); fprintf(stderr, __VA_ARGS__); exit(1); }

/*
 * TI_CBLAS_OCL_CHKERROR(A, B)
 *   A: string naming the OpenCL operation (printed with %s).
 *   B: integer OpenCL status code.
 *   If B is not CL_SUCCESS, reports the failure and exits with B as the
 *   process status.
 *
 *   B is captured once in a local so an argument with side effects (for
 *   example a function call) is evaluated a single time, and the whole
 *   expansion is braced so that a following `else` cannot attach to the
 *   macro's internal `if`.
 */
#define TI_CBLAS_OCL_CHKERROR(A, B) \
    { \
        int ti_cblas_ocl_status_ = (B); \
        if (ti_cblas_ocl_status_ != CL_SUCCESS) { \
            TI_CBLAS_ERROR_PRINT("opencl %s, error %d\n", (A), ti_cblas_ocl_status_); \
            exit(ti_cblas_ocl_status_); \
        } \
    }
/*
 * Smaller / larger of two values.
 * These are plain function-like macros: the selected argument is evaluated
 * twice, so do not pass expressions with side effects.
 * Guarded with #ifndef because system headers (e.g. glibc <sys/param.h>)
 * may already define MIN and MAX.
 */
#ifndef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif
#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif

/* Logical exclusive-or of the truth values of a and b (each operand is reduced with !). */
#define XOR(a,b) ((!(a)) != (!(b)))

/* Three-way logical exclusive-or: true when an odd number of a, b, c are non-zero. */
#define XOR3(a,b,c) (((!(a)) != (!(b))) == (!(c)))
/*
 * Profiling support.
 *
 * With TI_CBLAS_PROFILE defined:
 *   TI_CBLAS_PROFILE_START()      declares `clock1` in the current scope and
 *                                 stores the current TI_CBLAS_CLOCK time in it.
 *   TI_CBLAS_PROFILE_REPORT(...)  declares `clock2`, stores the current time in
 *                                 it, then prints the printf-style arguments.
 *   clock_diff                    elapsed microseconds between clock1 and
 *                                 clock2; only usable where both are in scope
 *                                 (typically inside the REPORT arguments).
 *   TI_CBLAS_PROFILE_PRINT(...)   printf-style message on stderr prefixed with
 *                                 "TI_CBLAS PROFILE: ". Braced so that both
 *                                 fprintf calls stay under a guarding `if`.
 *
 * Without TI_CBLAS_PROFILE every macro expands to nothing.
 *
 * NOTE(review): TI_CBLAS_PROFILE_OCL_REPORT() is only defined (as empty) in
 * the disabled branch; no enabled definition is visible in this header, so
 * confirm where it comes from when TI_CBLAS_PROFILE is set.
 */
#ifdef TI_CBLAS_PROFILE
#define TI_CBLAS_CLOCK CLOCK_REALTIME
#define clock_diff ((clock2.tv_sec-clock1.tv_sec)*1e6 + (clock2.tv_nsec-clock1.tv_nsec)/1e3)
#define TI_CBLAS_PROFILE_PRINT(...) { fprintf(stderr,"TI_CBLAS PROFILE: "); fprintf(stderr, __VA_ARGS__); }
#define TI_CBLAS_PROFILE_START() struct timespec clock1; clock_gettime(TI_CBLAS_CLOCK, &clock1);
#define TI_CBLAS_PROFILE_REPORT(...) struct timespec clock2; clock_gettime(TI_CBLAS_CLOCK, &clock2); TI_CBLAS_PROFILE_PRINT(__VA_ARGS__)

#else /* TI_CBLAS_PROFILE */
#define TI_CBLAS_PROFILE_PRINT(...)
#define TI_CBLAS_PROFILE_START()
#define TI_CBLAS_PROFILE_REPORT(...)
#define TI_CBLAS_PROFILE_OCL_REPORT()
#endif /* TI_CBLAS_PROFILE */
96 #ifdef __cplusplus
97 extern "C" {
98 #endif
100 #include "cblas.h"
101 #ifdef __cplusplus
102 }
103 #endif
105 #ifdef __cplusplus
106 extern "C" {
107 #endif
108 #include "blis.h"
109 #ifdef __cplusplus
110 }
111 #endif
/*
 * Offload control codes:
 *   NONE: execute on ARM only
 *   DSP : force offload to DSP
 *   SIZE: the decision to offload or not is made based on problem size
 */
#define TI_CBLAS_OFFLOAD_NONE 0
#define TI_CBLAS_OFFLOAD_DSP 1
#define TI_CBLAS_OFFLOAD_SIZE 2
121 /* Global functions and variables */
122 extern void ti_cblas_error(const char* msg, int code);
123 extern void ti_cblas_init(void);
125 extern err_t bli_finalize();
126 extern err_t bli_init();
/*
 * Finalisation / BLIS initialisation entry points.
 * Declared with C linkage so C and C++ translation units refer to the same
 * symbols. Both return int.
 */
#ifdef __cplusplus
extern "C" {
#endif

int ti_cblas_finalize(void);
int ti_blis_init(void);

#ifdef __cplusplus
}
#endif
/* Finalisation hook; presumably registered to run automatically at exit -- confirm against the implementation. */
void ti_cblas_auto_finalize(void);

/*
 * Library memory helpers. Presumably a pointer obtained from
 * ti_cblas_mem_alloc() must be released with ti_cblas_mem_free() -- confirm
 * against the implementation.
 */
void ti_cblas_mem_free(void *ptr);
void *ti_cblas_mem_alloc(size_t size);

/* Process-wide condition variable and mutex defined elsewhere in the library. */
extern pthread_cond_t CV;
extern pthread_mutex_t MUTEX;
150 #ifdef __cplusplus
151 extern Kernel* ti_cblas_get_kernel(int idx, const char *fname);
152 int ti_cblas_delete_kernel(Kernel* K);
153 #if 0
154 extern Context ti_cblas_ocl_context;
155 extern std::vector<Device> ti_cblas_ocl_devices;
156 extern CommandQueue ti_cblas_ocl_Q;
157 extern Program::Binaries ti_cblas_ocl_binary;
158 extern Program ti_cblas_ocl_program;
159 extern Kernel* ti_cblas_ocl_kernels[];
160 #else
161 extern Context* ti_cblas_ocl_context;
162 extern std::vector<Device>* ti_cblas_ocl_devices;
163 extern CommandQueue* ti_cblas_ocl_Q;
164 extern Program::Binaries* ti_cblas_ocl_binary;
165 extern Program* ti_cblas_ocl_program;
166 #endif
167 #else
168 extern cl_kernel ti_cblas_get_kernel(int idx, const char *fname);
169 int ti_cblas_delete_kernel(cl_kernel K);
170 extern cl_context ti_cblas_ocl_context;
171 extern cl_command_queue ti_cblas_ocl_Q;
172 extern cl_program ti_cblas_ocl_program;
173 extern cl_kernel ti_cblas_ocl_kernels[];
174 #endif
/*
 * Library state flags, defined elsewhere.
 * Presumably: ti_cblas_init_done records whether initialisation has run,
 * ti_cblas_kernel_valid[] is indexed by the TI_CBLAS_CBLAS_*_IDX values, and
 * ti_cblas_offload holds one of the TI_CBLAS_OFFLOAD_* codes -- confirm
 * against the implementation.
 */
extern int ti_cblas_init_done;
extern int ti_cblas_kernel_valid[];
extern int ti_cblas_offload;
/*
 * Kernel index for every CBLAS routine, in alphabetical order of routine
 * name, followed by the total count. The indices are dense (0 .. count-1).
 * Presumably used as the `idx` argument of ti_cblas_get_kernel() and as the
 * subscript of the per-kernel tables -- confirm against the implementation.
 */
#define TI_CBLAS_CBLAS_CAXPY_IDX 0
#define TI_CBLAS_CBLAS_CCOPY_IDX 1
#define TI_CBLAS_CBLAS_CDOTC_SUB_IDX 2
#define TI_CBLAS_CBLAS_CDOTU_SUB_IDX 3
#define TI_CBLAS_CBLAS_CGBMV_IDX 4
#define TI_CBLAS_CBLAS_CGEMM_IDX 5
#define TI_CBLAS_CBLAS_CGEMV_IDX 6
#define TI_CBLAS_CBLAS_CGERC_IDX 7
#define TI_CBLAS_CBLAS_CGERU_IDX 8
#define TI_CBLAS_CBLAS_CHBMV_IDX 9
#define TI_CBLAS_CBLAS_CHEMM_IDX 10
#define TI_CBLAS_CBLAS_CHEMV_IDX 11
#define TI_CBLAS_CBLAS_CHER_IDX 12
#define TI_CBLAS_CBLAS_CHER2_IDX 13
#define TI_CBLAS_CBLAS_CHER2K_IDX 14
#define TI_CBLAS_CBLAS_CHERK_IDX 15
#define TI_CBLAS_CBLAS_CHPMV_IDX 16
#define TI_CBLAS_CBLAS_CHPR_IDX 17
#define TI_CBLAS_CBLAS_CHPR2_IDX 18
#define TI_CBLAS_CBLAS_CROTG_IDX 19
#define TI_CBLAS_CBLAS_CSCAL_IDX 20
#define TI_CBLAS_CBLAS_CSSCAL_IDX 21
#define TI_CBLAS_CBLAS_CSWAP_IDX 22
#define TI_CBLAS_CBLAS_CSYMM_IDX 23
#define TI_CBLAS_CBLAS_CSYR2K_IDX 24
#define TI_CBLAS_CBLAS_CSYRK_IDX 25
#define TI_CBLAS_CBLAS_CTBMV_IDX 26
#define TI_CBLAS_CBLAS_CTBSV_IDX 27
#define TI_CBLAS_CBLAS_CTPMV_IDX 28
#define TI_CBLAS_CBLAS_CTPSV_IDX 29
#define TI_CBLAS_CBLAS_CTRMM_IDX 30
#define TI_CBLAS_CBLAS_CTRMV_IDX 31
#define TI_CBLAS_CBLAS_CTRSM_IDX 32
#define TI_CBLAS_CBLAS_CTRSV_IDX 33
#define TI_CBLAS_CBLAS_DASUM_IDX 34
#define TI_CBLAS_CBLAS_DAXPY_IDX 35
#define TI_CBLAS_CBLAS_DCOPY_IDX 36
#define TI_CBLAS_CBLAS_DDOT_IDX 37
#define TI_CBLAS_CBLAS_DGBMV_IDX 38
#define TI_CBLAS_CBLAS_DGEMM_IDX 39
#define TI_CBLAS_CBLAS_DGEMV_IDX 40
#define TI_CBLAS_CBLAS_DGER_IDX 41
#define TI_CBLAS_CBLAS_DNRM2_IDX 42
#define TI_CBLAS_CBLAS_DROT_IDX 43
#define TI_CBLAS_CBLAS_DROTG_IDX 44
#define TI_CBLAS_CBLAS_DROTM_IDX 45
#define TI_CBLAS_CBLAS_DROTMG_IDX 46
#define TI_CBLAS_CBLAS_DSBMV_IDX 47
#define TI_CBLAS_CBLAS_DSCAL_IDX 48
#define TI_CBLAS_CBLAS_DSDOT_IDX 49
#define TI_CBLAS_CBLAS_DSPMV_IDX 50
#define TI_CBLAS_CBLAS_DSPR_IDX 51
#define TI_CBLAS_CBLAS_DSPR2_IDX 52
#define TI_CBLAS_CBLAS_DSWAP_IDX 53
#define TI_CBLAS_CBLAS_DSYMM_IDX 54
#define TI_CBLAS_CBLAS_DSYMV_IDX 55
#define TI_CBLAS_CBLAS_DSYR_IDX 56
#define TI_CBLAS_CBLAS_DSYR2_IDX 57
#define TI_CBLAS_CBLAS_DSYR2K_IDX 58
#define TI_CBLAS_CBLAS_DSYRK_IDX 59
#define TI_CBLAS_CBLAS_DTBMV_IDX 60
#define TI_CBLAS_CBLAS_DTBSV_IDX 61
#define TI_CBLAS_CBLAS_DTPMV_IDX 62
#define TI_CBLAS_CBLAS_DTPSV_IDX 63
#define TI_CBLAS_CBLAS_DTRMM_IDX 64
#define TI_CBLAS_CBLAS_DTRMV_IDX 65
#define TI_CBLAS_CBLAS_DTRSM_IDX 66
#define TI_CBLAS_CBLAS_DTRSV_IDX 67
#define TI_CBLAS_CBLAS_DZASUM_IDX 68
#define TI_CBLAS_CBLAS_DZNRM2_IDX 69
#define TI_CBLAS_CBLAS_ICAMAX_IDX 70
#define TI_CBLAS_CBLAS_IDAMAX_IDX 71
#define TI_CBLAS_CBLAS_ISAMAX_IDX 72
#define TI_CBLAS_CBLAS_IZAMAX_IDX 73
#define TI_CBLAS_CBLAS_SASUM_IDX 74
#define TI_CBLAS_CBLAS_SAXPY_IDX 75
#define TI_CBLAS_CBLAS_SCASUM_IDX 76
#define TI_CBLAS_CBLAS_SCNRM2_IDX 77
#define TI_CBLAS_CBLAS_SCOPY_IDX 78
#define TI_CBLAS_CBLAS_SDOT_IDX 79
#define TI_CBLAS_CBLAS_SDSDOT_IDX 80
#define TI_CBLAS_CBLAS_SGBMV_IDX 81
#define TI_CBLAS_CBLAS_SGEMM_IDX 82
#define TI_CBLAS_CBLAS_SGEMV_IDX 83
#define TI_CBLAS_CBLAS_SGER_IDX 84
#define TI_CBLAS_CBLAS_SNRM2_IDX 85
#define TI_CBLAS_CBLAS_SROT_IDX 86
#define TI_CBLAS_CBLAS_SROTG_IDX 87
#define TI_CBLAS_CBLAS_SROTM_IDX 88
#define TI_CBLAS_CBLAS_SROTMG_IDX 89
#define TI_CBLAS_CBLAS_SSBMV_IDX 90
#define TI_CBLAS_CBLAS_SSCAL_IDX 91
#define TI_CBLAS_CBLAS_SSPMV_IDX 92
#define TI_CBLAS_CBLAS_SSPR_IDX 93
#define TI_CBLAS_CBLAS_SSPR2_IDX 94
#define TI_CBLAS_CBLAS_SSWAP_IDX 95
#define TI_CBLAS_CBLAS_SSYMM_IDX 96
#define TI_CBLAS_CBLAS_SSYMV_IDX 97
#define TI_CBLAS_CBLAS_SSYR_IDX 98
#define TI_CBLAS_CBLAS_SSYR2_IDX 99
#define TI_CBLAS_CBLAS_SSYR2K_IDX 100
#define TI_CBLAS_CBLAS_SSYRK_IDX 101
#define TI_CBLAS_CBLAS_STBMV_IDX 102
#define TI_CBLAS_CBLAS_STBSV_IDX 103
#define TI_CBLAS_CBLAS_STPMV_IDX 104
#define TI_CBLAS_CBLAS_STPSV_IDX 105
#define TI_CBLAS_CBLAS_STRMM_IDX 106
#define TI_CBLAS_CBLAS_STRMV_IDX 107
#define TI_CBLAS_CBLAS_STRSM_IDX 108
#define TI_CBLAS_CBLAS_STRSV_IDX 109
#define TI_CBLAS_CBLAS_XERBLA_IDX 110
#define TI_CBLAS_CBLAS_ZAXPY_IDX 111
#define TI_CBLAS_CBLAS_ZCOPY_IDX 112
#define TI_CBLAS_CBLAS_ZDOTC_SUB_IDX 113
#define TI_CBLAS_CBLAS_ZDOTU_SUB_IDX 114
#define TI_CBLAS_CBLAS_ZDSCAL_IDX 115
#define TI_CBLAS_CBLAS_ZGBMV_IDX 116
#define TI_CBLAS_CBLAS_ZGEMM_IDX 117
#define TI_CBLAS_CBLAS_ZGEMV_IDX 118
#define TI_CBLAS_CBLAS_ZGERC_IDX 119
#define TI_CBLAS_CBLAS_ZGERU_IDX 120
#define TI_CBLAS_CBLAS_ZHBMV_IDX 121
#define TI_CBLAS_CBLAS_ZHEMM_IDX 122
#define TI_CBLAS_CBLAS_ZHEMV_IDX 123
#define TI_CBLAS_CBLAS_ZHER_IDX 124
#define TI_CBLAS_CBLAS_ZHER2_IDX 125
#define TI_CBLAS_CBLAS_ZHER2K_IDX 126
#define TI_CBLAS_CBLAS_ZHERK_IDX 127
#define TI_CBLAS_CBLAS_ZHPMV_IDX 128
#define TI_CBLAS_CBLAS_ZHPR_IDX 129
#define TI_CBLAS_CBLAS_ZHPR2_IDX 130
#define TI_CBLAS_CBLAS_ZROTG_IDX 131
#define TI_CBLAS_CBLAS_ZSCAL_IDX 132
#define TI_CBLAS_CBLAS_ZSWAP_IDX 133
#define TI_CBLAS_CBLAS_ZSYMM_IDX 134
#define TI_CBLAS_CBLAS_ZSYR2K_IDX 135
#define TI_CBLAS_CBLAS_ZSYRK_IDX 136
#define TI_CBLAS_CBLAS_ZTBMV_IDX 137
#define TI_CBLAS_CBLAS_ZTBSV_IDX 138
#define TI_CBLAS_CBLAS_ZTPMV_IDX 139
#define TI_CBLAS_CBLAS_ZTPSV_IDX 140
#define TI_CBLAS_CBLAS_ZTRMM_IDX 141
#define TI_CBLAS_CBLAS_ZTRMV_IDX 142
#define TI_CBLAS_CBLAS_ZTRSM_IDX 143
#define TI_CBLAS_CBLAS_ZTRSV_IDX 144
#define TI_CBLAS_NUM_KERNELS 145

/* Catch a routine being added or removed without updating the count. */
#if TI_CBLAS_NUM_KERNELS != (TI_CBLAS_CBLAS_ZTRSV_IDX + 1)
#error "TI_CBLAS_NUM_KERNELS must be one more than the last kernel index"
#endif
/*
 * Dimensions of the size-based offload lookup tables.
 * Each table has NUM_PNT_EACH_DIM points per dimension: the GEMM table is
 * three-dimensional (16^3 entries); SYRK, TRMM and TRSM are two-dimensional
 * (16^2 entries).
 */
#define NUM_PNT_EACH_DIM 16
#define GEMM_OFFLOAD_TBL_SIZE (NUM_PNT_EACH_DIM*NUM_PNT_EACH_DIM*NUM_PNT_EACH_DIM)
#define SYRK_OFFLOAD_TBL_SIZE (NUM_PNT_EACH_DIM*NUM_PNT_EACH_DIM)
#define TRMM_OFFLOAD_TBL_SIZE (NUM_PNT_EACH_DIM*NUM_PNT_EACH_DIM)
#define TRSM_OFFLOAD_TBL_SIZE (NUM_PNT_EACH_DIM*NUM_PNT_EACH_DIM)
/*
 * Compile-time defaults.
 * TI_CBLAS_OFFLOAD is a string and can be overridden on the compiler command
 * line. Presumably it carries one digit per BLAS level (L1, L2, L3), each a
 * TI_CBLAS_OFFLOAD_* code -- confirm against the code that parses it.
 */
#ifndef TI_CBLAS_OFFLOAD
#define TI_CBLAS_OFFLOAD "002"
#endif
/*
 * Macros used for BLAS/LAPACK buffer size calculations.
 * Each one selects between two candidate values `a` and `b`. Selector
 * arguments are fully parenthesised so that any expression (including a
 * conditional expression) can be passed.
 *
 *   BLAS_ORD(Order,a,b)     a if Order is CblasRowMajor, else b.
 *   BLAS_TRN(Trans,a,b)     a if Trans is CblasNoTrans, else b.
 *   BLAS_SIDE(Side,a,b)     a if Side is CblasLeft, else b.
 *   BLAS_ORD_T(...)         a if "row-major" and "no-trans" are both true or
 *                           both false, else b.
 *   BLAS_ORD_S(...)         a if "row-major" and "left side" are both true or
 *                           both false, else b. Side may be CblasLeft or the
 *                           character 'L' / 'l'.
 *   BLAS_ORD_TS(...)        b if an odd number of "row-major", "no-trans",
 *                           "left side" hold, else a.
 */
#define BLAS_ORD(Order,a,b) (((Order)==CblasRowMajor)? (a):(b))
#define BLAS_TRN(Trans,a,b) (((Trans)==CblasNoTrans)? (a):(b))
#define BLAS_SIDE(Side,a,b) (((Side)==CblasLeft)?(a):(b))
#define BLAS_ORD_T(Order,Trans,a,b) (XOR(((Order)==CblasRowMajor),((Trans)==CblasNoTrans))?(b):(a))
#define BLAS_ORD_S(Order,Side,a,b) (XOR(((Order)==CblasRowMajor),(((Side)=='L')||((Side)=='l')||((Side)==CblasLeft)))?(b):(a))
#define BLAS_ORD_TS(Order,Trans,Side,a,b) (XOR3(((Order)==CblasRowMajor),((Trans)==CblasNoTrans),(((Side)=='L')||((Side)=='l')||((Side)==CblasLeft)))?(b):(a))
/*
 * Per-level offload settings, defined elsewhere.
 * Presumably one TI_CBLAS_OFFLOAD_* code for each of BLAS level 1, 2 and 3
 * -- confirm against the implementation.
 */
extern int TI_CBLAS_L1_OFFLOAD;
extern int TI_CBLAS_L2_OFFLOAD;
extern int TI_CBLAS_L3_OFFLOAD;
/*
 * Allocated MSMC and L2 buffer sizes.
 * Be careful to allocate enough so that memory overrun does not happen in
 * BLIS/BLAS calls.
 */

/*
 * L2 cache buffer.
 *
 * Pool figures recorded when the buffer was first sized:
 *   MK: Pool Size 550976, Num Blocks 2, Block size 275424
 *   KN: Pool Size 128,    Num Blocks 0, Block size 3071640
 *   MN: Pool Size 128,    Num Blocks 0, Block size 1400832
 *   0x86940 = 551232 = 550976 + 128 + 128
 *
 * Previously tried values: 0x84000, 0x86940, 0x862A0.
 * Current value: added 3 MNR buffers for C, and increased Kc for Z.
 * Alternative with MR=NR=4 for S: 0xBF980.
 */
#define L2_BUF_SIZE 0xBFE00

/*
 * L3 cache (MSMC) buffer.
 *
 * Pool figures recorded when the buffer was first sized:
 *   MK: Pool Size 128,     Num Blocks 0, Block size 275424
 *   KN: Pool Size 6143536, Num Blocks 2, Block size 3071640
 *   MN: Pool Size 128,     Num Blocks 0, Block size 1400832
 *   0x5DBF30 = 6143792 = 6143536 + 128 + 128 (ccs map file says 5dbf40)
 *
 * KC values were then changed to fit in the 4.5M of MSMC (intermediate
 * figures noted at the time: 4647B0, 0x4664B4); the value must stay within
 * 4.5M.
 * Previously tried value: 0x4647C0.
 * Alternative with MR=NR=4 for S: 0x47F100.
 * DSPBLIS value: 0x400000.
 */
#define MSMC_BUF_SIZE 0x47FDC0
385 extern char ofld_tbl_sgemm[GEMM_OFFLOAD_TBL_SIZE];
386 extern char ofld_tbl_dgemm[GEMM_OFFLOAD_TBL_SIZE];
387 extern char ofld_tbl_cgemm[GEMM_OFFLOAD_TBL_SIZE];
388 extern char ofld_tbl_zgemm[GEMM_OFFLOAD_TBL_SIZE];
389 extern char ofld_tbl_ssyrk[SYRK_OFFLOAD_TBL_SIZE];
390 extern char ofld_tbl_dsyrk[SYRK_OFFLOAD_TBL_SIZE];
391 extern char ofld_tbl_csyrk[SYRK_OFFLOAD_TBL_SIZE];
392 extern char ofld_tbl_zsyrk[SYRK_OFFLOAD_TBL_SIZE];
393 extern char ofld_tbl_strmm[TRMM_OFFLOAD_TBL_SIZE];
394 extern char ofld_tbl_dtrmm[TRMM_OFFLOAD_TBL_SIZE];
395 extern char ofld_tbl_ctrmm[TRMM_OFFLOAD_TBL_SIZE];
396 extern char ofld_tbl_ztrmm[TRMM_OFFLOAD_TBL_SIZE];
397 extern char ofld_tbl_strsm[TRSM_OFFLOAD_TBL_SIZE];
398 extern char ofld_tbl_dtrsm[TRSM_OFFLOAD_TBL_SIZE];
399 extern char ofld_tbl_ctrsm[TRSM_OFFLOAD_TBL_SIZE];
400 extern char ofld_tbl_ztrsm[TRSM_OFFLOAD_TBL_SIZE];
401 extern int sgemm_offload_dsp(const enum CBLAS_ORDER Order, int M, int N, int K);
402 extern int dgemm_offload_dsp(const enum CBLAS_ORDER Order, int M, int N, int K);
403 extern int cgemm_offload_dsp(const enum CBLAS_ORDER Order, int M, int N, int K);
404 extern int zgemm_offload_dsp(const enum CBLAS_ORDER Order, int M, int N, int K);
405 extern int ssymm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
406 int M, int N);
407 extern int dsymm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
408 int M, int N);
409 extern int csymm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
410 int M, int N);
411 extern int zsymm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
412 int M, int N);
413 extern int chemm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
414 int M, int N);
415 extern int zhemm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
416 int M, int N);
417 extern int ssyrk_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
418 extern int dsyrk_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
419 extern int csyrk_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
420 extern int zsyrk_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
421 extern int cherk_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
422 extern int zherk_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
423 extern int ssyr2k_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
424 extern int dsyr2k_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
425 extern int csyr2k_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
426 extern int zsyr2k_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
427 extern int cher2k_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
428 extern int zher2k_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
429 extern int strmm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
430 int M, int N);
431 extern int dtrmm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
432 int M, int N);
433 extern int ctrmm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
434 int M, int N);
435 extern int ztrmm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
436 int M, int N);
437 extern int strsm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
438 int M, int N);
439 extern int dtrsm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
440 int M, int N);
441 extern int ctrsm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
442 int M, int N);
443 extern int ztrsm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
444 int M, int N);
446 #endif