1 #!/usr/bin/perl -sw
4 my $CTAGS = "/usr/bin/ctags";
5 my $OFFLOAD_MIN = 100; # overridden by -offmin arg; constant
6 my $OFFLOAD_MAX = 10000; # overridden by -offmax arg; constant
8 #### DO NOT EDIT BELOW THIS ####
9 my $namespace; # input
10 my @offloaded; # array of function names to be offloaded, filled in by generate_arm...
11 my $blas_prefix = 'cblas_';
13 my $blas_L1 = '.asum|.axpy|.copy|.dot|.sdot|.dotc|.dotu|.nrm2|.rot|.rotg|.rotmg|.scal|.swap|i.amax|i.amin|.cabs1|';
14 my $blas_L2 = '.gbmv|.gemv|.ger|.gerc|.geru|.hbmv|.hemv|.her|.her2|.hpmv|.hpr|.hpr2|.sbmv|.spmv|.spr|.spr2|.symv|.syr|.syr2|.tbmv|.tbsv|.tpmv|.tpsv|.trmv|.trsv|';
15 my $blas_L3 = '.gemm|.hemm|.herk|.her2k|.symm|.syrk|.syr2k|.trmm|.trsm|';
16 my $blas_L123 = "${blas_L1}|${blas_L2}|${blas_L3}";
18 my $source_code_header =
19 "/******************************************************************************
20 * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
21 * All rights reserved.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions are met:
25 * * Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * * Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 * * Neither the name of Texas Instruments Incorporated nor the
31 * names of its contributors may be used to endorse or promote products
32 * derived from this software without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"
35 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
37 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
38 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
39 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
40 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
41 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
42 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
43 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
44 * THE POSSIBILITY OF SUCH DAMAGE.
45 *****************************************************************************/
46 ";
47 ### low-level functions ###
49 # remove leading and trailing spaces and all newlines
50 sub trim($)
51 {
52 my $string = shift;
53 $string =~ s/^\s+//;
54 $string =~ s/\s+$//;
55 $string =~ s/\s+/ /g;
56 $string =~ s/\t+/ /g;
57 $string =~ s/\n+//g;
58 return $string;
59 }
61 # generates the ocl kernel name
62 sub ocl_munge($)
63 {
64 my $string = shift;
65 $string =~ s/(.*)\ +(\**)\ *(.*)$/void\ $2ocl_$3/;
66 return $string;
67 }
69 # generates the trampoline name
70 sub trampoline_munge($)
71 {
72 my $string = shift;
73 $string =~ s/(.*)\ (.*)$/void\ $2_facade/;
74 return $string;
75 }
77 # /path/to/libcblas.a => libcblas
78 sub get_base_libname($)
79 {
80 my $string = shift;
81 $string = `basename $string`; chomp $string;
82 $string =~ m/([^.]+).*/;
83 return $1;
84 }
86 ### end low-level functons ###
89 ### High-level functions ###
91 # enums and defines need to be copied into the kernel.cl as otherwise
92 # the c66x code will not compiled. We can't simply include the header
93 # in the kernel.cl that breaks the ocl compile.
94 sub get_enums_and_defines
95 {
96 open(HEADER, "<$header") || die "Could not open $header for reading : $!\n";
97 my $code = "";
98 while (<HEADER>) {
99 chomp;
100 $code .= $_ . "\n" if (/^\s*enum\s+/);
101 $code .= $_ . "\n" if (/^\s*#define\s+/);
102 }
103 close(HEADER);
104 return $code;
105 }
107 # generates the initial portion of the Makefile. This needs to be done
108 # only once, and not on a per-function basis
109 sub generate_makefile_prologue
110 {
111 my $package = shift;
112 my $libname = shift;
113 my $uc_package = uc($package);
114 my $NAMESPACE = uc($namespace);
116 # TODO: Change default to create fat binary
117 my $fatbinary = "0";
118 $fatbinary = 1 if ($fatbin);
119 my $makecode = <<"END_MAKEFILE_PROLOGUE";
120 include ./make.inc
121 \$(eval \$(call FIND_DSP_PKG,FC_DIR,framework_components*,packages))
122 \$(eval \$(call FIND_DSP_PKG,XDAIS_DIR,xdais*,packages))
123 \$(eval \$(call FIND_DSP_PKG,${uc_package}_DIR,${package}*,packages))
125 $uc_package = \$(${uc_package}_DIR)/packages/ti/$package
127 # comment below to disable parallel make
128 MAKEFLAGS += -j4
130 # Defines
131 # set below to 1 for creating a fat binary, and 0 to create a .out file
132 # For large libraries you may want to create a .out as creating a fat binary
133 # can run out of space in /tmp
134 ${NAMESPACE}_FAT_BINARY = $fatbinary
137 # INCLUDE Directory
138 INCDIR := \$(TI_OCL_CGT_INSTALL)/include;\$(LINUX_DEVKIT_ROOT)/usr/include;\$(EDMA3LLD_DIR)/packages;\$(FC_DIR)/packages;\$(XDAIS_DIR)/packages;\$(${uc_package})/inc;\$(${uc_package})/example;\$(${uc_package})/example/Setup_ECPY/alg;\$(${uc_package})//example/Setup_ECPY/alg/ires
140 INCS = -I. -I\$(strip \$(subst ;, -I,\$(subst \$(space),\$(space),\$(INCDIR))))
141 OBJS = ${namespace}_initfini.o
142 HOST_OBJS =
144 CPP_FLAGS = -D_LITTLE_ENDIAN -D__ARMv7 -DDEVICE_K2H
145 CL6X_FLAGS = \$(INCS)
146 CLOCL_FLAGS =
147 OBJCOPY = objcopy
148 OBJCOPY_ARGS=
149 DSP_ONLY_LIB= ${libname}_dsponly.a
150 ARM_PLUS_DSP_LIB= ${libname}_armplusdsp.a
152 OCL_BIN = ${namespace}_kernel.out
154 ifeq (\$(${NAMESPACE}_FAT_BINARY), 1)
155 CPP_FLAGS += -D${NAMESPACE}_FAT_BINARY
156 OCL_BIN = ${namespace}_kernel.dsp_h
157 endif
159 # Set the variables so the compiler does not fill up /tmp
160 TMPDIR = \$(PWD)
161 TMP = \$(PWD)
162 export TMPDIR TMP
164 END_MAKEFILE_PROLOGUE
166 $makecode .= "
168 # setting to a default of empty to override environment settings
169 ${NAMESPACE}_OFFLOAD =
171 ifneq (\$(${NAMESPACE}_OFFLOAD),)
172 CPP_FLAGS += -D${NAMESPACE}_OFFLOAD=\\\"\$(${NAMESPACE}_OFFLOAD)\\\"
173 endif
174 " if ($header =~ /blas/);
175 return $makecode;
176 }
178 # generate header specific init code
179 sub generate_header_specific_init_code
180 {
181 my $code = "";
182 my $NAMESPACE = uc($namespace);
183 if ($header =~ /blas/) {
184 $code = "
185 /* 3-digit value: 012
186 * Left-most digit => L1 (0)
187 * Middle-digit => L2 (1)
188 * Right-most => L3 (2)
189 */
190 ${NAMESPACE}_L1_OFFLOAD = ${namespace}_offload / 100;
191 int tmp_offload = ${namespace}_offload % 100;
192 ${NAMESPACE}_L2_OFFLOAD = tmp_offload / 10;
193 ${NAMESPACE}_L3_OFFLOAD = tmp_offload % 10;
194 ${NAMESPACE}_DEBUG_PRINT(\"BLAS Offload values: L1=%d, L2=%d, L3=%d\\n\",
195 ${NAMESPACE}_L1_OFFLOAD, ${NAMESPACE}_L2_OFFLOAD, ${NAMESPACE}_L3_OFFLOAD);
196 if ((${NAMESPACE}_L1_OFFLOAD == ${NAMESPACE}_OFFLOAD_SIZE)) {
197 ${NAMESPACE}_ERROR_EXIT(\"Size-based offload NOT supported for BLAS Level 1 yet.\\n\");
198 }
199 if ((${NAMESPACE}_L2_OFFLOAD == ${NAMESPACE}_OFFLOAD_SIZE)) {
200 ${NAMESPACE}_ERROR_EXIT(\"Size-based offload NOT supported for BLAS Level 2 yet.\\n\");
201 }
202 ";
203 }
204 return $code;
205 }
207 sub generate_header_specific_global_vars
208 {
209 my $code = "";
210 my $NAMESPACE = uc($namespace);
211 if ($header =~ /blas/) {
212 for (my $lvl=1; $lvl<=3; $lvl++) {
213 $code .= "int ${NAMESPACE}_L${lvl}_OFFLOAD = ${NAMESPACE}_OFFLOAD_NONE;\n";
214 }
215 }
216 return $code;
217 }
219 # Generates the ARM init/fini code. Do only once, not on a per-function basis.
220 sub generate_arm_init()
221 {
222 my $NAMESPACE = uc($namespace);
223 my $var_code = generate_header_specific_global_vars();
224 my $hdr_init = generate_header_specific_init_code();
225 my $armcode = <<"END_ARM_INIT";
226 $source_code_header
227 #include "${namespace}.h"
229 #ifdef ${NAMESPACE}_FAT_BINARY
230 #include "${namespace}_kernel.dsp_h"
231 #endif
233 /* Global variables */
234 #ifdef __cplusplus
235 Context ${namespace}_ocl_context;
236 std::vector<Device> ${namespace}_ocl_devices;
237 CommandQueue ${namespace}_ocl_Q;
238 Program::Binaries ${namespace}_ocl_binary;
239 Program ${namespace}_ocl_program;
240 Kernel* ${namespace}_ocl_kernels[${NAMESPACE}_NUM_KERNELS];
241 #else
242 cl_context ${namespace}_ocl_context;
243 cl_command_queue ${namespace}_ocl_Q;
244 cl_program ${namespace}_ocl_program;
245 cl_kernel ${namespace}_ocl_kernels[${NAMESPACE}_NUM_KERNELS];
246 #endif
247 int ${namespace}_init_done = 0; /* flag to check if init is complete */
248 int ${namespace}_disable_debug = 0; /* runtime toggle to disable debug */
249 int ${namespace}_offload = ${NAMESPACE}_OFFLOAD_SIZE;
250 int ${namespace}_kernel_valid[${NAMESPACE}_NUM_KERNELS];
251 $var_code
253 void ${namespace}_error(const char* msg, int code)
254 {
255 fprintf(stderr, "ERROR: (%s,%d)\\n", msg, code);
256 }
258 /* This function is invoked exactly once on startup */
259 /* Its purpose is to parse the environment variables and do OpenCL init */
260 void ${namespace}_init(void)
261 {
262 #pragma omp critical
263 {
264 /* Add code for interception */
265 if (!${namespace}_init_done)
266 {
267 #ifdef ${NAMESPACE}_DEBUG
268 char *no_debug_env = getenv("${NAMESPACE}_NO_DEBUG");
269 if (no_debug_env) {
270 if (atoi(no_debug_env) > 0) {
271 ${namespace}_disable_debug = 1;
272 }
273 }
274 #endif
276 ${NAMESPACE}_DEBUG_PRINT("${namespace}_init: Initializing OpenCL on first use..\\n");
277 ${NAMESPACE}_PROFILE_START();
279 /* check environment variables */
280 const char *offload_env = getenv("${NAMESPACE}_OFFLOAD");
281 if (!offload_env) {
282 ${NAMESPACE}_DEBUG_PRINT("Using build time default for offload: ${NAMESPACE}_OFFLOAD=%s\\n", ${NAMESPACE}_OFFLOAD);
283 offload_env = ${NAMESPACE}_OFFLOAD;
284 }
285 else {
286 ${NAMESPACE}_DEBUG_PRINT("Using runtime override for offloads: ${NAMESPACE}_OFFLOAD=%s\\n", offload_env);
287 }
288 if (offload_env) {
289 ${namespace}_offload = atoi(offload_env);
290 if (${namespace}_offload == ${NAMESPACE}_OFFLOAD_NONE) {
291 ${NAMESPACE}_DEBUG_PRINT("Disabling all offloads\\n");
292 }
293 }
294 $hdr_init
297 /*------------------------------------------------------------------------
298 * Read the offline compiled kernel module
299 *-----------------------------------------------------------------------*/
300 const unsigned char* bin;
301 #ifdef ${NAMESPACE}_FAT_BINARY
302 bin = (unsigned char *)${namespace}_kernel_dsp_bin;
303 const size_t bin_length = ${namespace}_kernel_dsp_bin_len;
304 #else
305 const char binary[] = "./${namespace}_kernel.out";
306 unsigned int bin_length;
307 #ifdef __cplusplus
308 bin_length = ocl_read_binary(binary, (char*&)bin);
309 #else
310 FILE *fp = fopen(binary, "r");
311 if (!fp) {
312 ${NAMESPACE}_ERROR_EXIT("Could not open OpenCL pre-compiled binary %s for reading\\n", binary);
313 }
314 struct stat fileinfo;
315 stat(binary, &fileinfo);
316 bin_length = fileinfo.st_size;
317 bin = (char *)malloc(bin_length);
318 if (!bin) {
319 ${NAMESPACE}_ERROR_EXIT("Could not malloc of size %d for reading OpenCL binary\\n", bin_length);
320 }
321 if (fread((char *)bin, bin_length, 1, fp) != 1) {
322 ${NAMESPACE}_ERROR_EXIT("Could not read %d bytes of OpenCL binary\\n", bin_length);
323 }
324 fclose(fp);
325 #endif /* cplusplus */
326 #endif /* FAT_BINARY */
328 /* OpenCL init */
329 #ifdef __cplusplus
330 ${namespace}_ocl_context = Context(CL_DEVICE_TYPE_ACCELERATOR);
331 ${namespace}_ocl_devices = ${namespace}_ocl_context.getInfo<CL_CONTEXT_DEVICES>();
332 ${namespace}_ocl_binary = Program::Binaries(1, std::make_pair(bin, bin_length));
333 ${namespace}_ocl_program = Program(${namespace}_ocl_context, ${namespace}_ocl_devices, ${namespace}_ocl_binary);
334 ${namespace}_ocl_program.build(${namespace}_ocl_devices);
335 ${namespace}_ocl_Q = CommandQueue(${namespace}_ocl_context, ${namespace}_ocl_devices[0], CL_QUEUE_PROFILING_ENABLE);
336 #else
337 cl_int err;
338 cl_device_id device;
339 /* Create an in-order command queue by default*/
340 int queue_flags = 0;
341 #ifdef ${NAMESPACE}_PROFILE
342 queue_flags |= CL_QUEUE_PROFILING_ENABLE;
343 #endif
345 ${namespace}_ocl_context = clCreateContextFromType(0,CL_DEVICE_TYPE_ACCELERATOR,0,0,&err);
346 ${NAMESPACE}_OCL_CHKERROR("clCreateContextFromType",err);
347 err = clGetDeviceIDs(0,CL_DEVICE_TYPE_ACCELERATOR,1,&device,0);
348 ${NAMESPACE}_OCL_CHKERROR("clGetDeviceIDs",err);
349 ${namespace}_ocl_Q = clCreateCommandQueue(${namespace}_ocl_context, device, queue_flags, &err);
350 ${NAMESPACE}_OCL_CHKERROR("clCreateCommandQueue",err);
351 ${namespace}_ocl_program = clCreateProgramWithBinary(${namespace}_ocl_context, 1, &device, &bin_length, &bin, NULL, &err);
352 ${NAMESPACE}_OCL_CHKERROR("clCreateProgramWithBinary",err);
353 const char *compile_options = "";
354 err = clBuildProgram(${namespace}_ocl_program, 1, &device, compile_options, 0, 0);
355 ${NAMESPACE}_OCL_CHKERROR("clBuildProgram",err);
357 #endif
359 #ifndef ${NAMESPACE}_FAT_BINARY
360 #ifdef __cplusplus
361 delete [] bin;
362 #else
363 free((char*)bin);
364 #endif
365 #endif /* FAT_BINARY */
367 ${NAMESPACE}_PROFILE_REPORT(" Initialization took %8.2f us\\n", (float) clock_diff);
368 ${namespace}_init_done = 1;
369 ${NAMESPACE}_DEBUG_PRINT("${namespace}_init: Finished OpenCL initialization\\n");
370 } //end of !ti_cblas_init_done
371 } // End of critical section
373 return;
374 }
376 /* Returns a handle to the kernel for the specified
377 * function with index 'idx'. Initializes the handle if it's
378 * not been used before, otherwise returns earlier handle
379 */
380 #ifdef __cplusplus
381 Kernel*
382 #else
383 cl_kernel
384 #endif
385 ${namespace}_get_kernel(int idx, const char *fname)
386 {
387 if (!${namespace}_kernel_valid[idx]) {
388 #ifdef __cplusplus
389 ${namespace}_ocl_kernels[idx] = new Kernel(${namespace}_ocl_program, fname);
390 #else
391 cl_int err;
392 ${namespace}_ocl_kernels[idx] = clCreateKernel(${namespace}_ocl_program,fname,&err);
393 ${NAMESPACE}_OCL_CHKERROR("clCreateKernel",err);
394 #endif
395 ${namespace}_kernel_valid[idx] = 1;
396 }
397 return ${namespace}_ocl_kernels[idx];
398 }
400 END_ARM_INIT
401 }
404 # checks if a function is defined in a library
405 sub is_func_in_lib
406 {
407 my $func = shift;
408 my $lib = shift;
409 return (system("nm $lib | grep -w \"$func\" | grep -w T > /dev/null")==0);
410 }
412 # For each function prototype seen in the header, this function
413 # generates the correpsonding Makefile code. This needs to be done
414 # on a per-function basis.
415 # Arg: Function prototype string
416 sub generate_makefile_from_proto
417 {
418 my $string = shift;
419 $string =~ s/;//;
420 # func decl
422 # func return type
423 my @tmp = split /[\(\)]/,$string;
425 # func name
426 my $trampname = $tmp[0]; chomp $trampname;
427 $trampname =~ s/.*\ \**(.*)$/$1/;
428 my $makecode = "";
430 # unless we have $f2cwraplib set, we can simply assume the function is defined
431 # in the supplied arm library. If we have f2clib set, then we should check if
432 # the arm library actually has the function defined, or should we use the f2c
433 # wrapper function name
434 my $f2csym = $trampname; $f2csym =~ s/^(.*)_$/f2c_$1/ ;
435 if ($::f2cwraplib && is_func_in_lib("$f2csym", "$::f2cwraplib")) {
436 print "using $f2csym wrapper for $trampname\n";
437 $makecode .= "OBJCOPY_ARGS += --redefine-sym=$f2csym=__real_${trampname}\n";
438 }
439 else {
440 $makecode .= "OBJCOPY_ARGS += --redefine-sym=$trampname=__real_${trampname}\n";
441 }
442 $makecode .= "OBJS += ${namespace}_$offloaded[-1].o\n";
443 $makecode .= "HOST_OBJS += ${namespace}_$offloaded[-1].o";
444 return $makecode;
445 }
447 # returns the ARM condition code for dynamic offload
448 # based on function name
449 # Arg 1: function name
450 # Arg 2: Args array
451 sub get_func_based_arm_cond
452 {
453 $fname = shift;
454 $argsref = shift;
456 my $bufsize = "";
457 my $type = 0;
459 if ($namespace =~ /clapack$/) {
460 # saxpy, daxpy, caxpy, zaxpy
461 if ($fname =~ /^[sdcz]axpy_$/) {
462 $bufsize = "*$$argsref[0]*abs(*$$argsref[5])";
463 }
464 # scopy, dcopy, ccopy, zcopy
465 if ($fname =~ /^[sdcz]copy_$/) {
466 $bufsize = "*$$argsref[0]*abs(*$$argsref[4])";
467 }
468 # [cz]dot[cu]
469 if ($fname =~ /^[sdcz]dot[cu]_$/) {
470 $bufsize = "*$$argsref[1]*abs(*$$argsref[5])";
471 }
472 # sgbmv, dgbmv, cgbmv, zgbmv
473 if ($fname =~ /^[sdcz]gbmv_$/) {
474 $bufsize = "*$$argsref[1]*abs(*$$argsref[2])"; # A: M x N
475 }
476 # sgemm, dgemm, cgemm, zgemm
477 if ($fname =~ /^[sdcz]gemm_$/) {
478 $bufsize = "*$$argsref[2]*(*$$argsref[3])";# C: M x N
479 }
480 # sgemv, dgemv, cgemv, zgemv
481 if ($fname =~ /^[sdcz]gemv_$/) {
482 $bufsize = "*$$argsref[1]*abs(*$$argsref[2])"; # A : M x N
483 }
484 # cgeru, zgeru, cgerc, zgerc
485 if ($fname =~ /^[cz]ger[uc]_$/) {
486 $bufsize = "*$$argsref[0]*abs(*$$argsref[1])"; # A : M x N
487 }
488 # chbmv, zhbmv
489 if ($fname =~ /^[cz]hbmv_$/) {
490 $bufsize = "*$$argsref[1]*abs(*$$argsref[1])"; # A : N x N
491 }
492 # chemm, zhemm
493 if ($fname =~ /^[cz]hemm_$/) {
494 $bufsize = "*$$argsref[2]*(*$$argsref[3])";# C: MxN
495 }
496 # chemv, zhemv
497 if ($fname =~ /^[cz]hemv_$/) {
498 $bufsize = "*$$argsref[1]*abs(*$$argsref[1])"; # A : N x N
499 }
500 # cher, zher, chpr, zhpr
501 if ($fname =~ /^[cz]h[ep]r_$/) {
502 $bufsize = "*$$argsref[1]*abs(*$$argsref[1])"; # A : N x N
503 }
504 # cher2, zher2, chpr2, zhpr2
505 if ($fname =~ /^[cz]h[ep]r2_$/) {
506 $bufsize = "(*$$argsref[1])*(*$$argsref[1])"; # A : N x N
507 }
508 # cher2k, zher2k
509 if ($fname =~ /^[cz]her2k_$/) {
510 $bufsize = "(*$$argsref[2])*(*$$argsref[2])"; # C: NxN
511 }
512 # cherk, zherk
513 if ($fname =~ /^[cz]herk_$/) {
514 $bufsize = "(*$$argsref[2])*(*$$argsref[3])";# C: NxN
515 }
516 # chpmv, zhpmv
517 if ($fname =~ /^[cz]hpmv_$/) {
518 $bufsize = "(*$$argsref[1])*(*$$argsref[1])" ; # Ap : N x N
519 }
520 if ($fname =~ /^[sdcz]gesvd_$/) {
521 $bufsize = "(*$$argsref[2])*(*$$argsref[3])" ; # A : M x N
522 }
523 if ($fname =~ /^[sdcz]gesv_$/) {
524 $bufsize = "(*$$argsref[0])*(*$$argsref[0])" ; # A : N x N
525 }
526 if ($fname =~ /^[sdcz]gesv_$/) {
527 $bufsize = "(*$$argsref[0])*(*$$argsref[0])" ; # A : N x N
528 }
529 # cscal, dscal, zscal, sscal
530 if ($fname =~ /^[sdcz]scal_$/) {
531 $bufsize = "(1+ (*$$argsref[0]-1)*abs(*$$argsref[-1]))" ;# cx : 1+(N-1)*incx
532 }
533 # csscal, zdscal
534 if ($fname =~ /^[cz][sd]scal_$/) {
535 $bufsize = "(1+ (*$$argsref[0]-1)*abs(*$$argsref[-1]))" ;# cx : 1+(N-1)*incx
536 }
537 # srot, crot, drot, zrot
538 if ($fname =~ /^[sdcz]rot_$/) {
539 $bufsize = "(1+ (*$$argsref[0]-1)*abs(*$$argsref[4]))" ;# cy : 1+(N-1)*incy
540 }
541 # srotm, drotm
542 if ($fname =~ /^[sd]rotm_$/) {
543 $bufsize = "(1+ (*$$argsref[0]-1)*abs(*$$argsref[4]))" ;# cy : 1+(N-1)*incy
544 }
545 # csrot, zdrot
546 if ($fname =~ /^[cz][sd]rot_$/) {
547 $bufsize = "(1+ (*$$argsref[0]-1)*abs(*$$argsref[4]))" ;# cy : 1+(N-1)*incy
548 }
549 # cswap, sswap, dswap, zswap
550 if ($fname =~ /^[sdcz]swap_$/) {
551 $bufsize = "(1+ (*$$argsref[0]-1)*abs(*$$argsref[4]))" ;# cy : 1+(N-1)*incy
552 }
553 # sdot, dsdot, ddot
554 if ($fname =~ /^d?[sd]dot_$/) {
555 $bufsize = "(1+ (*$$argsref[0]-1)*abs(*$$argsref[4]))" ;# cy : 1+(N-1)*incy
556 }
557 # sasum, dasum, scasum, dzasum
558 if ($fname =~ /^sasum_|dasum_|scasum_|dzasum_$/) {
559 $bufsize = "(1+ (*$$argsref[0]-1)*abs(*$$argsref[2]))" ;# sx : 1+(N-1)*incx
560 }
561 # snrm2, dnrm2, dznrm2, scnrm2
562 if ($fname =~ /^snrm2_|dnrm2_|dznrm2_|scnrm2_$/) {
563 $bufsize = "(1+ (*$$argsref[0]-1)*abs(*$$argsref[2]))" ;# sx : 1+(N-1)*incx
564 }
565 # sspr, dspr, cspr, zspr
566 if ($fname =~ /^[sdcz]spr_$/) {
567 $bufsize = "(*$$argsref[1])*(*$$argsref[1])" ; # Ap : N x N
568 }
569 # sspr2, dspr2
570 if ($fname =~ /^[sd]spr2_$/) {
571 $bufsize = "(*$$argsref[1])*(*$$argsref[1])" ; # Ap : N x N
572 }
573 # ssymv, dsymv, csymv, zsymv
574 if ($fname =~ /^[sdcz]symv_$/) {
575 $bufsize = "(*$$argsref[1])*(*$$argsref[1])" ; # A : N x N
576 }
577 # ssymm, dsymm, csymm, zsymm
578 if ($fname =~ /^[sdcz]symm_$/) {
579 $bufsize = "*$$argsref[2]*(*$$argsref[3])" ;# C: MxN
580 }
581 # ssyr, dsyr, csyr, zsyr
582 if ($fname =~ /^[sdcz]syr_$/) {
583 $bufsize = "(*$$argsref[1])*(*$$argsref[1])" ; # A : N x N
584 }
585 # ssyr2, dsyr2, csyr2, zsyr2
586 if ($fname =~ /^[sd]syr2_$/) {
587 $bufsize = "(*$$argsref[1])*(*$$argsref[1])" ; # A : N x N
588 }
589 # ssyrk, csyrk, dsyrk, zsyrk
590 if ($fname =~ /^[sdcz]syrk_$/) {
591 $bufsize = "(*$$argsref[2])*(*$$argsref[2])" ; # C : N x N
592 }
593 # stbmv, dtbmv, ctbmv, ztbmv, stbsv, dtbsv, ztbsv, ctbsv
594 if ($fname =~ /^[sdcz]tb[ms]v_$/) {
595 $bufsize = "(*$$argsref[3])*(*$$argsref[3])" ; # A : N x N
596 }
597 # [sdcz]tpmv, [scdz]tpsv
598 if ($fname =~ /^[sdcz]tp[ms]v_$/) {
599 $bufsize = "(*$$argsref[3])*(*$$argsref[3])" ; # A : N x N
600 }
601 # [sdcz]trmv, [sdcz]trsv
602 if ($fname =~ /^[sdcz]tr[ms]v_$/) {
603 $bufsize = "(*$$argsref[3])*(*$$argsref[3])" ; # A : N x N
604 }
605 # [scdz]trmm
606 if ($fname =~ /^[sdcz]trmm_$/) {
607 $bufsize = "*$$argsref[4]*(*$$argsref[5])" ;# B: MxN
608 }
609 # [scdz]trsm
610 if ($fname =~ /^[sdcz]trsm_$/) {
611 $bufsize = "*$$argsref[4]*(*$$argsref[5])" ;# B: MxN
612 }
613 # [sd]ger
614 if ($fname =~ /^[sd]ger_$/) {
615 $bufsize = "*$$argsref[0]*(*$$argsref[1])" ;# A: MxN
616 }
617 # [sd]sbmv
618 if ($fname =~ /^[sd]sbmv_$/) {
619 $bufsize = "(*$$argsref[1])*(*$$argsref[1])"; # A : N x N
620 }
621 # [sdcz]spmv
622 if ($fname =~ /^[sd]spmv_$/) {
623 $bufsize = "(*$$argsref[1])*(*$$argsref[1])" ; # A : N x N
624 }
625 # [s,d,c,z]syr2k
626 if ($fname =~ /^[sdcz]syr2k_$/) {
627 $bufsize = "(*$$argsref[2])*(*$$argsref[2])" ; # C : N x N
628 }
629 if ($fname =~ /^i[sdcz]amax_$/) {
630 $bufsize = "(1+ (*$$argsref[0]-1)*abs(*$$argsref[-1]))" ;# x : 1+(N-1)*incx
631 }
632 if ($fname =~ /^xerbla_array__$/) {
633 $bufsize = "(*$$argsref[1])" ;# srname_array length is srnam_len__
634 }
635 if ($fname =~ /^[sdcz]bdsqr_$/) {
636 $bufsize = "(*$$argsref[1])*(*$$argsref[4])" ; # C : n x ncc
637 }
638 if ($fname =~ /^[sdcz]gbbrd_$/) {
639 $bufsize = "(*$$argsref[1])*(*$$argsref[2])" ; # a : m x n
640 }
641 if ($fname =~ /^[sdcz]gbcon_$/) {
642 $bufsize = "(*$$argsref[1]>*$$argsref[5]?*$$argsref[1]:*$$argsref[5])*(*$$argsref[1])" ; # ab : max(ldab,n) x n
643 }
644 # scdz/gbequ, scdz/gbequb
645 if ($fname =~ /^[sdcz]gbequb?_$/) {
646 $bufsize = "(*$$argsref[0]>*$$argsref[5]?*$$argsref[0]:*$$argsref[5])*(*$$argsref[1])" ; # ab : max(ldab,m) x n
647 }
648 # scdz/gbrfs
649 if ($fname =~ /^[sdcz]gbrfs_$/) {
650 $bufsize = "(*$$argsref[1])*(*$$argsref[1])" ; # a : n x n
651 }
652 # scdz/pbrfs
653 if ($fname =~ /^[sdcz]pbrfs_$/) {
654 $bufsize = "(*$$argsref[1])*(*$$argsref[1])" ; # a : n x n
655 }
656 # sdcz/gbsv
657 if ($fname =~ /^[sdcz]gbsv_$/) {
658 $bufsize = "(*$$argsref[0]>*$$argsref[5]?*$$argsref[0]:*$$argsref[5])*(*$$argsref[0])" ; # ab : max(ldab,n) x n
659 }
660 # sdcz/gbtf2
661 if ($fname =~ /^[sdcz]gbtf2_$/) {
662 $bufsize = "(*$$argsref[0]>*$$argsref[5]?*$$argsref[0]:*$$argsref[5])*(*$$argsref[1])" ; # ab : max(ldab,m) x n
663 }
664 # sdcz/gbtrf
665 if ($fname =~ /^[sdcz]gbtrf_$/) {
666 $bufsize = "(*$$argsref[0]>*$$argsref[5]?*$$argsref[0]:*$$argsref[5])*(*$$argsref[1])" ; # ab : max(ldab,m) x n
667 }
668 # sdcz/gbtrs
669 if ($fname =~ /^[sdcz]gbtrs_$/) {
670 $bufsize = "(*$$argsref[1]>*$$argsref[6]?*$$argsref[1]:*$$argsref[6])*(*$$argsref[1])" ; # ab : max(ldab,n) x n
671 }
672 # sdcz/gebak
673 if ($fname =~ /^[sdcz]gebak_$/) {
674 $bufsize = "(*$$argsref[2]>*$$argsref[8]?*$$argsref[2]:*$$argsref[8])*(*$$argsref[6])" ; # v : max(ldv,n) x m
675 }
676 # sdcz/gebal
677 if ($fname =~ /^[sdcz]gebal_$/) {
678 $bufsize = "(*$$argsref[1]>*$$argsref[3]?*$$argsref[1]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,n) x n
679 $bufsize = "((*$$argsref[1]))" if ($argnum == 6); # scale: array of dimension n
680 }
681 # sdcz/gebd2
682 if ($fname =~ /^[sdcz]gebd2_$/) {
683 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
684 }
685 # sdcz/gebrd
686 if ($fname =~ /^[sdcz]gebrd_$/) {
687 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
688 }
689 # sdcz/gecon
690 if ($fname =~ /^[sdcz]gecon_$/) {
691 $bufsize = "(*$$argsref[1]>*$$argsref[3]?*$$argsref[1]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,n) x n
692 }
693 # sdcz/geequ sdcz/geequb
694 if ($fname =~ /^[sdcz]geequb?_$/) {
695 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
696 }
697 # sdcz/gees
698 if ($fname =~ /^[sdcz]gees_$/) {
699 $bufsize = "(*$$argsref[3]>*$$argsref[5]?*$$argsref[3]:*$$argsref[5])*(*$$argsref[3])" ; # a : max(lda,n) x n
700 }
701 # sdcz/geesx
702 if ($fname =~ /^[sdcz]geesx_$/) {
703 $bufsize = "(*$$argsref[4]>*$$argsref[6]?*$$argsref[4]:*$$argsref[6])*(*$$argsref[4])" ; # a : max(lda,n) x n
704 }
705 # sdcz/geev
706 if ($fname =~ /^[sdcz]geev_$/) {
707 $bufsize = "(*$$argsref[2]>*$$argsref[4]?*$$argsref[2]:*$$argsref[4])*(*$$argsref[2])" ; # a : max(lda,n) x n
708 }
709 # sdcz/geevx
710 if ($fname =~ /^[sdcz]geevx_$/) {
711 $bufsize = "(*$$argsref[4]>*$$argsref[6]?*$$argsref[4]:*$$argsref[6])*(*$$argsref[4])" ; # a : max(lda,n) x n
712 }
713 # sdcz/gegs
714 if ($fname =~ /^[sdcz]gegs_$/) {
715 $bufsize = "(*$$argsref[2]>*$$argsref[6]?*$$argsref[2]:*$$argsref[6])*(*$$argsref[2])" ; # b : max(ldb,n) x n
716 }
717 # sdcz/gegv
718 if ($fname =~ /^[sdcz]gegv_$/) {
719 $bufsize = "(*$$argsref[2]>*$$argsref[6]?*$$argsref[2]:*$$argsref[6])*(*$$argsref[2])" ; # b : max(ldb,n) x n
720 }
721 # sdcz/gehd2
722 if ($fname =~ /^[sdcz]gehd2_$/) {
723 $bufsize = "(*$$argsref[0]>*$$argsref[4]?*$$argsref[0]:*$$argsref[4])*(*$$argsref[0])" ; # a : max(lda,n) x n
724 }
725 # sdcz/gehrd
726 if ($fname =~ /^[sdcz]gehrd_$/) {
727 $bufsize = "(*$$argsref[0]>*$$argsref[4]?*$$argsref[0]:*$$argsref[4])*(*$$argsref[0])" ; # a : max(lda,n) x n
728 }
729 # sdcz/gelq2
730 if ($fname =~ /^[sdcz]gelq2_$/) {
731 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
732 }
733 # sdcz/gelqf
734 if ($fname =~ /^[sdcz]gelqf_$/) {
735 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
736 }
737 # sdcz/gels
738 if ($fname =~ /^[sdcz]gels_$/) {
739 $bufsize = "(*$$argsref[1]>*$$argsref[5]?*$$argsref[1]:*$$argsref[5])*(*$$argsref[2])" ; # a : max(lda,m) x n
740 }
741 # sdcz/gelsd
742 if ($fname =~ /^[sdcz]gelsd_$/) {
743 $bufsize = "(*$$argsref[0]>*$$argsref[4]?*$$argsref[0]:*$$argsref[4])*(*$$argsref[1])" ; # a : max(lda,m) x n
744 }
745 # sdcz/gelss
746 if ($fname =~ /^[sdcz]gelss_$/) {
747 $bufsize = "(*$$argsref[0]>*$$argsref[4]?*$$argsref[0]:*$$argsref[4])*(*$$argsref[1])" ; # a : max(lda,m) x n
748 }
749 # sdcz/gelsx
750 if ($fname =~ /^[sdcz]gelsx_$/) {
751 $bufsize = "(*$$argsref[0]>*$$argsref[4]?*$$argsref[0]:*$$argsref[4])*(*$$argsref[1])" ; # a : max(lda,m) x n
752 }
753 # sdcz/gelsy
754 if ($fname =~ /^[sdcz]gelsy_$/) {
755 $bufsize = "(*$$argsref[0]>*$$argsref[4]?*$$argsref[0]:*$$argsref[4])*(*$$argsref[1])" ; # a : max(lda,m) x n
756 }
757 # sdcz/geql2
758 if ($fname =~ /^[sdcz]geql2_$/) {
759 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
760 }
761 # sdcz/geqlf
762 if ($fname =~ /^[sdcz]geqlf_$/) {
763 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
764 }
765 # sdcz/geqp3
766 if ($fname =~ /^[sdcz]geqp3_$/) {
767 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
768 }
769 # sdcz/geqpf
770 if ($fname =~ /^[sdcz]geqpf_$/) {
771 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
772 }
773 # sdcz/geqr2
774 if ($fname =~ /^[sdcz]geqr2_$/) {
775 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
776 }
777 # sdcz/geqrf
778 if ($fname =~ /^[sdcz]geqrf_$/) {
779 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
780 }
781 # sdcz/gerfs
782 if ($fname =~ /^[sdcz]gerfs_$/) {
783 $bufsize = "(*$$argsref[1]>*$$argsref[4]?*$$argsref[1]:*$$argsref[4])*(*$$argsref[1])" ; # a : max(lda,n) x n
784 }
785 # sdcz/gerfsx
786 if ($fname =~ /^[sdcz]gerfsx_$/) {
787 $bufsize = "(*$$argsref[2]>*$$argsref[5]?*$$argsref[2]:*$$argsref[5])*(*$$argsref[2])" ; # a : max(lda,n) x n
788 }
789 # sdcz/gerq2
790 if ($fname =~ /^[sdcz]gerq2_$/) {
791 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
792 }
793 # sdcz/gerqf
794 if ($fname =~ /^[sdcz]gerqf_$/) {
795 $bufsize = "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])" ; # a : max(lda,m) x n
796 }
797 # sdcz/gesc2
798 if ($fname =~ /^[sdcz]gesc2_$/) {
799 $bufsize = "(*$$argsref[0]>*$$argsref[2]?*$$argsref[0]:*$$argsref[2])*(*$$argsref[0])" ; # a : max(lda,n) x n
800 }
801 }
803 if ($namespace =~ /cblas$/) {
804 ### Level 3 BLAS ###
805 # sgemm, dgemm, cgemm, zgemm
806 if ($fname =~ /^cblas_[sdcz]gemm$/) {
807 #$bufsize = "$$argsref[3]*$$argsref[4]";
808 $bufsize = "BLAS_ORD($$argsref[0],$$argsref[3],$$argsref[4])*$$argsref[13]";
809 $type = 3;
810 }
811 # ssymm, dsymm, csymm, zsymm
812 if ($fname =~ /^cblas_[sdcz]symm$/) {
813 $bufsize = "$$argsref[3]*$$argsref[4]";
814 $type = 3;
815 }
816 # ssyrk, dsyrk, csyrk, zsyrk
817 if ($fname =~ /^cblas_[sdcz]syrk$/) {
818 $bufsize = "$$argsref[3]*$$argsref[4]";
819 $type = 3;
820 }
821 # ssyr2k, dsyr2k, csyr2k, zsyr2k
822 if ($fname =~ /^cblas_[sdcz]syr2k$/) {
823 $bufsize = "$$argsref[3]*$$argsref[3]";
824 $type = 3;
825 }
826 # strmm, dtrmm, ctrmm, ztrmm, strsm, dtrsm, ctrsm, ztrsm
827 if ($fname =~ /^cblas_[sdcz]tr[ms]m$/) {
828 $bufsize = "$$argsref[5]*$$argsref[6]";
829 $type = 3;
830 }
831 # chemm, zhemm
832 if ($fname =~ /^cblas_[cz]hemm$/) {
833 #$bufsize = "$$argsref[3]*$$argsref[4]";
834 $bufsize = "BLAS_ORD_S($$argsref[0],$$argsref[1],$$argsref[4],$$argsref[3])*$$argsref[12]"; # C: ldc x (n or m)
835 $type = 3;
836 }
837 # cherk, zherk
838 if ($fname =~ /^cblas_[cz]herk$/) {
839 $bufsize = "$$argsref[3]*$$argsref[3]";
840 $type = 3;
841 }
842 # cher2k, zher2k
843 if ($fname =~ /^cblas_[cz]her2k$/) {
844 $bufsize = "$$argsref[3]*$$argsref[3]";
845 $type = 3;
846 }
847 }
849 if ($bufsize) {
850 $bufsize = "($bufsize)";
851 my $min = "";
852 my $max = "";
853 my $NAMESPACE = uc($namespace);
854 $min = "${NAMESPACE}_" . uc($fname) . "_OFFLOAD_MIN";
855 $max = "${NAMESPACE}_" . uc($fname) . "_OFFLOAD_MAX";
856 return "($bufsize <= $min) || ($bufsize >= $max)" if ($min && $max);
857 return "($bufsize <= $min)" if ($min);
858 return "($bufsize >= $max)" if ($max);
859 }
860 return ""; # if control reaches here then no modifier
861 }
862 #end of get_func_based_arm_cond
864 sub get_offload_decision
865 {
866 $fname = shift;
867 $argsref = shift;
868 $cond_offload = "";
870 if ($namespace =~ /cblas$/) {
871 ### Level 3 BLAS ###
872 $func_name = substr($fname, 6);
873 # sgemm, dgemm, cgemm, zgemm
874 if ($fname =~ /^cblas_[sdcz]gemm$/) {
875 #print "fname is " . $fname. "\n";
876 $cond_offload = "!$func_name\_offload_dsp($$argsref[0],$$argsref[3],$$argsref[4],$$argsref[5])";
877 #print "cond_offload is " . $cond_offload. "\n";
878 }
879 # ssymm, dsymm, csymm, zsymm
880 if ($fname =~ /^cblas_[sdcz]symm$/) {
881 $cond_offload = "!$func_name\_offload_dsp($$argsref[0],$$argsref[1],$$argsref[3],$$argsref[4])";
882 }
883 # ssyrk, dsyrk, csyrk, zsyrk
884 if ($fname =~ /^cblas_[sdcz]syrk$/) {
885 $cond_offload = "!$func_name\_offload_dsp($$argsref[0],$$argsref[3],$$argsref[4])";
886 }
887 # ssyr2k, dsyr2k, csyr2k, zsyr2k
888 if ($fname =~ /^cblas_[sdcz]syr2k$/) {
889 $cond_offload = "!$func_name\_offload_dsp($$argsref[0],$$argsref[3],$$argsref[4])";
890 }
891 # strmm, dtrmm, ctrmm, ztrmm, strsm, dtrsm, ctrsm, ztrsm
892 if ($fname =~ /^cblas_[sdcz]tr[ms]m$/) {
893 $cond_offload = "!$func_name\_offload_dsp($$argsref[0],$$argsref[1],$$argsref[5],$$argsref[6])";
894 }
895 # chemm, zhemm
896 if ($fname =~ /^cblas_[cz]hemm$/) {
897 $cond_offload = "!$func_name\_offload_dsp($$argsref[0],$$argsref[1],$$argsref[3],$$argsref[4])";
898 }
899 # cherk, zherk
900 if ($fname =~ /^cblas_[cz]herk$/) {
901 $cond_offload = "!$func_name\_offload_dsp($$argsref[0],$$argsref[3],$$argsref[4])";
902 }
903 # cher2k, zher2k
904 if ($fname =~ /^cblas_[cz]her2k$/) {
905 $cond_offload = "!$func_name\_offload_dsp($$argsref[0],$$argsref[3],$$argsref[4])";
906 }
907 }
909 if ($cond_offload) {
910 return "$cond_offload";
911 }
912 return ""; # if control reaches here then no modifier
913 }
914 #end of get_offload_decision
916 # put this function in a separate file ideally
917 sub clapack_bufsize_modifier
918 {
919 $fname = shift;
920 $argnum = shift;
921 $argsref = shift;
923 # saxpy, daxpy, caxpy, zaxpy
924 if ($fname =~ /^[sdcz]axpy_$/) {
925 return "*$$argsref[0]*abs(*$$argsref[3])*" if ($argnum == 2);
926 return "*$$argsref[0]*abs(*$$argsref[5])*" if ($argnum == 4);
927 }
928 # scopy, dcopy, ccopy, zcopy
929 if ($fname =~ /^[sdcz]copy_$/) {
930 return "*$$argsref[0]*abs(*$$argsref[2])*" if ($argnum == 1);
931 return "*$$argsref[0]*abs(*$$argsref[4])*" if ($argnum == 3);
932 }
933 # [cz]dot[cu]
934 if ($fname =~ /^[sdcz]dot[cu]_$/) {
935 return "*$$argsref[1]*abs(*$$argsref[3])*" if ($argnum == 2);
936 return "*$$argsref[1]*abs(*$$argsref[5])*" if ($argnum == 4);
937 }
938 # sgbmv, dgbmv, cgbmv, zgbmv
939 if ($fname =~ /^[sdcz]gbmv_$/) {
940 return "*$$argsref[1]*abs(*$$argsref[2])*" if ($argnum == 6); # A: M x N
941 return "(*$$argsref[0] == 'N' ? *$$argsref[1]:*$$argsref[2])*abs(*$$argsref[-4])*" if ($argnum == 8);
942 return "(*$$argsref[0] == 'N' ? *$$argsref[2]:*$$argsref[1])*abs(*$$argsref[-1])*" if ($argnum == 11);
943 }
944 # sgemm, dgemm, cgemm, zgemm
945 if ($fname =~ /^[sdcz]gemm_$/) {
946 return "*$$argsref[2]*(*$$argsref[4])*" if ($argnum == 6); # A: M x K
947 return "*$$argsref[4]*(*$$argsref[3])*" if ($argnum == 8); # B: K x N
948 return "*$$argsref[2]*(*$$argsref[3])*" if ($argnum == 11);# C: M x N
949 }
950 # sgemv, dgemv, cgemv, zgemv
951 if ($fname =~ /^[sdcz]gemv_$/) {
952 return "*$$argsref[1]*abs(*$$argsref[2])*" if ($argnum == 4); # A : M x N
953 return "(1+(*$$argsref[0] == 'N' ? *$$argsref[1]-1:*$$argsref[2]-1)*abs(*$$argsref[-4]))*" if ($argnum == 6);
954 return "(1+(*$$argsref[0] == 'N' ? *$$argsref[2]-1:*$$argsref[1]-1)*abs(*$$argsref[-1]))*" if ($argnum == 9);
955 }
956 # cgeru, zgeru, cgerc, zgerc
957 if ($fname =~ /^[cz]ger[uc]_$/) {
958 return "*$$argsref[0]*abs(*$$argsref[1])*" if ($argnum == 7); # A : M x N
959 return "(1 + (*$$argsref[0] -1)*abs(*$$argsref[4]))*" if ($argnum == 3);# X : 1+ (M-1)*incX
960 return "(1 + (*$$argsref[1] -1)*abs(*$$argsref[6]))*" if ($argnum == 5);# Y : 1+ (N-1)*incY
961 }
962 # chbmv, zhbmv
963 if ($fname =~ /^[cz]hbmv_$/) {
964 return "*$$argsref[1]*abs(*$$argsref[1])*" if ($argnum == 4); # A : N x N
965 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[-4]))*" if ($argnum == 6);# X : 1+ (N-1) x incX
966 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[-1]))*" if ($argnum == 9);# Y : 1+ (N-1) x incX
967 }
968 # chemm, zhemm
969 if ($fname =~ /^[cz]hemm_$/) {
970 return "(((*$$argsref[0] == 'L') || (*$$argsref[0] == 'l')) ? *$$argsref[2]*(*$$argsref[2]):*$$argsref[3]*(*$$argsref[3]))*" if ($argnum == 5); # A: MxM or NxN
971 return "*$$argsref[2]*(*$$argsref[3])*" if ($argnum == 7);# B: MxN
972 return "*$$argsref[2]*(*$$argsref[3])*" if ($argnum == 10);# C: MxN
973 }
974 # chemv, zhemv
975 if ($fname =~ /^[cz]hemv_$/) {
976 return "*$$argsref[1]*abs(*$$argsref[1])*" if ($argnum == 3); # A : N x N
977 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[-4]))*" if ($argnum == 5);# X : 1+(N-1) x incX
978 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[-1]))*" if ($argnum == 8);# Y : 1+(N-1) x incY
979 }
980 # cher, zher, chpr, zhpr
981 if ($fname =~ /^[cz]h[ep]r_$/) {
982 return "*$$argsref[1]*abs(*$$argsref[1])*" if ($argnum == 5); # A : N x N
983 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[4]))*" if ($argnum == 3);# X : 1+(N-1) x incX
984 }
985 # cher2, zher2, chpr2, zhpr2
986 if ($fname =~ /^[cz]h[ep]r2_$/) {
987 return "(*$$argsref[1])*(*$$argsref[1])*" if ($argnum == 7); # A : N x N
988 return "(*$$argsref[1])*abs(*$$argsref[4])*" if ($argnum == 3);# X : N x incX
989 return "(*$$argsref[1])*abs(*$$argsref[6])*" if ($argnum == 5);# Y : N x incY
990 }
991 # cher2k, zher2k
992 if ($fname =~ /^[cz]her2k_$/) {
993 return "(*$$argsref[2])*(*$$argsref[3])*" if ($argnum == 5); # A: NxK or KxN
994 return "(*$$argsref[2])*(*$$argsref[3])*" if ($argnum == 7); # B: KxN or NxK
995 return "(*$$argsref[2])*(*$$argsref[2])*" if ($argnum == 10);# C: NxN
996 }
997 # cherk, zherk
998 if ($fname =~ /^[cz]herk_$/) {
999 return "(*$$argsref[2])*(*$$argsref[3])*" if ($argnum == 5); # A: NxK or KxN
1000 return "(*$$argsref[2])*(*$$argsref[3])*" if ($argnum == 8);# C: NxN
1001 }
1002 # chpmv, zhpmv
1003 if ($fname =~ /^[cz]hpmv_$/) {
1004 return "(*$$argsref[1])*(*$$argsref[1])*" if ($argnum == 4); # Ap : N x N
1005 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[5]))*" if ($argnum == 4);# X : 1+(N-1) x incX
1006 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[-1]))*" if ($argnum == 7);# Y : 1+(N-1) x incY
1007 }
1008 if ($fname =~ /^[sdcz]gesvd_$/) {
1009 return "(*$$argsref[2])*(*$$argsref[3])*" if ($argnum == 4); # A : M x N
1010 return "(*$$argsref[2])*(*$$argsref[2])*" if ($argnum == 7); # U : M x M
1011 return "(*$$argsref[3])*(*$$argsref[3])*" if ($argnum == 9); # V : N x N
1012 return "(*$$argsref[2])*(*$$argsref[3])*" if ($argnum == 6); # S : M x N
1013 return "(*$$argsref[-2] > 1 ? *$$argsref[-2] : 1)*" if ($argnum == 11); # work: max(1, *lwork)
1014 }
1015 if ($fname =~ /^[sdcz]gesv_$/) {
1016 return "(*$$argsref[0])*(*$$argsref[0])*" if ($argnum == 2); # A : N x N
1017 return "(*$$argsref[0])*" if ($argnum == 4); # IPIV : N
1018 return "(*$$argsref[0])*(*$$argsref[1])*" if ($argnum == 5); # B : N x NRHS
1019 }
1020 if ($fname =~ /^[sdcz]gesv_$/) {
1021 return "(*$$argsref[0])*(*$$argsref[0])*" if ($argnum == 2); # A : N x N
1022 return "(*$$argsref[0])*" if ($argnum == 4); # IPIV : N
1023 return "(*$$argsref[0])*(*$$argsref[1])*" if ($argnum == 5); # B : N x NRHS
1024 }
1025 # cscal, dscal, zscal, sscal
1026 if ($fname =~ /^[sdcz]scal_$/) {
1027 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[-1]))*" if ($argnum == 2);# cx : 1+(N-1)*incx
1028 }
1029 # csscal, zdscal
1030 if ($fname =~ /^[cz][sd]scal_$/) {
1031 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[-1]))*" if ($argnum == 2);# cx : 1+(N-1)*incx
1032 }
1033 # srot, crot, drot, zrot
1034 if ($fname =~ /^[sdcz]rot_$/) {
1035 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[2]))*" if ($argnum == 1);# cx : 1+(N-1)*incx
1036 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[4]))*" if ($argnum == 3);# cy : 1+(N-1)*incy
1037 }
1038 # srotm, drotm
1039 if ($fname =~ /^[sd]rotm_$/) {
1040 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[2]))*" if ($argnum == 1);# cx : 1+(N-1)*incx
1041 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[4]))*" if ($argnum == 3);# cy : 1+(N-1)*incy
1042 }
1043 # csrot, zdrot
1044 if ($fname =~ /^[cz][sd]rot_$/) {
1045 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[2]))*" if ($argnum == 1);# cx : 1+(N-1)*incx
1046 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[4]))*" if ($argnum == 3);# cy : 1+(N-1)*incy
1047 }
1048 # cswap, sswap, dswap, zswap
1049 if ($fname =~ /^[sdcz]swap_$/) {
1050 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[2]))*" if ($argnum == 1);# cx : 1+(N-1)*incx
1051 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[4]))*" if ($argnum == 3);# cy : 1+(N-1)*incy
1052 }
1053 # sdot, dsdot, ddot
1054 if ($fname =~ /^d?[sd]dot_$/) {
1055 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[2]))*" if ($argnum == 1);# cx : 1+(N-1)*incx
1056 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[4]))*" if ($argnum == 3);# cy : 1+(N-1)*incy
1057 }
1058 # sasum, dasum, scasum, dzasum
1059 if ($fname =~ /^sasum_|dasum_|scasum_|dzasum_$/) {
1060 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[2]))*" if ($argnum == 1);# sx : 1+(N-1)*incx
1061 }
1062 # snrm2, dnrm2, dznrm2, scnrm2
1063 if ($fname =~ /^snrm2_|dnrm2_|dznrm2_|scnrm2_$/) {
1064 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[2]))*" if ($argnum == 1);# sx : 1+(N-1)*incx
1065 }
1066 # sspr, dspr, cspr, zspr
1067 if ($fname =~ /^[sdcz]spr_$/) {
1068 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[4]))*" if ($argnum == 3);# x : 1+(N-1)*incx
1069 return "(*$$argsref[1])*(*$$argsref[1])*" if ($argnum == 5); # Ap : N x N
1070 }
1071 # sspr2, dspr2
1072 if ($fname =~ /^[sd]spr2_$/) {
1073 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[4]))*" if ($argnum == 3);# x : 1+(N-1)*incx
1074 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[6]))*" if ($argnum == 5);# y : 1+(N-1)*incy
1075 return "(*$$argsref[1])*(*$$argsref[1])*" if ($argnum == 7); # Ap : N x N
1076 }
1077 # ssymv, dsymv, csymv, zsymv
1078 if ($fname =~ /^[sdcz]symv_$/) {
1079 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[6]))*" if ($argnum == 5);# x : 1+(N-1)*incx
1080 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[-1]))*" if ($argnum == 8);# y : 1+(N-1)*incy
1081 return "(*$$argsref[1])*(*$$argsref[1])*" if ($argnum == 3); # A : N x N
1082 }
1083 # ssymm, dsymm, csymm, zsymm
1084 if ($fname =~ /^[sdcz]symm_$/) {
1085 return "(((*$$argsref[0] == 'L') || (*$$argsref[0] == 'l')) ? (*$$argsref[2])*(*$$argsref[2]):(*$$argsref[3])*(*$$argsref[3]))*" if ($argnum == 5); # A: MxM or NxN
1086 return "*$$argsref[2]*(*$$argsref[3])*" if ($argnum == 7);# B: MxN
1087 return "*$$argsref[2]*(*$$argsref[3])*" if ($argnum == 10);# C: MxN
1088 }
1089 # ssyr, dsyr, csyr, zsyr
1090 if ($fname =~ /^[sdcz]syr_$/) {
1091 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[4]))*" if ($argnum == 3);# x : 1+(N-1)*incx
1092 return "(*$$argsref[1])*(*$$argsref[1])*" if ($argnum == 5); # A : N x N
1093 }
1094 # ssyr2, dsyr2, csyr2, zsyr2
1095 if ($fname =~ /^[sd]syr2_$/) {
1096 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[4]))*" if ($argnum == 3);# x : 1+(N-1)*incx
1097 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[6]))*" if ($argnum == 5);# y : 1+(N-1)*incy
1098 return "(*$$argsref[1])*(*$$argsref[1])*" if ($argnum == 7); # A : N x N
1099 }
1100 # ssyrk, csyrk, dsyrk, zsyrk
1101 if ($fname =~ /^[sdcz]syrk_$/) {
1102 return "(*$$argsref[2])*(*$$argsref[3])*" if ($argnum == 5); # A : N x K or K x N
1103 return "(*$$argsref[2])*(*$$argsref[2])*" if ($argnum == 8); # C : N x N
1104 }
1105 # stbmv, dtbmv, ctbmv, ztbmv, stbsv, dtbsv, ztbsv, ctbsv
1106 if ($fname =~ /^[sdcz]tb[ms]v_$/) {
1107 return "(*$$argsref[3])*(*$$argsref[3])*" if ($argnum == 5); # A : N x N
1108 return "(1+ (*$$argsref[3]-1)*abs(*$$argsref[-1]))*" if ($argnum == 7);# x : 1+(N-1)*incx
1109 }
1110 # [sdcz]tpmv, [scdz]tpsv
1111 if ($fname =~ /^[sdcz]tp[ms]v_$/) {
1112 return "(*$$argsref[3])*(*$$argsref[3])*" if ($argnum == 4); # A : N x N
1113 return "(1+ (*$$argsref[3]-1)*abs(*$$argsref[-1]))*" if ($argnum == 5);# x : 1+(N-1)*incx
1114 }
1115 # [sdcz]trmv, [sdcz]trsv
1116 if ($fname =~ /^[sdcz]tr[ms]v_$/) {
1117 return "(*$$argsref[3])*(*$$argsref[3])*" if ($argnum == 4); # A : N x N
1118 return "(1+ (*$$argsref[3]-1)*abs(*$$argsref[-1]))*" if ($argnum == 6);# x : 1+(N-1)*incx
1119 }
1120 # [scdz]trmm
1121 if ($fname =~ /^[sdcz]trmm_$/) {
1122 return "(((*$$argsref[0] == 'L') || (*$$argsref[0] == 'l')) ? (*$$argsref[4])*(*$$argsref[4]):(*$$argsref[5])*(*$$argsref[5]))*" if ($argnum == 7); # A: MxM or NxN
1123 return "*$$argsref[4]*(*$$argsref[5])*" if ($argnum == 9);# B: MxN
1124 }
1125 # [scdz]trsm
1126 if ($fname =~ /^[sdcz]trsm_$/) {
1127 return "*$$argsref[4]*(*$$argsref[4])*" if ($argnum == 7);# A: MxM
1128 return "*$$argsref[4]*(*$$argsref[5])*" if ($argnum == 9);# B: MxN
1129 }
1130 # [sd]ger
1131 if ($fname =~ /^[sd]ger_$/) {
1132 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[4]))*" if ($argnum == 3);# x : 1+(M-1)*incx
1133 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[6]))*" if ($argnum == 5);# x : 1+(N-1)*incy
1134 return "*$$argsref[0]*(*$$argsref[1])*" if ($argnum == 7);# A: MxN
1135 }
1136 # [sd]sbmv
1137 if ($fname =~ /^[sd]sbmv_$/) {
1138 return "(*$$argsref[1])*(*$$argsref[1])*" if ($argnum == 4); # A : N x N
1139 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[7]))*" if ($argnum == 6);# x : 1+(N-1)*incx
1140 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[-1]))*" if ($argnum == 9);# y : 1+(N-1)*incy
1141 }
1142 # [sdcz]spmv
1143 if ($fname =~ /^[sd]spmv_$/) {
1144 return "(*$$argsref[1])*(*$$argsref[1])*" if ($argnum == 3); # A : N x N
1145 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[5]))*" if ($argnum == 4);# x : 1+(N-1)*incx
1146 return "(1+ (*$$argsref[1]-1)*abs(*$$argsref[-1]))*" if ($argnum == 7);# y : 1+(N-1)*incy
1147 }
1148 # [s,d,c,z]syr2k
1149 if ($fname =~ /^[sdcz]syr2k_$/) {
1150 return "(*$$argsref[2])*(*$$argsref[3])*" if ($argnum == 5); # A : N x K or K x N
1151 return "(*$$argsref[2])*(*$$argsref[3])*" if ($argnum == 7); # B : N x K or K x N
1152 return "(*$$argsref[2])*(*$$argsref[2])*" if ($argnum == 10); # C : N x N
1153 }
1154 if ($fname =~ /^i[sdcz]amax_$/) {
1155 return "(1+ (*$$argsref[0]-1)*abs(*$$argsref[-1]))*" if ($argnum == 1);# x : 1+(N-1)*incx
1156 }
1157 if ($fname =~ /^xerbla_array__$/) {
1158 return "(*$$argsref[1])*" if ($argnum == 0);# srname_array length is srnam_len__
1159 }
1160 if ($fname =~ /^[sdcz]bdsqr_$/) {
1161 return "(*$$argsref[1])*(*$$argsref[4])*" if ($argnum == 11); # C : n x ncc
1162 return "(*$$argsref[1])*(*$$argsref[3])*" if ($argnum == 9); # U : nru x n
1163 return "(*$$argsref[1])*(*$$argsref[2])*" if ($argnum == 7); # vt : n x ncvt
1164 return "((*$$argsref[1])*2)*" if ($argnum == 13); # rwork: array of dimension 2n
1165 return "((*$$argsref[1]) - 1)*" if ($argnum == 6); # e : array of dimension n-1
1166 return "(*$$argsref[1])*" if ($argnum == 5); # d : array of dimension n-1
1167 }
1168 if ($fname =~ /^[sdcz]gbbrd_$/) {
1169 return "(*$$argsref[1])*(*$$argsref[2])*" if ($argnum == 6); # a : m x n
1170 return "(*$$argsref[1])*(*$$argsref[1])*" if ($argnum == 10); # q: m x m
1171 return "(*$$argsref[1])*(*$$argsref[3])*" if ($argnum == 14); # C : m x ncc
1172 return "((*$$argsref[1]) < (*$$argsref[2]) ? (*$$argsref[1]):(*$$argsref[2]))*" if ($argnum == 8); # D : min(m,n)
1173 return "((*$$argsref[1]) < (*$$argsref[2]) ? (*$$argsref[1] -1):(*$$argsref[2] -1))*" if ($argnum == 9); # e : min(m,n) -1
1174 return "(*$$argsref[2])*(*$$argsref[2])*" if ($argnum == 12); # pt: n x n
1175 return "((*$$argsref[1]) > (*$$argsref[2]) ? (*$$argsref[1]):(*$$argsref[2]))*" if ($argnum == 16); # work : max(m,n)
1176 return "((*$$argsref[1]) > (*$$argsref[2]) ? (*$$argsref[1]):(*$$argsref[2]))*" if ($argnum == 17); # rwork : max(m,n)
1177 }
1178 if ($fname =~ /^[sdcz]gbcon_$/) {
1179 return "(*$$argsref[1]>*$$argsref[5]?*$$argsref[1]:*$$argsref[5])*(*$$argsref[1])*" if ($argnum == 4); # ab : max(ldab,n) x n
1180 return "((*$$argsref[1])*2)*" if ($argnum == 9); # work: array of dimension 2n
1181 return "((*$$argsref[1]))*" if ($argnum == 10); # rwork: array of dimension n
1182 return "(*$$argsref[1])*" if ($argnum == 6); # ipiv : array of dimension n
1183 }
1184 # scdz/gbequ, scdz/gbequb
1185 if ($fname =~ /^[sdcz]gbequb?_$/) {
1186 return "(*$$argsref[0]>*$$argsref[5]?*$$argsref[0]:*$$argsref[5])*(*$$argsref[1])*" if ($argnum == 4); # ab : max(ldab,m) x n
1187 return "((*$$argsref[0]))*" if ($argnum == 6); # r: array of dimension m
1188 return "((*$$argsref[1]))*" if ($argnum == 7); # c: array of dimension n
1189 }
1190 # scdz/gbrfs
1191 if ($fname =~ /^[sdcz]gbrfs_$/) {
1192 return "(*$$argsref[1])*(*$$argsref[1])*" if ($argnum == 5); # a : n x n
1193 return "(*$$argsref[11])*(*$$argsref[4])*" if ($argnum == 10); # b : ldb x nrhs
1194 return "(*$$argsref[13])*(*$$argsref[4])*" if ($argnum == 12); # x : ldx x nrhs
1195 return "((*$$argsref[4]))*" if ($argnum == 14); # ferr: array of dimension nrhs
1196 return "((*$$argsref[4]))*" if ($argnum == 15); # berr: array of dimension nrhs
1197 return "((*$$argsref[1])*2)*" if ($argnum == 16); # work: array of dimension 2n
1198 return "((*$$argsref[1]))*" if ($argnum == 17); # rwork: array of dimension n
1199 return "((*$$argsref[1]))*" if ($argnum == 9); # ipiv: array of dimension n
1200 return "(*$$argsref[1])*(*$$argsref[8])*" if ($argnum == 7); # afb : ldafb x n
1201 }
1202 # scdz/pbrfs
1203 if ($fname =~ /^[sdcz]pbrfs_$/) {
1204 return "(*$$argsref[1])*(*$$argsref[1])*" if ($argnum == 4); # a : n x n
1205 return "(*$$argsref[9])*(*$$argsref[3])*" if ($argnum == 8); # b : ldb x nrhs
1206 return "(*$$argsref[11])*(*$$argsref[3])*" if ($argnum == 10); # x : ldx x nrhs
1207 return "((*$$argsref[3]))*" if ($argnum == 12); # ferr: array of dimension nrhs
1208 return "((*$$argsref[3]))*" if ($argnum == 13); # berr: array of dimension nrhs
1209 return "((*$$argsref[1])*3)*" if ($argnum == 14); # work: array of dimension 3n
1210 return "((*$$argsref[1]))*" if ($argnum == 15); # iwork: array of dimension n
1211 return "(*$$argsref[1])*(*$$argsref[7])*" if ($argnum == 6); # afb : ldafb x n
1212 }
1213 # sdcz/gbsv
1214 if ($fname =~ /^[sdcz]gbsv_$/) {
1215 return "(*$$argsref[0]>*$$argsref[5]?*$$argsref[0]:*$$argsref[5])*(*$$argsref[0])*" if ($argnum == 4); # ab : max(ldab,n) x n
1216 return "((*$$argsref[0]))*" if ($argnum == 6); # ipiv: array of dimension n
1217 return "(*$$argsref[8])*(*$$argsref[3])*" if ($argnum == 7); # b : ldb x nrhs
1218 }
1219 # sdcz/gbtf2
1220 if ($fname =~ /^[sdcz]gbtf2_$/) {
1221 return "(*$$argsref[0]>*$$argsref[5]?*$$argsref[0]:*$$argsref[5])*(*$$argsref[1])*" if ($argnum == 4); # ab : max(ldab,m) x n
1222 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 6); # ipiv : min(m,n)
1223 }
1224 # sdcz/gbtrf
1225 if ($fname =~ /^[sdcz]gbtrf_$/) {
1226 return "(*$$argsref[0]>*$$argsref[5]?*$$argsref[0]:*$$argsref[5])*(*$$argsref[1])*" if ($argnum == 4); # ab : max(ldab,m) x n
1227 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 6); # ipiv : min(m,n)
1228 }
1229 # sdcz/gbtrs
1230 if ($fname =~ /^[sdcz]gbtrs_$/) {
1231 return "(*$$argsref[1]>*$$argsref[6]?*$$argsref[1]:*$$argsref[6])*(*$$argsref[1])*" if ($argnum == 5); # ab : max(ldab,n) x n
1232 return "((*$$argsref[1]))*" if ($argnum == 7); # ipiv: array of dimension n
1233 }
1234 # sdcz/gebak
1235 if ($fname =~ /^[sdcz]gebak_$/) {
1236 return "(*$$argsref[2]>*$$argsref[8]?*$$argsref[2]:*$$argsref[8])*(*$$argsref[6])*" if ($argnum == 7); # v : max(ldv,n) x m
1237 return "((*$$argsref[2]))*" if ($argnum == 5); # scale: array of dimension n
1238 }
1239 # sdcz/gebal
1240 if ($fname =~ /^[sdcz]gebal_$/) {
1241 return "(*$$argsref[1]>*$$argsref[3]?*$$argsref[1]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,n) x n
1242 return "((*$$argsref[1]))*" if ($argnum == 6); # scale: array of dimension n
1243 }
1244 # sdcz/gebd2
1245 if ($fname =~ /^[sdcz]gebd2_$/) {
1246 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1247 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 4); # d : dimension min(m,n)
1248 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0])-1:(*$$argsref[1])-1)*" if ($argnum == 5); # e : dim min(m,n) -1
1249 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 6); # tauq : dim min(m,n)
1250 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 7); # taup : dim min(m,n)
1251 return "((*$$argsref[0]) > (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 8); # work : dim max(m,n)
1252 }
1253 # sdcz/gebrd
1254 if ($fname =~ /^[sdcz]gebrd_$/) {
1255 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1256 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 4); # d : dimension min(m,n)
1257 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0])-1:(*$$argsref[1])-1)*" if ($argnum == 5); # e : dim min(m,n) -1
1258 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 6); # tauq : dim min(m,n)
1259 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 7); # taup : dim min(m,n)
1260 return "((*$$argsref[9]))*" if ($argnum == 8); # work : dim lwork
1261 }
1262 # sdcz/gecon
1263 if ($fname =~ /^[sdcz]gecon_$/) {
1264 return "(*$$argsref[1]>*$$argsref[3]?*$$argsref[1]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,n) x n
1265 return "((*$$argsref[1])*2)*" if ($argnum == 6); # work : dim 2n
1266 return "((*$$argsref[1])*2)*" if ($argnum == 7); # work : dim 2n
1267 }
1268 # sdcz/geequ sdcz/geequb
1269 if ($fname =~ /^[sdcz]geequb?_$/) {
1270 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1271 return "((*$$argsref[0]))*" if ($argnum == 4); # r__ : dim m
1272 return "((*$$argsref[1]))*" if ($argnum == 5); # c__ : dim n
1273 }
1274 # sdcz/gees
1275 if ($fname =~ /^[sdcz]gees_$/) {
1276 return "(*$$argsref[3]>*$$argsref[5]?*$$argsref[3]:*$$argsref[5])*(*$$argsref[3])*" if ($argnum == 4); # a : max(lda,n) x n
1277 return "((*$$argsref[3]))*" if ($argnum == 7); # w : dim n
1278 return "(*$$argsref[3]>*$$argsref[9]?*$$argsref[3]:*$$argsref[9])*(*$$argsref[3])*" if ($argnum == 8); # vs : max(ldvs,n) x n
1279 return "(1>*$$argsref[11]?1:*$$argsref[11])*" if ($argnum == 10); # work: dim max(1,lwork)
1280 return "((*$$argsref[3]))*" if ($argnum == 12); # rwork : dim n
1281 return "((*$$argsref[3]))*" if ($argnum == 13); # bwork : dim n
1282 }
1283 # sdcz/geesx
1284 if ($fname =~ /^[sdcz]geesx_$/) {
1285 return "(*$$argsref[4]>*$$argsref[6]?*$$argsref[4]:*$$argsref[6])*(*$$argsref[4])*" if ($argnum == 5); # a : max(lda,n) x n
1286 return "((*$$argsref[4]))*" if ($argnum == 8); # w : dim n
1287 return "(*$$argsref[4]>*$$argsref[10]?*$$argsref[4]:*$$argsref[10])*(*$$argsref[4])*" if ($argnum == 9); # vs :max(ldvs,n) x n
1288 return "(1>*$$argsref[14]?1:*$$argsref[14])*" if ($argnum == 13); # work: dim max(1,lwork)
1289 return "((*$$argsref[4]))*" if ($argnum == 15); # rwork : dim n
1290 return "((*$$argsref[4]))*" if ($argnum == 16); # bwork : dim n
1291 }
1292 # sdcz/geev
1293 if ($fname =~ /^[sdcz]geev_$/) {
1294 return "(*$$argsref[2]>*$$argsref[4]?*$$argsref[2]:*$$argsref[4])*(*$$argsref[2])*" if ($argnum == 3); # a : max(lda,n) x n
1295 return "((*$$argsref[2]))*" if ($argnum == 5); # w : dim n
1296 return "(*$$argsref[2]>*$$argsref[7]?*$$argsref[2]:*$$argsref[7])*(*$$argsref[2])*" if ($argnum == 6); # vl : max(ldvl,n) x n
1297 return "(*$$argsref[2]>*$$argsref[9]?*$$argsref[2]:*$$argsref[9])*(*$$argsref[2])*" if ($argnum == 8); # vr : max(ldvr,n) x n
1298 return "(1>*$$argsref[11]?1:*$$argsref[11])*" if ($argnum == 10); # work: dim max(1,lwork)
1299 return "((*$$argsref[2])*2)*" if ($argnum == 12); # rwork : dim 2n
1300 }
1301 # sdcz/geevx
1302 if ($fname =~ /^[sdcz]geevx_$/) {
1303 return "(*$$argsref[4]>*$$argsref[6]?*$$argsref[4]:*$$argsref[6])*(*$$argsref[4])*" if ($argnum == 5); # a : max(lda,n) x n
1304 return "((*$$argsref[4]))*" if ($argnum == 7); # w : dim n
1305 return "(*$$argsref[4]>*$$argsref[9]?*$$argsref[4]:*$$argsref[9])*(*$$argsref[4])*" if ($argnum == 8); # vl : max(ldvl,n) x n
1306 return "(*$$argsref[2]>*$$argsref[11]?*$$argsref[4]:*$$argsref[11])*(*$$argsref[4])*" if ($argnum == 10);#vr : max(ldvr,n) x n
1307 return "((*$$argsref[4]))*" if ($argnum == 14); # scale : dim n
1308 return "((*$$argsref[4]))*" if ($argnum == 16); # rconde : dim n
1309 return "((*$$argsref[4]))*" if ($argnum == 17); # rcondv : dim n
1310 return "(1>*$$argsref[19]?1:*$$argsref[19])*" if ($argnum == 18); # work: dim max(1,lwork)
1311 return "((*$$argsref[4])*2)*" if ($argnum == 20); # rwork : dim 2n
1312 }
1313 # sdcz/gegs
1314 if ($fname =~ /^[sdcz]gegs_$/) {
1315 return "(*$$argsref[2]>*$$argsref[4]?*$$argsref[2]:*$$argsref[4])*(*$$argsref[2])*" if ($argnum == 3); # a : max(lda,n) x n
1316 return "(*$$argsref[2]>*$$argsref[6]?*$$argsref[2]:*$$argsref[6])*(*$$argsref[2])*" if ($argnum == 5); # b : max(ldb,n) x n
1317 return "((*$$argsref[2]))*" if ($argnum == 7); # alpha : dim n
1318 return "((*$$argsref[2]))*" if ($argnum == 8); # beta : dim n
1319 return "(*$$argsref[2]>*$$argsref[10]?*$$argsref[2]:*$$argsref[10])*(*$$argsref[2])*" if ($argnum == 9);# vsl:max(ldvsl,n) x n
1320 return "(*$$argsref[2]>*$$argsref[12]?*$$argsref[2]:*$$argsref[12])*(*$$argsref[2])*" if ($argnum == 11);#vsr:max(ldvsr,n) x n
1321 return "(1>*$$argsref[14]?1:*$$argsref[14])*" if ($argnum == 13); # work: dim max(1,lwork)
1322 return "((*$$argsref[2])*3)*" if ($argnum == 15); # rwork : dim 3n
1323 }
1324 # sdcz/gegv
1325 if ($fname =~ /^[sdcz]gegv_$/) {
1326 return "(*$$argsref[2]>*$$argsref[4]?*$$argsref[2]:*$$argsref[4])*(*$$argsref[2])*" if ($argnum == 3); # a : max(lda,n) x n
1327 return "(*$$argsref[2]>*$$argsref[6]?*$$argsref[2]:*$$argsref[6])*(*$$argsref[2])*" if ($argnum == 5); # b : max(ldb,n) x n
1328 return "((*$$argsref[2]))*" if ($argnum == 7); # alpha : dim n
1329 return "((*$$argsref[2]))*" if ($argnum == 8); # beta : dim n
1330 return "(*$$argsref[2]>*$$argsref[10]?*$$argsref[2]:*$$argsref[10])*(*$$argsref[2])*" if ($argnum == 9);# vl : max(ldvl,n) x n
1331 return "(*$$argsref[2]>*$$argsref[12]?*$$argsref[2]:*$$argsref[12])*(*$$argsref[2])*" if ($argnum == 11);# vr: max(ldvr,n) x n
1332 return "(1>*$$argsref[14]?1:*$$argsref[14])*" if ($argnum == 13); # work: dim max(1,lwork)
1333 return "((*$$argsref[2])*8)*" if ($argnum == 15); # rwork : dim 8n
1334 }
1335 # sdcz/gehd2
1336 if ($fname =~ /^[sdcz]gehd2_$/) {
1337 return "(*$$argsref[0]>*$$argsref[4]?*$$argsref[0]:*$$argsref[4])*(*$$argsref[0])*" if ($argnum == 3); # a : max(lda,n) x n
1338 return "((*$$argsref[0]) -1)*" if ($argnum == 5); # tau: dim n-1
1339 return "((*$$argsref[0]))*" if ($argnum == 6); # work: dim n
1340 }
1341 # sdcz/gehrd
1342 if ($fname =~ /^[sdcz]gehrd_$/) {
1343 return "(*$$argsref[0]>*$$argsref[4]?*$$argsref[0]:*$$argsref[4])*(*$$argsref[0])*" if ($argnum == 3); # a : max(lda,n) x n
1344 return "((*$$argsref[0]) -1)*" if ($argnum == 5); # tau: dim n-1
1345 return "((*$$argsref[7]))*" if ($argnum == 6); # work: dim lwork
1346 }
1347 # sdcz/gelq2
1348 if ($fname =~ /^[sdcz]gelq2_$/) {
1349 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1350 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 4); # tau : dim min(m,n)
1351 return "((*$$argsref[0]))*" if ($argnum == 5); # work: dim m
1352 }
1353 # sdcz/gelqf
1354 if ($fname =~ /^[sdcz]gelqf_$/) {
1355 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1356 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 4); # tau : dim min(m,n)
1357 return "(1>*$$argsref[6]?1:*$$argsref[6])*" if ($argnum == 5); # work: dim max(1,lwork)
1358 }
1359 # sdcz/gels
1360 if ($fname =~ /^[sdcz]gels_$/) {
1361 return "(*$$argsref[1]>*$$argsref[5]?*$$argsref[1]:*$$argsref[5])*(*$$argsref[2])*" if ($argnum == 4); # a : max(lda,m) x n
1362 return "(*$$argsref[1]>*$$argsref[7]?*$$argsref[1]:*$$argsref[7])*(*$$argsref[3])*" if ($argnum == 6); # b : max(ldb,m) x nrhs
1363 return "(1>*$$argsref[9]?1:*$$argsref[9])*" if ($argnum == 8); # work: dim max(1,lwork)
1364 }
1365 # sdcz/gelsd
1366 if ($fname =~ /^[sdcz]gelsd_$/) {
1367 return "(*$$argsref[0]>*$$argsref[4]?*$$argsref[0]:*$$argsref[4])*(*$$argsref[1])*" if ($argnum == 3); # a : max(lda,m) x n
1368 return "(*$$argsref[0]>*$$argsref[6]?*$$argsref[0]:*$$argsref[6])*(*$$argsref[2])*" if ($argnum == 5); # b : max(ldb,m) x nrhs
1369 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 7); # s : dim min(m,n)
1370 return "(1>*$$argsref[11]?1:*$$argsref[11])*" if ($argnum == 10); # work: dim max(1,lwork)
1371 return "(1>*$$argsref[11]?1:*$$argsref[11])*" if ($argnum == 12); # rwork: dim max(1,lwork)
1372 return "(1>*$$argsref[11]?1:*$$argsref[11])*" if ($argnum == 13); # iwork: dim max(1,lwork)
1373 }
1374 # sdcz/gelss
1375 if ($fname =~ /^[sdcz]gelss_$/) {
1376 return "(*$$argsref[0]>*$$argsref[4]?*$$argsref[0]:*$$argsref[4])*(*$$argsref[1])*" if ($argnum == 3); # a : max(lda,m) x n
1377 return "(*$$argsref[0]>*$$argsref[6]?*$$argsref[0]:*$$argsref[6])*(*$$argsref[2])*" if ($argnum == 5); # b : max(ldb,m) x nrhs
1378 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 7); # s : dim min(m,n)
1379 return "(1>*$$argsref[11]?1:*$$argsref[11])*" if ($argnum == 10); # work: dim max(1,lwork)
1380 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*5*" if ($argnum == 12); # rwork : 5*dim min(m,n)
1381 }
1382 # sdcz/gelsx
1383 if ($fname =~ /^[sdcz]gelsx_$/) {
1384 return "(*$$argsref[0]>*$$argsref[4]?*$$argsref[0]:*$$argsref[4])*(*$$argsref[1])*" if ($argnum == 3); # a : max(lda,m) x n
1385 return "(*$$argsref[0]>*$$argsref[6]?*$$argsref[0]:*$$argsref[6])*(*$$argsref[2])*" if ($argnum == 5); # b : max(ldb,m) x nrhs
1386 return "((*$$argsref[1]))*" if ($argnum == 7); # jpvt: dim n
1387 return "(MIN(*$$argsref[0],*$$argsref[1]) + MAX(*$$argsref[1], (2*MIN(*$$argsref[0],*$$argsref[1])+*$$argsref[2])))*" if ($argnum == 10); # work: dim (min(M,N) + max( N, 2*min(M,N)+NRHS ))
1388 return "(2*(*$$argsref[1]))*" if ($argnum == 11); # rwork: dim 2n
1389 }
1390 # sdcz/gelsy
1391 if ($fname =~ /^[sdcz]gelsy_$/) {
1392 return "(*$$argsref[0]>*$$argsref[4]?*$$argsref[0]:*$$argsref[4])*(*$$argsref[1])*" if ($argnum == 3); # a : max(lda,m) x n
1393 return "(*$$argsref[6])*(*$$argsref[2])*" if ($argnum == 5); # b : ldb x nrhs
1394 return "((*$$argsref[1]))*" if ($argnum == 7); # jpvt: dim n
1395 return "(1>*$$argsref[11]?1:*$$argsref[11])*" if ($argnum == 10); # work: dim max(1,lwork)
1396 return "(2*(*$$argsref[1]))*" if ($argnum == 12); # rwork: dim 2n
1397 }
1398 # sdcz/geql2
1399 if ($fname =~ /^[sdcz]geql2_$/) {
1400 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1401 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 4); # tau : dim min(m,n)
1402 return "((*$$argsref[1]))*" if ($argnum == 5); # work: dim n
1403 }
1404 # sdcz/geqlf
1405 if ($fname =~ /^[sdcz]geqlf_$/) {
1406 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1407 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 4); # tau : dim min(m,n)
1408 return "(1>*$$argsref[6]?1:*$$argsref[6])*" if ($argnum == 5); # work: dim max(1,lwork)
1409 }
1410 # sdcz/geqp3
1411 if ($fname =~ /^[sdcz]geqp3_$/) {
1412 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1413 return "((*$$argsref[1]))*" if ($argnum == 4); # jpvt: dim n
1414 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 5); # tau : dim min(m,n)
1415 return "(1>*$$argsref[7]?1:*$$argsref[7])*" if ($argnum == 6); # work: dim max(1,lwork)
1416 return "(2*(*$$argsref[1]))*" if ($argnum == 8); # rwork: dim 2n
1417 }
1418 # sdcz/geqpf
1419 if ($fname =~ /^[sdcz]geqpf_$/) {
1420 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1421 return "((*$$argsref[1]))*" if ($argnum == 4); # jpvt: dim n
1422 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 5); # tau : dim min(m,n)
1423 return "((*$$argsref[1]))*" if ($argnum == 6); # jpvt: dim n
1424 return "(2*(*$$argsref[1]))*" if ($argnum == 7); # rwork: dim 2n
1425 }
1426 # sdcz/geqr2
1427 if ($fname =~ /^[sdcz]geqr2_$/) {
1428 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1429 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 4); # tau : dim min(m,n)
1430 return "((*$$argsref[1]))*" if ($argnum == 5); # work: dim n
1431 }
1432 # sdcz/geqrf
1433 if ($fname =~ /^[sdcz]geqrf_$/) {
1434 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1435 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 4); # tau : dim min(m,n)
1436 return "(1>*$$argsref[6]?1:*$$argsref[6])*" if ($argnum == 5); # work: dim max(1,lwork)
1437 }
1438 # sdcz/gerfs
1439 if ($fname =~ /^[sdcz]gerfs_$/) {
1440 return "(*$$argsref[1]>*$$argsref[4]?*$$argsref[1]:*$$argsref[4])*(*$$argsref[1])*" if ($argnum == 3); # a : max(lda,n) x n
1441 return "(*$$argsref[1]>*$$argsref[6]?*$$argsref[1]:*$$argsref[6])*(*$$argsref[1])*" if ($argnum == 5); # af: max(ldaf,n) x n
1442 return "((*$$argsref[1]))*" if ($argnum == 7); # ipiv: dim n
1443 return "(*$$argsref[1]>*$$argsref[9]?*$$argsref[1]:*$$argsref[9])*(*$$argsref[2])*" if ($argnum == 8); # b : max(ldb,n) x nrhs
1444 return "(*$$argsref[1]>*$$argsref[11]?*$$argsref[1]:*$$argsref[11])*(*$$argsref[2])*" if ($argnum == 10);#x:max(ldx,n) x nrhs
1445 return "((*$$argsref[2]))*" if ($argnum == 12); # ferr: dim nrhs
1446 return "((*$$argsref[2]))*" if ($argnum == 13); # berr: dim nrhs
1447 return "((*$$argsref[1])*2)*" if ($argnum == 14); # work: dim 2n
1448 return "((*$$argsref[1]))*" if ($argnum == 15); # rwork: dim n
1449 }
1450 # sdcz/gerfsx
1451 if ($fname =~ /^[sdcz]gerfsx_$/) {
1452 return "(*$$argsref[2]>*$$argsref[5]?*$$argsref[2]:*$$argsref[5])*(*$$argsref[2])*" if ($argnum == 4); # a : max(lda,n) x n
1453 return "(*$$argsref[2]>*$$argsref[7]?*$$argsref[2]:*$$argsref[7])*(*$$argsref[2])*" if ($argnum == 6); # af: max(ldaf,n) x n
1454 return "((*$$argsref[2]))*" if ($argnum == 8); # ipiv: dim n
1455 return "((*$$argsref[2]))*" if ($argnum == 9); # r: dim n
1456 return "((*$$argsref[2]))*" if ($argnum == 10); # c: dim n
1457 return "(*$$argsref[2]>*$$argsref[12]?*$$argsref[2]:*$$argsref[12])*(*$$argsref[3])*" if ($argnum == 11);# b:max(ldb,n) x nrhs
1458 return "(*$$argsref[2]>*$$argsref[14]?*$$argsref[2]:*$$argsref[14])*(*$$argsref[3])*" if ($argnum == 13);#x:max(ldx,n) x nrhs
1459 return "((*$$argsref[3]))*" if ($argnum == 16); # berr: dim nrhs
1460 return "(*$$argsref[3])*(*$$argsref[17])*" if ($argnum == 18);# err_bnds_norm_: nrhs x n_err_bnds
1461 return "(*$$argsref[3])*(*$$argsref[17])*" if ($argnum == 19);# err_bnds_comp_: nrhs x n_err_bnds
1462 return "((*$$argsref[20]))*" if ($argnum == 21); # params: dim nparams
1463 return "((*$$argsref[2])*2)*" if ($argnum == 22); # work: dim 2n
1464 return "((*$$argsref[2])*2)*" if ($argnum == 23); # rwork: dim 2n
1465 }
1466 # sdcz/gerq2
1467 if ($fname =~ /^[sdcz]gerq2_$/) {
1468 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1469 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 4); # tau : dim min(m,n)
1470 return "((*$$argsref[0]))*" if ($argnum == 5); # work: dim m
1471 }
1472 # sdcz/gerqf
1473 if ($fname =~ /^[sdcz]gerqf_$/) {
1474 return "(*$$argsref[0]>*$$argsref[3]?*$$argsref[0]:*$$argsref[3])*(*$$argsref[1])*" if ($argnum == 2); # a : max(lda,m) x n
1475 return "((*$$argsref[0]) < (*$$argsref[1]) ? (*$$argsref[0]):(*$$argsref[1]))*" if ($argnum == 4); # tau : dim min(m,n)
1476 return "(1>*$$argsref[6]?1:*$$argsref[6])*" if ($argnum == 5); # work: dim max(1,lwork)
1477 }
1478 # sdcz/gesc2
1479 if ($fname =~ /^[sdcz]gesc2_$/) {
1480 return "(*$$argsref[0]>*$$argsref[2]?*$$argsref[0]:*$$argsref[2])*(*$$argsref[0])*" if ($argnum == 2); # a : max(lda,n) x n
1481 return "((*$$argsref[0]))*" if ($argnum == 3); # rhs: dim n
1482 return "((*$$argsref[0]))*" if ($argnum == 4); # ipiv: dim n
1483 return "((*$$argsref[0]))*" if ($argnum == 5); # jpiv: dim n
1484 }
1485 }
1488 # returns the constant modifier (if any) for the supplied function
1489 # if none, returns an empty string
1490 # $argnum starts from 0 (for first argument)
1491 # This modifier is generated by multiplying constants in the function
1492 # arguments based on the function name. This modifier when multiplied with
1493 # the argument type size, determines the full size of the buffer for
1494 # USE_HOST_PTR
1495 # Arg 1: function name
1496 # Arg 2: the argument number for whom we need to determine the multiplier
1497 # Arg 3: Args array
1498 sub get_bufsize_modifier
1499 {
1500 $fname = shift;
1501 $argnum = shift;
1502 $argsref = shift;
1504 # clapack
1505 return clapack_bufsize_modifier($fname, $argnum, $argsref) if ($namespace =~ /_clapack$/);
1507 # BLAS modifiers below
1508 my $z_mult = "" ; # set to 2 for all double-complex precisions
1509 my $c_mult = "" ; # set to 2 for all complex and double-complex precisions
1511 ### Level 1 ###
1512 # dsdot, sdot, ddot
1513 if ($fname =~ /^cblas_d?[sd]dot$/) {
1514 return "(1+($$argsref[0]-1)*abs($$argsref[-3]))*" if ($argnum == 1);
1515 return "(1+($$argsref[0]-1)*abs($$argsref[-1]))*" if ($argnum == 3);
1516 }
1517 # sdsdot
1518 if ($fname =~ /^cblas_sdsdot$/) {
1519 return "(1+($$argsref[0]-1)*abs($$argsref[-3]))*" if ($argnum == 2);
1520 return "(1+($$argsref[0]-1)*abs($$argsref[-1]))*" if ($argnum == 4);
1521 }
1522 # cdotu_sub, cdotc_sub, zdotu_sub, zdotc_sub
1523 if ($fname =~ /^cblas_[cz]dot[uc]_sub$/) {
1524 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1525 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1526 return "${z_mult}${c_mult}(1+($$argsref[0]-1)*abs($$argsref[2]))*" if ($argnum == 1);
1527 return "${z_mult}${c_mult}(1+($$argsref[0]-1)*abs($$argsref[4]))*" if ($argnum == 3);
1528 return "${z_mult}${c_mult}" if ($argnum == 5);
1529 }
1530 # snrm2, dnrm2, scnrm2 dznrm2
1531 if ($fname =~ /^cblas_[sd][cz]?nrm2$/) {
1532 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_.z.*$/) ;
1533 $c_mult = "2*" if ($fname =~ /^cblas_.[cz].*$/) ;
1534 return "${z_mult}${c_mult}(1+($$argsref[0]-1)*abs($$argsref[2]))*" if ($argnum == 1);
1535 }
1536 # sasum, dasum, scasum, dzasum
1537 if ($fname =~ /^cblas_[sd][cz]?asum$/) {
1538 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_.z.*$/) ;
1539 $c_mult = "2*" if ($fname =~ /^cblas_.[cz].*$/) ;
1540 return "${z_mult}${c_mult}(1+($$argsref[0]-1)*abs($$argsref[2]))*" if ($argnum == 1);
1541 }
1542 # isamax, idamax, icamax, izamax
1543 if ($fname =~ /^cblas_i[sdcz]amax$/) {
1544 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_.z.*$/) ;
1545 $c_mult = "2*" if ($fname =~ /^cblas_.[cz].*$/) ;
1546 return "${z_mult}${c_mult}(1+($$argsref[0]-1)*abs($$argsref[2]))*" if ($argnum == 1);
1547 }
1548 # sswap, dswap, cswap, zswap
1549 if ($fname =~ /^cblas_[sdcz]swap$/) {
1550 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1551 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1552 return "${z_mult}${c_mult}(1+($$argsref[0]-1)*abs($$argsref[2]))*" if ($argnum == 1);
1553 return "${z_mult}${c_mult}(1+($$argsref[0]-1)*abs($$argsref[4]))*" if ($argnum == 3);
1554 }
1555 # scopy, dcopy, ccopy, zcopy
1556 if ($fname =~ /^cblas_[sdcz]copy$/) {
1557 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1558 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1559 return "${z_mult}${c_mult}(1+($$argsref[0]-1)*abs($$argsref[2]))*" if ($argnum == 1);
1560 return "${z_mult}${c_mult}(1+($$argsref[0]-1)*abs($$argsref[4]))*" if ($argnum == 3);
1561 }
1562 # saxpy, daxpy, caxpy, zaxpy
1563 if ($fname =~ /^cblas_[sdcz]axpy$/) {
1564 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1565 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1566 return "${z_mult}2*" if (($argnum == 1) && ($fname =~ /^cblas_[cz].*$/)) ; # a
1567 return "${z_mult}${c_mult}(1+($$argsref[0]-1)*abs($$argsref[3]))*" if ($argnum == 2);
1568 return "${z_mult}${c_mult}(1+($$argsref[0]-1)*abs($$argsref[5]))*" if ($argnum == 4);
1569 }
1570 # srot, drot, srotm, drotm
1571 if ($fname =~ /^cblas_[sd]rotm?$/) {
1572 return "(1+($$argsref[0]-1)*abs($$argsref[2]))*" if ($argnum == 1);
1573 return "(1+($$argsref[0]-1)*abs($$argsref[4]))*" if ($argnum == 3);
1574 return "5*" if ($argnum == 5);
1575 }
1576 if ($fname =~ /^cblas_[sd]rotmg$/) {
1577 return "5*" if ($argnum == 4);
1578 }
1579 # sscal, dscal, cscal, zscal, csscal, zdscal
1580 if ($fname =~ /^cblas_[sdcz][sd]?scal$/) {
1581 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1582 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1583 return "${z_mult}2*" if (($argnum == 1) && ($fname =~ /^cblas_[cz].*$/)) ; # a
1584 return "${z_mult}${c_mult}(1+($$argsref[0]-1)*abs($$argsref[3]))*" if ($argnum == 2);
1585 }
1587 ### Level 2 BLAS ###
1588 # void cblas_sgemv(const enum CBLAS_ORDER Order,
1589 # const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
1590 # const float alpha, const float *A, const int lda,
1591 # const float *X, const int incX, const float beta,
1592 # float *Y, const int incY);
1593 # sgemv, dgemv, cgemv, zgemv
1594 # SGEMV ('T', 4, 9, 1.0, A, 5, X, 1, .0, Y, 1)
1595 if ($fname =~ /^cblas_[sdcz]gemv$/) {
1596 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1597 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1598 return "${z_mult}${c_mult}BLAS_ORD($$argsref[0],$$argsref[2],$$argsref[3])*$$argsref[6]*" if ($argnum == 5); # A: lda x (n or m)
1599 return "${z_mult}${c_mult}(1+(BLAS_TRN($$argsref[1],$$argsref[3],$$argsref[2])-1)*abs($$argsref[8]))*" if ($argnum == 7); # X: incX x (n or m)
1600 return "${z_mult}${c_mult}(1+(BLAS_TRN($$argsref[1],$$argsref[2],$$argsref[3])-1)*abs($$argsref[11]))*" if ($argnum == 10); # Y: incY x (m or n)
1601 return "${z_mult}2*" if (($argnum == 4) && ($fname =~ /^cblas_[cz].*$/)) ; # alpha
1602 return "${z_mult}2*" if (($argnum == 9) && ($fname =~ /^cblas_[cz].*$/)) ; # beta
1603 #return "$$argsref[2]*abs($$argsref[3])*" if ($argnum == 5);
1604 #return "($$argsref[1] == CblasNoTrans ? $$argsref[2]:$$argsref[3])*abs($$argsref[-4])*" if ($argnum == 7);
1605 #return "($$argsref[1] == CblasNoTrans ? $$argsref[3]:$$argsref[2])*abs($$argsref[-1])*" if ($argnum == 10);
1606 }
1608 #void cblas_sgbmv(const enum CBLAS_ORDER Order,
1609 # const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
1610 # const int KL, const int KU,
1611 # const float alpha, const float *A, const int lda,
1612 # const float *X, const int incX, const float beta,
1613 # float *Y, const int incY);
1614 # sgbmv, dgbmv, cgbmv, zgbmv
1615 if ($fname =~ /^cblas_[sdcz]gbmv$/) {
1616 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1617 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1618 return "${z_mult}${c_mult}BLAS_ORD($$argsref[0],$$argsref[2],$$argsref[3])*$$argsref[8]*" if ($argnum == 7); # A: lda x (n or m)
1619 return "${z_mult}${c_mult}(1+(BLAS_TRN($$argsref[1],$$argsref[3],$$argsref[2])-1)*abs($$argsref[10]))*" if ($argnum == 9); # X: incX x (n or m)
1620 return "${z_mult}${c_mult}(1+(BLAS_TRN($$argsref[1],$$argsref[2],$$argsref[3])-1)*abs($$argsref[13]))*" if ($argnum == 12); # Y: incY x (m or n)
1621 return "${z_mult}2*" if (($argnum == 6) && ($fname =~ /^cblas_[cz].*$/)) ; # alpha
1622 return "${z_mult}2*" if (($argnum == 11) && ($fname =~ /^cblas_[cz].*$/)) ; # beta
1623 }
1625 #void cblas_strmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1626 # const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
1627 # const int N, const float *A, const int lda,
1628 # float *X, const int incX);
1629 # strmv, dtrmv, ctrmv, ztrmv
1630 if ($fname =~ /^cblas_[sdcz]trmv$/) {
1631 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1632 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1633 return "${z_mult}${c_mult}$$argsref[4]*$$argsref[6]*" if ($argnum == 5); # A : lda x n
1634 return "${z_mult}${c_mult}(1+($$argsref[4]-1)*abs($$argsref[-1]))*" if ($argnum == 7); # X: 1 + (n-1)*incX
1635 #return "$$argsref[4]*abs($$argsref[-1])*" if ($argnum == 7);# X : N x incX
1636 }
1638 #void cblas_stbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1639 # const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
1640 # const int N, const int K, const float *A, const int lda,
1641 # float *X, const int incX);
1642 # stbmv, dtbmv, ctbmv, ztbmv
1643 if ($fname =~ /^cblas_[sdcz]tbmv$/) {
1644 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1645 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1646 return "${z_mult}${c_mult}$$argsref[4]*$$argsref[7]*" if ($argnum == 6); # A : lda x n
1647 return "${z_mult}${c_mult}(1+($$argsref[4]-1)*abs($$argsref[-1]))*" if ($argnum == 8); # X: 1 + (n-1)*incX
1648 #return "$$argsref[4]*abs($$argsref[4])*" if ($argnum == 6); # A : N x N
1649 #return "$$argsref[4]*abs($$argsref[-1])*" if ($argnum == 8);# X : N x incX
1650 }
1651 #void cblas_strsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1652 # const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
1653 # const int N, const float *A, const int lda, float *X,
1654 # const int incX);
1655 # strsv, dtrsv, ctrsv, ztrsv
1656 if ($fname =~ /^cblas_[sdcz]trsv$/) {
1657 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1658 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1659 return "${z_mult}${c_mult}$$argsref[4]*$$argsref[6]*" if ($argnum == 5); # A : lda x n
1660 return "${z_mult}${c_mult}(1+($$argsref[4]-1)*abs($$argsref[-1]))*" if ($argnum == 7); # X: 1 + (n-1)*incX
1661 #return "$$argsref[4]*abs($$argsref[4])*" if ($argnum == 5); # A : N x N
1662 #return "$$argsref[4]*abs($$argsref[-1])*" if ($argnum == 7);# X : N x incX
1663 }
1664 #void cblas_stbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1665 # const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
1666 # const int N, const int K, const float *A, const int lda,
1667 # float *X, const int incX);
1668 # stbsv, dtbsv, ctbsv, ztbsv
1669 if ($fname =~ /^cblas_[sdcz]tbsv$/) {
1670 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1671 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1672 return "${z_mult}${c_mult}$$argsref[4]*$$argsref[7]*" if ($argnum == 6); # A : lda x n
1673 return "${z_mult}${c_mult}(1+($$argsref[4]-1)*abs($$argsref[-1]))*" if ($argnum == 8); # X: 1 + (n-1)*incX
1674 #return "$$argsref[4]*abs($$argsref[4])*" if ($argnum == 6); # A : N x N
1675 #return "$$argsref[4]*abs($$argsref[-1])*" if ($argnum == 8);# X : N x incX
1676 }
1677 #void cblas_stpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1678 # const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
1679 # const int N, const float *Ap, float *X, const int incX);
1680 # stpsv, dtpsv, ctpsv, ztpsv
1681 if ($fname =~ /^cblas_[sdcz]tpsv$/) {
1682 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1683 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1684 return "${z_mult}${c_mult}$$argsref[4]*($$argsref[4]+1)/2*" if ($argnum == 5); # Ap : n(n+1)/2
1685 return "${z_mult}${c_mult}(1+($$argsref[4]-1)*abs($$argsref[-1]))*" if ($argnum == 6); # X: 1 + (n-1)*incX
1686 #return "$$argsref[4]*abs($$argsref[4])*" if ($argnum == 5); # Ap : N x N
1687 #return "$$argsref[4]*abs($$argsref[-1])*" if ($argnum == 6);# X : N x incX
1688 }
1690 #void cblas_ssymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1691 # const int N, const float alpha, const float *A,
1692 # const int lda, const float *X, const int incX,
1693 # const float beta, float *Y, const int incY);
1694 # ssymv, dsymv
1695 if ($fname =~ /^cblas_[sd]symv$/) {
1696 return "$$argsref[2]*$$argsref[5]*" if ($argnum == 4); # A : lda x n
1697 return "(1+($$argsref[2]-1)*abs($$argsref[-4]))*" if ($argnum == 6); # X: 1 + (n-1)*incX
1698 return "(1+($$argsref[2]-1)*abs($$argsref[-1]))*" if ($argnum == 9); # Y: 1 + (n-1)*incY
1699 #return "$$argsref[2]*abs($$argsref[-4])*" if ($argnum == 6);# X : N x incX
1700 #return "$$argsref[2]*abs($$argsref[-1])*" if ($argnum == 9);# Y : N x incY
1701 }
1702 #void cblas_ssbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1703 # const int N, const int K, const float alpha, const float *A,
1704 # const int lda, const float *X, const int incX,
1705 # const float beta, float *Y, const int incY);
1706 # ssbmv, dsbmv
1707 if ($fname =~ /^cblas_[sd]sbmv$/) {
1708 return "$$argsref[2]*$$argsref[6]*" if ($argnum == 5); # A : lda x n
1709 return "(1+($$argsref[2]-1)*abs($$argsref[-4]))*" if ($argnum == 7); # X: 1 + (n-1)*incX
1710 return "(1+($$argsref[2]-1)*abs($$argsref[-1]))*" if ($argnum == 10);# Y: 1 + (n-1)*incY
1711 #return "$$argsref[2]*abs($$argsref[2])*" if ($argnum == 5); # A : N x N
1712 #return "$$argsref[2]*abs($$argsref[-4])*" if ($argnum == 7);# X : N x incX
1713 #return "$$argsref[2]*abs($$argsref[-1])*" if ($argnum == 10);# Y : N x incY
1714 }
1715 # sspmv, dspmv
1716 if ($fname =~ /^cblas_[sd]spmv$/) {
1717 return "$$argsref[2]*($$argsref[2]+1)/2*" if ($argnum == 4); # Ap : n x n
1718 return "(1+($$argsref[2]-1)*abs($$argsref[-4]))*" if ($argnum == 5); # X: 1 + (n-1)*incX
1719 return "(1+($$argsref[2]-1)*abs($$argsref[-1]))*" if ($argnum == 8); # Y: 1 + (n-1)*incY
1720 #return "$$argsref[2]*abs($$argsref[2])*" if ($argnum == 4); # Ap : N x N
1721 #return "$$argsref[2]*abs($$argsref[-4])*" if ($argnum == 5);# X : N x incX
1722 #return "$$argsref[2]*abs($$argsref[-1])*" if ($argnum == 8);# Y : N x incY
1723 }
1724 #void cblas_ssyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1725 # const int N, const float alpha, const float *X,
1726 # const int incX, float *A, const int lda);
1727 # ssyr, dsyr
1728 if ($fname =~ /^cblas_[sd]syr$/) {
1729 return "$$argsref[2]*$$argsref[-1]*" if ($argnum == 6); # A : lda x n
1730 return "(1+($$argsref[2]-1)*abs($$argsref[5]))*" if ($argnum == 4); # X: 1 + (n-1)*incX
1731 #return "$$argsref[2]*abs($$argsref[2])*" if ($argnum == 6); # A : N x N
1732 #return "$$argsref[2]*abs($$argsref[5])*" if ($argnum == 4);# X : N x incX
1733 }
1735 #void cblas_ssyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1736 # const int N, const float alpha, const float *X,
1737 # const int incX, const float *Y, const int incY, float *A,
1738 # const int lda);
1739 # ssyr2, dsyr2
1740 if ($fname =~ /^cblas_[sd]syr2$/) {
1741 return "$$argsref[2]*$$argsref[-1]*" if ($argnum == 8); # A : lda x n
1742 return "(1+($$argsref[2]-1)*abs($$argsref[5]))*" if ($argnum == 4); # X: 1 + (n-1)*incX
1743 return "(1+($$argsref[2]-1)*abs($$argsref[7]))*" if ($argnum == 6); # Y: 1 + (n-1)*incY
1744 #return "$$argsref[2]*abs($$argsref[2])*" if ($argnum == 8); # A : N x N
1745 #return "$$argsref[2]*abs($$argsref[5])*" if ($argnum == 4);# X : N x incX
1746 #return "$$argsref[2]*abs($$argsref[7])*" if ($argnum == 6);# Y : N x incY
1747 }
1749 #void cblas_sspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1750 # const int N, const float alpha, const float *X,
1751 # const int incX, float *Ap);
1752 # sspr, dspr
1753 if ($fname =~ /^cblas_[sd]spr$/) {
1754 return "$$argsref[2]*($$argsref[2]+1)/2*" if ($argnum == 6); # Ap: N x N
1755 return "(1+($$argsref[2]-1)*abs($$argsref[5]))*" if ($argnum == 4); # X: 1 + (n-1)*incX
1756 }
1758 #void cblas_sspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1759 # const int N, const float alpha, const float *X,
1760 # const int incX, const float *Y, const int incY, float *A);
1761 # sspr2, dspr2
1762 if ($fname =~ /^cblas_[sd]spr2$/) {
1763 return "$$argsref[2]*($$argsref[2]+1)/2*" if ($argnum == 8); # Ap: n x n
1764 return "(1+($$argsref[2]-1)*abs($$argsref[5]))*" if ($argnum == 4); # X: 1 + (n-1)*incX
1765 return "(1+($$argsref[2]-1)*abs($$argsref[7]))*" if ($argnum == 6); # Y: 1 + (n-1)*incY
1766 }
1768 #void cblas_zhemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1769 # const int N, const void *alpha, const void *A,
1770 # const int lda, const void *X, const int incX,
1771 # const void *beta, void *Y, const int incY);
1772 # chemv, zhemv
1773 if ($fname =~ /^cblas_[cz]hemv$/) {
1774 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1775 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1776 return "${z_mult}${c_mult}$$argsref[2]*$$argsref[5]*" if ($argnum == 4); # A : lda x n
1777 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[7]))*" if ($argnum == 6); # X: 1 + (n-1)*incX
1778 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[-1]))*" if ($argnum == 9); # Y: 1 + (n-1)*incY
1779 return "${z_mult}2*" if (($argnum == 3) && ($fname =~ /^cblas_[cz].*$/)) ; # alpha
1780 return "${z_mult}2*" if (($argnum == 8) && ($fname =~ /^cblas_[cz].*$/)) ; # beta
1781 #return "$$argsref[2]*abs($$argsref[2])*" if ($argnum == 4); # A : N x N
1782 #return "$$argsref[2]*abs($$argsref[-4])*" if ($argnum == 6);# X : N x incX
1783 #return "$$argsref[2]*abs($$argsref[-1])*" if ($argnum == 9);# Y : N x incY
1784 }
1785 #void cblas_zhbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1786 # const int N, const int K, const void *alpha, const void *A,
1787 # const int lda, const void *X, const int incX,
1788 # const void *beta, void *Y, const int incY);
1789 # chbmv, zhbmv
1790 if ($fname =~ /^cblas_[cz]hbmv$/) {
1791 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1792 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1793 return "${z_mult}${c_mult}$$argsref[2]*$$argsref[6]*" if ($argnum == 5); # A : lda x n
1794 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[8]))*" if ($argnum == 7); # X: 1 + (n-1)*incX
1795 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[-1]))*" if ($argnum == 10); # Y: 1 + (n-1)*incY
1796 return "${z_mult}2*" if (($argnum == 4) && ($fname =~ /^cblas_[cz].*$/)) ; # alpha
1797 return "${z_mult}2*" if (($argnum == 9) && ($fname =~ /^cblas_[cz].*$/)) ; # beta
1798 #return "$$argsref[2]*abs($$argsref[2])*" if ($argnum == 5); # A : N x N
1799 #return "$$argsref[2]*abs($$argsref[-4])*" if ($argnum == 7);# X : N x incX
1800 #return "$$argsref[2]*abs($$argsref[-1])*" if ($argnum == 10);# Y : N x incY
1801 }
1802 #void cblas_ztpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1803 # const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
1804 # const int N, const void *Ap, void *X, const int incX);
1805 # ctpmv, ztpmv
1806 if ($fname =~ /^cblas_[scdz]tpmv$/) {
1807 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1808 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1809 return "${z_mult}${c_mult}$$argsref[4]*($$argsref[4]+1)/2*" if ($argnum == 5); # Ap : n x (n+1)/2
1810 return "${z_mult}${c_mult}(1+($$argsref[4]-1)*abs($$argsref[-1]))*" if ($argnum == 6); # X: 1 + (n-1)*incX
1811 }
1813 #void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1814 # const int N, const void *alpha, const void *Ap,
1815 # const void *X, const int incX,
1816 # const void *beta, void *Y, const int incY);
1817 # chpmv, zhpmv
1818 if ($fname =~ /^cblas_[cz]hpmv$/) {
1819 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1820 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1821 return "${z_mult}${c_mult}$$argsref[2]*($$argsref[2]+1)/2*" if ($argnum == 4); # Ap : n x n
1822 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[6]))*" if ($argnum == 5); # X: 1 + (n-1)*incX
1823 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[-1]))*" if ($argnum == 8); # Y: 1 + (n-1)*incY
1824 return "${z_mult}2*" if (($argnum == 3) && ($fname =~ /^cblas_[cz].*$/)) ; # alpha
1825 return "${z_mult}2*" if (($argnum == 7) && ($fname =~ /^cblas_[cz].*$/)) ; # beta
1826 #return "$$argsref[2]*abs($$argsref[2])*" if ($argnum == 4); # Ap : N x N
1827 #return "$$argsref[2]*abs($$argsref[-4])*" if ($argnum == 5);# X : N x incX
1828 #return "$$argsref[2]*abs($$argsref[-1])*" if ($argnum == 8);# Y : N x incY
1829 }
1830 #void cblas_sger(const enum CBLAS_ORDER Order, const int M, const int N,
1831 # const float alpha, const float *X, const int incX,
1832 # const float *Y, const int incY, float *A, const int lda);
1833 # sger, dger
1834 if ($fname =~ /^cblas_[sd]ger$/) {
1835 return "BLAS_ORD($$argsref[0],$$argsref[1],$$argsref[2])*$$argsref[-1]*" if($argnum== 8);#A:lda x (n or m)
1836 return "(1+($$argsref[1]-1)*abs($$argsref[5]))*" if ($argnum == 4);# X: 1 + (m-1)*incX
1837 return "(1+($$argsref[2]-1)*abs($$argsref[7]))*" if ($argnum == 6);# Y: 1 + (n-1)*incX
1838 #return "$$argsref[1]*abs($$argsref[2])*" if ($argnum == 8); # A : M x N
1839 #return "$$argsref[1]*abs($$argsref[-5])*" if ($argnum == 4);# X : M x incX
1840 #return "$$argsref[2]*abs($$argsref[-3])*" if ($argnum == 6);# Y : N x incY
1841 }
1842 #void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N,
1843 # const void *alpha, const void *X, const int incX,
1844 # const void *Y, const int incY, void *A, const int lda);
1845 #void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,
1846 # const void *alpha, const void *X, const int incX,
1847 # const void *Y, const int incY, void *A, const int lda);
1848 # cgeru, zgeru, cgerc, zgerc
1849 if ($fname =~ /^cblas_[cz]ger[uc]$/) {
1850 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1851 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1852 return "${z_mult}${c_mult}BLAS_ORD($$argsref[0],$$argsref[1],$$argsref[2])*$$argsref[-1]*" if($argnum== 8);#A:lda x (n or m)
1853 return "${z_mult}${c_mult}(1+($$argsref[1]-1)*abs($$argsref[5]))*" if ($argnum == 4);# X: 1 + (m-1)*incX
1854 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[7]))*" if ($argnum == 6);# Y: 1 + (n-1)*incX
1855 return "${z_mult}2*" if (($argnum == 3) && ($fname =~ /^cblas_[cz].*$/)) ; # alpha
1856 #return "$$argsref[1]*abs($$argsref[2])*" if ($argnum == 8); # A : M x N
1857 #return "$$argsref[1]*abs($$argsref[5])*" if ($argnum == 4);# X : M x incX
1858 #return "$$argsref[2]*abs($$argsref[7])*" if ($argnum == 6);# Y : N x incY
1859 }
1860 #void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1861 # const int N, const double alpha, const void *X, const int incX,
1862 # void *A, const int lda);
1863 # cher, zher
1864 if ($fname =~ /^cblas_[cz]her$/) {
1865 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1866 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1867 return "${z_mult}${c_mult}$$argsref[2]*$$argsref[-1]*" if ($argnum == 6); # A : lda x n
1868 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[5]))*" if ($argnum == 4); # X: 1 + (n-1)*incX
1869 #return "${z_mult}${c_mult}$$argsref[2]*abs($$argsref[5])*" if ($argnum == 4);# X : N x incX
1870 }
1871 #void cblas_zhpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1872 # const int N, const double alpha, const void *X,
1873 # const int incX, void *A);
1874 # chpr, zhpr
1875 if ($fname =~ /^cblas_[cz]hpr$/) {
1876 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1877 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1878 return "${z_mult}${c_mult}$$argsref[2]*($$argsref[2]+1)/2*" if ($argnum == 6); # A : n x (n+1)/2
1879 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[5]))*" if ($argnum == 4); # X: 1 + (n-1)*incX
1880 #return "$$argsref[2]*abs($$argsref[2])*" if ($argnum == 6); # A : N x N
1881 #return "$$argsref[2]*abs($$argsref[5])*" if ($argnum == 4);# X : N x incX
1882 }
1883 #void cblas_zher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
1884 # const void *alpha, const void *X, const int incX,
1885 # const void *Y, const int incY, void *A, const int lda);
1886 # cher2, zher2
1887 if ($fname =~ /^cblas_[cz]her2$/) {
1888 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1889 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1890 return "${z_mult}${c_mult}$$argsref[2]*$$argsref[-1]*" if ($argnum == 8); # A : lda x n
1891 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[5]))*" if ($argnum == 4); # X: 1 + (n-1)*incX
1892 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[7]))*" if ($argnum == 6); # Y: 1 + (n-1)*incY
1893 return "${z_mult}2*" if (($argnum == 3) && ($fname =~ /^cblas_[cz].*$/)) ; # alpha
1894 #return "$$argsref[2]*abs($$argsref[2])*" if ($argnum == 8); # A : N x N
1895 #return "$$argsref[2]*abs($$argsref[5])*" if ($argnum == 4);# X : N x incX
1896 #return "$$argsref[2]*abs($$argsref[7])*" if ($argnum == 6);# Y : N x incY
1897 }
1898 #void cblas_zhpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
1899 # const void *alpha, const void *X, const int incX,
1900 # const void *Y, const int incY, void *Ap);
1901 # chpr2, zhpr2
1902 if ($fname =~ /^cblas_[cz]hpr2$/) {
1903 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1904 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1905 return "${z_mult}${c_mult}$$argsref[2]*($$argsref[2]+1)/2*" if ($argnum == 8); # Ap : n x (n+1)/2
1906 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[5]))*" if ($argnum == 4); # X: 1 + (n-1)*incX
1907 return "${z_mult}${c_mult}(1+($$argsref[2]-1)*abs($$argsref[7]))*" if ($argnum == 6); # Y: 1 + (n-1)*incY
1908 return "${z_mult}2*" if (($argnum == 3) && ($fname =~ /^cblas_[cz].*$/)) ; # alpha
1909 #return "$$argsref[2]*abs($$argsref[2])*" if ($argnum == 8); # A : N x N
1910 #return "$$argsref[2]*abs($$argsref[5])*" if ($argnum == 4);# X : N x incX
1911 #return "$$argsref[2]*abs($$argsref[7])*" if ($argnum == 6);# Y : N x incY
1912 }
1914 ### BLAS Level 3 ###
1915 # sgemm, dgemm, cgemm, zgemm
1916 # void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
1917 # const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
1918 # const int K, const double alpha, const double *A,
1919 # const int lda, const double *B, const int ldb,
1920 # const double beta, double *C, const int ldc);
1921 #void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
1922 # const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
1923 # const int K, const void *alpha, const void *A,
1924 # const int lda, const void *B, const int ldb,
1925 # const void *beta, void *C, const int ldc);
1926 if ($fname =~ /^cblas_[sdcz]gemm$/) {
1927 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1928 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1929 return "${z_mult}${c_mult}BLAS_ORD_T($$argsref[0],$$argsref[1],$$argsref[3],$$argsref[5])*$$argsref[8]*" if ($argnum == 7);
1930 return "${z_mult}${c_mult}BLAS_ORD_T($$argsref[0],$$argsref[2],$$argsref[5],$$argsref[4])*$$argsref[10]*" if ($argnum == 9);
1931 return "${z_mult}${c_mult}BLAS_ORD($$argsref[0],$$argsref[3],$$argsref[4])*$$argsref[13]*" if ($argnum == 12);
1932 return "${z_mult}2*" if (($argnum == 6) && ($fname =~ /^cblas_[cz]gemm$/)) ; # alpha
1933 return "${z_mult}2*" if (($argnum == 11) && ($fname =~ /^cblas_[cz]gemm$/)) ;# beta
1934 }
1935 #void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
1936 # const enum CBLAS_UPLO Uplo, const int M, const int N,
1937 # const float alpha, const float *A, const int lda,
1938 # const float *B, const int ldb, const float beta,
1939 # float *C, const int ldc);
1940 #void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
1941 # const enum CBLAS_UPLO Uplo, const int M, const int N,
1942 # const void *alpha, const void *A, const int lda,
1943 # const void *B, const int ldb, const void *beta,
1944 # void *C, const int ldc);
1945 # ssymm, dsymm, csymm, zsymm
1946 # chemm, zhemm
1947 if ($fname =~ /^cblas_[sdcz]symm|cblas_[cz]hemm$/) {
1948 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1949 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1950 return "${z_mult}${c_mult}BLAS_SIDE($$argsref[1],$$argsref[3],$$argsref[4])*$$argsref[7]*" if ($argnum == 6); # A: lda x (m or n)
1951 return "${z_mult}${c_mult}BLAS_ORD($$argsref[0],$$argsref[3],$$argsref[4])*$$argsref[9]*" if ($argnum == 8);#B:ldb x (n or m)
1952 return "${z_mult}${c_mult}BLAS_ORD($$argsref[0],$$argsref[3],$$argsref[4])*$$argsref[12]*" if ($argnum==11);#C:ldc X (n or m)
1953 return "${z_mult}2*" if (($argnum == 5) && ($fname =~ /^cblas_[cz].*$/)) ; # alpha
1954 return "${z_mult}2*" if (($argnum == 10) && ($fname =~ /^cblas_[cz].*$/)) ;# beta
1955 }
1956 # void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1957 # const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
1958 # const float alpha, const float *A, const int lda,
1959 # const float beta, float *C, const int ldc);
1960 # ssyrk, dsyrk, csyrk, zsyrk
1961 # cherk, zherk
1962 if ($fname =~ /^cblas_[sdcz]syrk|cblas_[cz]herk$/) {
1963 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1964 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1965 return "${z_mult}${c_mult}BLAS_ORD_T($$argsref[0],$$argsref[2],$$argsref[3],$$argsref[4])*$$argsref[7]*" if ($argnum == 6); # A: lda X (k or n)
1966 return "${z_mult}${c_mult}$$argsref[3]*$$argsref[10]*" if ($argnum == 9); # C : ldc x n
1967 return "${z_mult}2*" if (($argnum == 5) && ($fname =~ /^cblas_[cz].*$/)) ; # alpha
1968 return "${z_mult}2*" if (($argnum == 8) && ($fname =~ /^cblas_[cz].*$/)) ;# beta
1969 }
1972 #void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
1973 # const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
1974 # const enum CBLAS_DIAG Diag, const int M, const int N,
1975 # const float alpha, const float *A, const int lda,
1976 # float *B, const int ldb);
1977 # strmm, dtrmm, ctrmm, ztrmm, strsm, dtrsm, ctrsm, ztrsm
1978 if ($fname =~ /^cblas_[sdcz]tr[ms]m$/) {
1979 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
1980 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
1981 return "${z_mult}${c_mult}BLAS_SIDE($$argsref[1],$$argsref[5],$$argsref[6])*$$argsref[9]*" if ($argnum == 8); # A: lda x (m or n)
1982 return "${z_mult}${c_mult}BLAS_ORD($$argsref[0],$$argsref[5],$$argsref[6])*$$argsref[11]*" if($argnum== 10);#B:ldb x (n or m)
1983 return "${z_mult}2*" if (($argnum == 7) && ($fname =~ /^cblas_[cz].*$/)) ; # alpha
1984 #return "$$argsref[5]*$$argsref[5]*" if ($argnum == 8); # A: MxM
1985 #return "$$argsref[5]*$$argsref[6]*" if ($argnum == 10);# B: MxN
1986 }
1988 # ssyr2k, dsyr2k, csyr2k, zsyr2k
1989 #void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1990 # const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
1991 # const float alpha, const float *A, const int lda,
1992 # const float *B, const int ldb, const float beta,
1993 # float *C, const int ldc);
1994 # cher2k, zher2k
1995 #void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
1996 # const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
1997 # const void *alpha, const void *A, const int lda,
1998 # const void *B, const int ldb, const float beta,
1999 # void *C, const int ldc);
2000 if ($fname =~ /^cblas_[sdcz]syr2k|cblas_[cz]her2k$/) {
2001 $z_mult = "COMP_DBL" if ($fname =~ /^cblas_z.*$/) ;
2002 $c_mult = "2*" if ($fname =~ /^cblas_[cz].*$/) ;
2003 return "${z_mult}${c_mult}BLAS_ORD_T($$argsref[0],$$argsref[2],$$argsref[3],$$argsref[4])*$$argsref[7]*" if ($argnum == 6); # A: lda X (k or n)
2004 return "${z_mult}${c_mult}BLAS_ORD_T($$argsref[0],$$argsref[2],$$argsref[3],$$argsref[4])*$$argsref[9]*" if ($argnum == 8); # A: ldb X (k or n)
2005 return "${z_mult}${c_mult}$$argsref[3]*$$argsref[12]*" if ($argnum == 11); # C : ldc x n
2006 return "${z_mult}2*" if (($argnum == 5) && ($fname =~ /^cblas_[cz].*$/)) ; # alpha
2007 return "${z_mult}2*" if (($argnum == 10) && ($fname =~ /^cblas_[cz].*$/)) ;# beta
2008 }
2009 return ""; # if control reaches here then no modifier
2010 }
2012 sub get_func_specific_offload_var
2013 {
2014 my $func = shift;
2015 my $NAMESPACE = uc($namespace);
2016 $func_name = substr($func, 7);
2018 if (index($blas_L1, "${func_name}|") != -1) {
2019 return "${NAMESPACE}_L1_OFFLOAD";
2020 }
2021 if (index($blas_L3, "${func_name}|") != -1) {
2022 return "${NAMESPACE}_L3_OFFLOAD";
2023 }
2024 if (index($blas_L2, "${func_name}|") != -1) {
2025 return "${NAMESPACE}_L2_OFFLOAD";
2026 }
2027 if (index("${func_name}|", "amax") != -1) {
2028 return "${NAMESPACE}_L1_OFFLOAD";
2029 }
2030 # return "${NAMESPACE}_L1_OFFLOAD" if ($func_name =~ /$blas_L1/);
2031 # return "${NAMESPACE}_L3_OFFLOAD" if ($func_name =~ /$blas_L3/);
2032 # return "${NAMESPACE}_L2_OFFLOAD" if ($func_name =~ /$blas_L2/);
2034 # if no match then use the default offload variable
2035 return "${namespace}_offload";
2036 }
2038 # takes as argument a trimmed string that is a prototype. Semicolon at end does not matter.
2039 # It will generate ARM C++ code that launches an OpenCL kernel code here
2041 sub generate_arm_from_proto($)
2042 {
2043 my $string = shift;
2044 $string =~ s/;//;
2045 my $NAMESPACE = uc($namespace);
2047 # func return type
2048 my @tmp = split /[\(\)]/,$string;
2049 my $tramptype = $tmp[0];
2050 $tramptype =~ s/(.*)\ ([\* ]*).*/$1 $2/;
2051 $tramptype = trim($tramptype);
2053 # func name
2054 my $trampname = $tmp[0];
2055 $trampname =~ s/.*\ \**(.*)$/$1/;
2057 # global side effect!
2058 push @offloaded,$trampname;
2060 # kernel index
2061 my $trampdef = $trampname;
2062 $trampdef = uc $namespace . "_" . (uc $trampname) . "_IDX";
2064 # func args
2065 shift @tmp;
2066 my @args = split /\,/,$tmp[0]; #/;
2067 my $lastargind = $#args;
2068 my @kernelargs;
2069 my @kerneltypes;
2070 # test for a vararg last argument and trunc, ugh
2071 if (($#args > -1) && (trim($args[$#args]) eq "...")) {
2072 $#args--;
2073 $lastargind = $#args;
2074 }
2076 LOOP1:
2077 {
2078 foreach my $arg (@args) {
2079 $arg = trim($arg);
2080 my $argname = $arg;
2081 # get arg name
2082 $argname =~ s/.*\ \**(.*)$/$1/;
2083 $argname = trim($argname);
2084 if ($argname =~ /void/i) {
2085 next LOOP1;
2086 }
2087 push @kernelargs,$argname;
2088 # get arg type
2089 my $argtype = $arg;
2090 $argtype =~ s/(.*)$argname/$1/;
2091 $argtype = trim($argtype);
2092 push @kerneltypes,$argtype;
2093 }
2094 }
2097 my $arm_comment_header = "/****** UNCOMMENT CODE BELOW FOR ARM EXECUTION (NO OFFLOAD) *******";
2098 my $arm_comment_trailer ="******************************************************************/";
2099 #my $arm_func_cond = get_func_based_arm_cond($trampname, \@kernelargs);
2100 my $arm_func_cond = get_offload_decision($trampname, \@kernelargs);
2101 my $arm_condition_code = "";
2102 my $offload_var = get_func_specific_offload_var($trampname);
2103 my $indent = "";
2104 my $arm_end_condition_code = "";
2105 if (!$commentarm) {
2106 $arm_comment_header = "/* Dynamic condtional offload to ARM */";
2107 $arm_comment_trailer= "/* End ARM offload */";
2108 $arm_func_cond = " || (($offload_var == ${NAMESPACE}_OFFLOAD_SIZE) && ($arm_func_cond))" if ($arm_func_cond);
2109 $arm_condition_code .= "if (($offload_var == ${NAMESPACE}_OFFLOAD_NONE)$arm_func_cond) { ";
2110 $indent .= "\t";
2111 $arm_end_condition_code = "}";
2112 }
2113 my $no_offload_arm_call = "";
2114 $no_offload_arm_call .= "$tramptype rval = " unless ($tramptype =~ /^void$/i);
2115 $no_offload_arm_call .= "__real_$trampname(" . join(',', @kernelargs) . "); ";
2116 my $no_offload_arm_return = "return ";
2117 $no_offload_arm_return .= "rval" unless ($tramptype =~ /^void$/i);
2118 $no_offload_arm_return .= ";";
2119 my $arm_real_proto = $string;
2120 $arm_real_proto =~ s/$trampname/__real_$trampname/ ;
2122 my $armcode = <<"ARM_FROM_PROTO";
2123 $source_code_header
2124 #include "${namespace}.h"
2126 #ifdef __cplusplus
2127 extern "C" {
2128 #endif
2129 $arm_real_proto ;
2131 #ifdef __cplusplus
2132 }
2133 extern "C"
2134 #endif
2135 $string
2136 {
2138 /* Do an init on first use */
2139 if (!${namespace}_init_done) ${namespace}_init();
2140 ${NAMESPACE}_DEBUG_PRINT("Intercepted call to %s\\n", "$trampname");
2142 ${NAMESPACE}_PROFILE_START();
2144 $arm_comment_header
2145 $arm_condition_code
2146 ${indent}${NAMESPACE}_DEBUG_PRINT("Executing ARM %s\\n", "$trampname");
2147 ${indent}$no_offload_arm_call
2148 ${indent}${NAMESPACE}_PROFILE_REPORT(" Entire %s call (ARM) took %8.2f us\\n","$trampname", (float) clock_diff);
2149 ${indent}$no_offload_arm_return
2150 $arm_end_condition_code
2151 $arm_comment_trailer
2153 /******************************************************************/
2154 /* DSP offload WILL be done if control reaches here */
2155 ${indent}${NAMESPACE}_DEBUG_PRINT("Offloading to DSP %s\\n", "$trampname");
2157 /* Lookup kernel pointer from global table */
2158 #ifdef __cplusplus
2159 Event e;
2160 Kernel* __K;
2161 #else
2162 cl_kernel __K;
2163 #endif
2164 __K = ${namespace}_get_kernel($trampdef, "ocl_$trampname");
2165 #ifdef __cplusplus
2166 try
2167 #else
2168 cl_int err = CL_SUCCESS;
2169 #endif
2170 {
2172 ARM_FROM_PROTO
2174 my $i = 0;
2175 foreach $arg (@kernelargs) {
2176 if ($kerneltypes[$i] =~ /.*\*\s*/) {
2177 # pointer type arg
2178 my $perms = "CL_MEM_READ_WRITE";
2179 my $modifier = "";
2180 my $argtype = trim($kerneltypes[$i]);
2181 if ($argtype =~ /\s*const\s.*/) {
2182 $armcode .= "\t\t/* For const arguments we use CL_MEM_USE_READ_ONLY */\n";
2183 $perms = "CL_MEM_READ_ONLY";
2184 $modifier = "(void *)"; # needed as Buffer uses void* and not const <type>*
2185 $argtype =~ s/\s*const//; # remove const
2186 }
2187 my $modify_bufsize = get_bufsize_modifier($trampname, $i, \@kernelargs);
2188 #print "modify_bufsize = $modify_bufsize\n";
2189 $argtype =~ s/\*//;
2190 $argtype = trim($argtype);
2191 my $sizeoftype = $argtype;
2192 #$sizeoftype = "float" if ($argtype =~ /void/i);
2193 if ($argtype =~ /void/i) {
2194 $sizeoftype = "float";
2195 if (index($modify_bufsize, "COMP_DBL") != -1) {
2196 $sizeoftype = "double";
2197 $modify_bufsize = $modify_bufsize =~ s/COMP_DBL//r;
2198 }
2199 }
2200 #print "sizeoftype = $sizeoftype\n";
2201 #print "new modify_bufsize = $modify_bufsize\n";
2203 # when modify bufsize is a non-empty, non-constant string, then
2204 # we have to guard against the possibility
2205 # the the buffer size empty because one of the multipliers is 0 (BLAS allows that, for example)
2206 if ($modify_bufsize && ($modify_bufsize !~ /^[0-9*]+$/)) {
2207 $armcode .= "
2208 int size_buf$arg;
2209 size_buf$arg = ${modify_bufsize}sizeof($sizeoftype);
2210 size_buf$arg = MAX(size_buf$arg,1);
2212 #ifdef __cplusplus
2213 Buffer buf_$arg(${namespace}_ocl_context, $perms|CL_MEM_USE_HOST_PTR, size_buf$arg, (void *)$arg);
2214 __K->setArg($i, buf_$arg);
2215 #else
2216 cl_mem buf_$arg = clCreateBuffer(${namespace}_ocl_context, $perms|CL_MEM_USE_HOST_PTR, size_buf$arg, (void *)$arg, &err);
2217 ${NAMESPACE}_OCL_CHKERROR(\"clCreateBuffer\",err);
2218 err |= clSetKernelArg(__K, $i, sizeof(buf_$arg), &buf_$arg);
2219 ${NAMESPACE}_OCL_CHKERROR(\"clSetKernelArg\",err);
2220 #endif
2221 ";
2222 }
2223 else {
2224 $armcode .= "
2225 #ifdef __cplusplus
2226 Buffer buf_$arg(${namespace}_ocl_context, $perms|CL_MEM_USE_HOST_PTR, ${modify_bufsize}sizeof($sizeoftype), (void *)$arg);
2227 __K->setArg($i, buf_$arg);
2228 #else
2229 cl_mem buf_$arg = clCreateBuffer(${namespace}_ocl_context, $perms|CL_MEM_USE_HOST_PTR, ${modify_bufsize}sizeof($sizeoftype), (void *)$arg, &err);
2230 ${NAMESPACE}_OCL_CHKERROR(\"clCreateBuffer\",err);
2231 err |= clSetKernelArg(__K, $i, sizeof(buf_$arg), &buf_$arg);
2232 ${NAMESPACE}_OCL_CHKERROR(\"clSetKernelArg\",err);
2233 #endif
2234 ";
2235 }
2236 }
2237 else {
2238 $armcode .= "
2239 #ifdef __cplusplus
2240 __K->setArg($i, $arg);
2241 #else
2242 err |= clSetKernelArg(__K, $i, sizeof($arg), &$arg);
2243 #endif
2244 ";
2245 }
2246 $i++;
2247 }
2248 # print $i;
2249 # print "\n";
2250 # print "trampname is " . $trampname . "\n";
2251 $kernel_name = substr($trampname, 7);
2252 # print "kernel_name is " . $kernel_name . "\n";
2253 # print "blas_L3 string is ". $blas_L3 . "\n";
2254 if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
2255 print "This is a level 3 function - " . $trampname . "\n";
2257 $i_plus_1 = $i+1;
2258 $armcode .= "
2259 #ifdef __cplusplus
2260 Buffer buf_MSMC(${namespace}_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_MSMC_TI, MSMC_BUF_SIZE);
2261 __K->setArg($i, buf_MSMC);
2263 #else
2264 cl_mem buf_MSMC = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_MSMC_TI, MSMC_BUF_SIZE, NULL, &err);
2265 TI_CBLAS_OCL_CHKERROR(\"clCreateBuffer\",err);
2266 err |= clSetKernelArg(__K, $i, sizeof(buf_MSMC), &buf_MSMC);
2267 TI_CBLAS_OCL_CHKERROR(\"clSetKernelArg\",err);
2268 #endif
2270 #ifdef __cplusplus
2271 __K->setArg($i_plus_1, __local(L2_BUF_SIZE));
2272 #else
2273 err |= clSetKernelArg(__K, $i_plus_1, L2_BUF_SIZE, NULL);
2274 #endif
2275 ";
2276 }
2277 if ($tramptype !~ /^void$/i) {
2278 $armcode .= "
2279 /* create a buffer argument to get the return value from the DSP */
2280 $tramptype retval;
2281 #ifdef __cplusplus
2282 Buffer buf_retval(${namespace}_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof($tramptype), &retval);
2283 __K->setArg($i, buf_retval);
2284 #else
2285 cl_mem buf_retval = clCreateBuffer(${namespace}_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof($tramptype), &retval, &err);
2286 ${NAMESPACE}_OCL_CHKERROR(\"clCreateBuffer\",err);
2287 err |= clSetKernelArg(__K, $i, sizeof(buf_retval), &buf_retval);
2288 ${NAMESPACE}_OCL_CHKERROR(\"clSetKernelArg\",err);
2289 #endif
2290 ";
2291 }
2292 $armcode .= "
2293 #ifdef __cplusplus
2294 ${namespace}_ocl_Q.enqueueTask(*__K, 0, &e);
2295 e.wait();
2296 #else
2297 cl_event e;
2298 err |= clEnqueueTask(${namespace}_ocl_Q, __K, 0, 0, &e);
2299 ${NAMESPACE}_OCL_CHKERROR(\"clEnqueueTask\",err);
2300 err |= clWaitForEvents(1, &e);
2301 ${NAMESPACE}_OCL_CHKERROR(\"clWaitForEvents\",err);
2302 err |= clReleaseEvent(e);
2303 ${NAMESPACE}_OCL_CHKERROR(\"clReleaseEvent\",err);
2305 #endif
2306 ${NAMESPACE}_DEBUG_PRINT(\"Finished executing %s\\n\", \"$trampname\");
2307 ${NAMESPACE}_PROFILE_REPORT(\" Entire %s call (DSP) took %8.2f us\\n\",\"$trampname\", (float) clock_diff);
2308 return ";
2309 $armcode .= "retval" unless ($tramptype =~ /^void$/i);
2310 $armcode .= ";\n";
2311 $armcode .= "\t}\n";
2312 $armcode .= "
2313 #ifdef __cplusplus
2314 catch (Error err)
2315 {
2316 ${namespace}_error(err.what(),err.err());
2317 return ";
2318 $armcode .= "0" unless ($tramptype =~ /^void$/i);
2319 $armcode .= ";\n";
2320 $armcode .= "\t}\n#endif\n";
2321 $armcode .= "}\n";
2323 return $armcode;
2324 }
2327 # Generates the kernel.cl code from the prototype.
2328 # Takes as argument the function prototype string.
2329 sub generate_kernel_from_proto($)
2330 {
2331 my $string = shift;
2333 my $oclcode = "";
2334 my @tmp = split /[\(\)]/,$string;
2335 $oclcode .= "kernel " . ocl_munge($tmp[0]) . "(";
2336 my $trampname = trampoline_munge($tmp[0]);
2337 my $trampproto = $trampname . "(";
2339 my $kernel_name = substr($tmp[0], 12);
2340 # print "In generate_kernel_from_proto, kernel is " . $kernel_name . "\n";
2341 # print "In generate_kernel_from_proto, trampname is " . $trampname . "\n";
2342 my $tramptype = $tmp[0];
2343 $tramptype =~ s/(.*)\ ([\* ]*).*/$1 $2/;
2344 $tramptype = trim($tramptype);
2345 # strip type
2346 $trampname =~ s/.*\ \**(.*)$/$1/;
2348 shift @tmp;
2349 my @args = split /\,/,$tmp[0]; #/;
2350 my $lastargind = $#args;
2351 my @trampargs;
2352 # test for a vararg last argument, ugh
2353 if (($#args > -1) && (trim($args[$#args]) eq "...")) {
2354 $#args--;
2355 $lastargind = $#args;
2356 }
2357 my $comma = "";
2358 LOOP1:
2359 {
2360 foreach my $arg (@args) {
2361 $arg = trim($arg);
2362 my $argname = $arg;
2363 # strip types
2364 $argname =~ s/.*\ \**(.*)$/$1/;
2365 if ($argname =~ /void/i) {
2366 next LOOP1;
2367 }
2368 $comma = ", ";
2369 push @trampargs,$argname;
2370 if ($arg =~ /.*\*.*$/) {
2371 $oclcode .= "global $arg";
2372 $trampproto .= "global $arg";
2373 } else {
2374 $oclcode .= "$arg";
2375 $trampproto .= "$arg"; }
2376 # if it's last argument don't add comma
2377 if ($arg ne trim($args[$lastargind])) {
2378 $oclcode .= ", ";
2379 $trampproto .= ", ";
2380 }
2381 else {
2382 if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
2383 print "In generate_kernel_from_proto, this is a level 3 function - " . $trampname . "\n";
2384 $oclcode .= ", global double *l3_buf, local double *l2_buf_loc";
2385 $trampproto .= ", global double *l3_buf, local double *l2_buf_loc";
2386 }
2387 }
2388 }
2389 }
2390 $oclcode .= "${comma}$tramptype *retval" unless ($tramptype =~ /^void$/i);
2391 $trampproto .= "${comma}$tramptype *retval" unless ($tramptype =~ /^void$/i);
2393 $trampproto .= ");";
2394 $oclcode .= ")\n{ ";
2396 $oclcode .= $trampname . "(";
2398 foreach my $arg (@trampargs) {
2399 $oclcode .= "$arg";
2400 if ($arg ne trim($trampargs[$lastargind])) {
2401 $oclcode .= ", "; }
2402 }
2403 $oclcode .= "${comma}retval" unless ($tramptype =~ /^void$/i);
2405 if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
2406 $oclcode .= ", l3_buf, l2_buf_loc";
2407 }
2408 $oclcode .= "); }";
2409 $oclcode = $trampproto . "\n" . $oclcode;
2410 return $oclcode;
2411 }
2413 sub generate_kernel_prologue
2414 {
2415 my $rc = $source_code_header;
2416 $rc = "$::dep\n" if ($::dep);
2417 return $rc;
2418 }
2420 # generates the top portion of facade.c (c66x code)
2421 # use once (not on per-function basis)
2422 sub generate_facade_prologue
2423 {
2424 my $facade_prologue = <<"FACADE_PROLOGUE";
2425 $source_code_header
2426 #include <stdio.h>
2427 #include "../../cblas/include/cblas.h"
2428 #include "blis.h"
2429 #define DEVICE_K2H
2431 #include <dsp_c.h>
2433 #define getNextMultiple(x, y) ( ( ((x)+(y)-1)/(y) )* (y) )
2434 // L1 buffer is hardwared here
2435 #define L1_BUF_LOC 0x00F00000
2437 // note these pointers must be filled if used functions
2438 char *pool_mk_mem_L1;
2439 char *pool_kn_mem_L1;
2440 char *pool_mn_mem_L1;
2442 char *pool_mk_mem_L2;
2443 char *pool_kn_mem_L2;
2444 char *pool_mn_mem_L2;
2446 char *pool_mk_mem_L3;
2447 char *pool_kn_mem_L3;
2448 char *pool_mn_mem_L3;
2450 FACADE_PROLOGUE
2451 return $facade_prologue;
2452 }
2454 # takes as argument a trimmed string that is a prototype. Semicolon at end does not matter.
2455 # It will generate DSP code like the following:
2456 # void FFT_plan_1d_dp_c2c_facade(int N, global double *in, global double *out,
2457 # global double *tw, int type, int mode, global char* plan)
2458 # { FFT_plan_1d_dp_c2c(N, in, out, tw, type, mode, plan); }
2459 sub generate_facade_from_proto($)
2460 {
2461 my $string = shift;
2463 my @tmp = split /[\(\)]/,$string;
2464 my $dspcode = trampoline_munge($tmp[0]) . "(";
2465 my $trampname = $tmp[0];
2466 my $trampproto = $trampname . "(";
2468 my $tramptype = $tmp[0];
2469 $tramptype =~ s/(.*)\ ([\* ]*).*/$1 $2/;
2470 $tramptype = trim($tramptype);
2471 # strip type
2472 $trampname =~ s/.*\ \**(.*)$/$1/;
2474 shift @tmp;
2475 my @args = split /\,/,$tmp[0]; #/;
2476 my $lastargind = $#args;
2477 my @trampargs;
2478 # test for a vararg last argument, ugh
2479 if (($#args > -1) && (trim($args[$#args]) eq "...")) {
2480 $#args--;
2481 $lastargind = $#args;
2482 }
2483 my $comma = "";
2484 LOOP1:
2485 {
2486 foreach my $arg (@args) {
2487 $arg = trim($arg);
2488 my $argname = $arg;
2489 # strip types
2490 $argname =~ s/.*\ \**(.*)$/$1/;
2491 if ($argname eq "void") {
2492 next LOOP1;
2493 }
2494 $comma = ", ";
2495 push @trampargs,$argname;
2496 $dspcode .= "$arg";
2497 $trampproto .= "$arg";
2498 # if it's last argument don't add comma
2499 if ($arg ne trim($args[$lastargind])) {
2500 $dspcode .= ", ";
2501 $trampproto .= ", ";
2502 }
2503 }
2504 }
2505 $dspcode .= "${comma}$tramptype *retval" unless ($tramptype =~ /^void$/);
2506 $trampproto .= "${comma}$tramptype *retval" unless ($tramptype =~ /^void$/);
2508 $kernel_name = substr($trampname, 7);
2509 if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
2510 $dspcode .= ", float *l3_buf, float *l2_buf_loc";
2511 $trampproto .= ", float *l3_buf, float *l2_buf_loc" ;
2512 }
2514 $trampproto .= ");";
2515 $dspcode .= ")\n{";
2516 # print "trampproto is " . $trampproto . "\n";
2517 # print "dspcode is " . $dspcode . "\n";
2519 # $dspcode .= "\tCACHE_L1Size prevL1 = CACHE_getL1DSize();\n" if ($dspl1);
2520 # $dspcode .= "\tCACHE_setL1DSize(CACHE_L1_${dspl1}KCACHE);\n" if ($dspl1);
2521 # $dspcode .= "\tCACHE_L2Size prevL2 = CACHE_getL2Size();\n" if ($dspl2);
2522 # $dspcode .= "\tCACHE_setL2Size(CACHE_${dspl2}KCACHE);\n" if ($dspl2);
2524 if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
2525 # print "facade code to setup cache for level 3 function ". $trampname ."\n";
2526 $dspcode .= "
2527 pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
2528 pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
2529 pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
2531 pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
2532 pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
2533 pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
2535 pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
2536 pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
2537 pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
2539 #pragma omp parallel
2540 {
2541 __cache_l1d_flush();
2542 __cache_l1d_4k();
2543 }
2544 ";
2545 }
2546 else {
2547 # print "facade code to setup cache for level 1 or 2 function ". $trampname ."\n";
2548 $dspcode .= "
2549 #pragma omp parallel
2550 {
2551 __cache_l2_flush();
2552 __cache_l2_512k();
2553 }
2554 ";
2555 }
2557 # if (!($tramptype =~ /^void$/)) {
2558 # $dspcode .= "return ";
2559 # }
2560 $dspcode .= "\t";
2561 $dspcode .= "*retval = " unless ($tramptype =~ /^void$/);
2562 $dspcode .= $trampname . "(";
2564 foreach my $arg (@trampargs) {
2565 $dspcode .= "$arg";
2566 if ($arg ne trim($trampargs[$lastargind])) {
2567 $dspcode .= ", "; }
2568 }
2569 $dspcode .= ");\n";
2570 # $dspcode .= "\tCACHE_setL1DSize(prevL1);\n" if ($dspl1);
2571 # $dspcode .= "\tCACHE_setL2Size(prevL2);\n" if ($dspl2);
2572 if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
2573 # print "facade code to return default cache for level 3 function ". $trampname ."\n";
2574 $dspcode .= "
2575 #pragma omp parallel
2576 {
2577 __cache_l1d_flush();
2578 __cache_l1d_all();
2579 }
2580 ";
2581 }
2582 else {
2583 # print "facade code to return default cache for level 1 or 2 function ". $trampname ."\n";
2584 $dspcode .= "
2585 // return default L2 cache (128 K)
2586 #pragma omp parallel
2587 {
2588 __cache_l2_flush();
2589 __cache_l2_128k();
2590 }
2591 ";
2592 }
2593 $dspcode .= "}\n";
2594 # $dspcode = $trampproto . "\n" . $dspcode;
2595 return $dspcode;
2596 }
2598 # Generates arm header. Use only once.
2599 sub generate_arm_header()
2600 {
2601 my $i = 0;
2602 my $hdr = "";
2603 my $NAMESPACE = uc($namespace);
2604 $hdr .= "
2605 $source_code_header
2607 #ifndef ${NAMESPACE}_H
2608 #define ${NAMESPACE}_H
2610 #ifdef __cplusplus
2611 #include <cstdlib>
2612 #include <cmath>
2613 #include <cstring>
2614 #include <iostream>
2615 #include <fstream>
2616 #define __CL_ENABLE_EXCEPTIONS
2617 #include <CL/cl.hpp>
2618 using namespace std;
2619 using namespace cl;
2620 /* Both cl and std namespace define size_t, so we must be explicit */
2621 #define size_t ::size_t
2622 #ifndef ${NAMESPACE}_FAT_BINARY
2623 #include \"ocl_util.h\"
2624 #endif
2625 #else
2626 #include <stdio.h>
2627 #include <stdlib.h>
2628 #include <sys/types.h>
2629 #include <sys/stat.h>
2630 #include <unistd.h>
2631 #include <time.h>
2632 #include <math.h>
2633 #include <assert.h>
2634 #include <memory.h>
2635 #include <CL/cl.h>
2636 #include <CL/cl_ext.h>
2637 #endif
2639 extern int ${namespace}_disable_debug;
2641 /* useful macros */
2642 #ifdef ${NAMESPACE}_DEBUG
2643 #define ${NAMESPACE}_DEBUG_PRINT(...) { if (! ${namespace}_disable_debug) { fprintf(stderr,\"$NAMESPACE DEBUG: \"); fprintf(stderr, __VA_ARGS__); } }
2644 #else
2645 #define ${NAMESPACE}_DEBUG_PRINT(...)
2646 #endif
2648 #define ${NAMESPACE}_ERROR_PRINT(...) { fprintf(stderr,\"$NAMESPACE ERROR: \"); fprintf(stderr, __VA_ARGS__); }
2649 #define ${NAMESPACE}_ERROR_EXIT(...) { fprintf(stderr,\"$NAMESPACE ERROR: \"); fprintf(stderr, __VA_ARGS__); exit(1); }
2650 #define ${NAMESPACE}_OCL_CHKERROR(A, B) if (B != CL_SUCCESS) { ${NAMESPACE}_ERROR_PRINT(\"opencl %s, error %d\\n\", A, B); exit(B); }
2652 #define MIN(a,b) ((a) < (b) ? (a) : (b))
2653 #define MAX(a,b) ((a) > (b) ? (a) : (b))
2654 #define XOR(a,b) ((!(a)) != (!(b)))
2655 #define XOR3(a,b,c) (((!(a)) != (!(b))) == (!(c)))
2657 /* Profiling support */
2658 #ifdef ${NAMESPACE}_PROFILE
2659 #define ${NAMESPACE}_CLOCK CLOCK_REALTIME
2660 #define clock_diff ((clock2.tv_sec-clock1.tv_sec)*1e6 + (clock2.tv_nsec-clock1.tv_nsec)/1e3)
2661 #define ${NAMESPACE}_PROFILE_PRINT(...) fprintf(stderr,\"$NAMESPACE PROFILE: \"); fprintf(stderr, __VA_ARGS__);
2662 #define ${NAMESPACE}_PROFILE_START() struct timespec clock1; clock_gettime(${NAMESPACE}_CLOCK, &clock1);
2663 #define ${NAMESPACE}_PROFILE_REPORT(...) struct timespec clock2; clock_gettime(${NAMESPACE}_CLOCK, &clock2); ${NAMESPACE}_PROFILE_PRINT(__VA_ARGS__)
2665 #else /* ${NAMESPACE}_PROFILE */
2666 #define ${NAMESPACE}_PROFILE_PRINT(...)
2667 #define ${NAMESPACE}_PROFILE_START()
2668 #define ${NAMESPACE}_PROFILE_REPORT(...)
2669 #define ${NAMESPACE}_PROFILE_OCL_REPORT()
2670 #endif /* ${NAMESPACE}_PROFILE */
2672 #ifdef __cplusplus
2673 extern \"C\" {
2674 #endif
2675 $::dep
2676 #include \"cblas.h\"
2677 #ifdef __cplusplus
2678 }
2679 #endif
2681 /* offload control */
2682 /* NONE: Execute on ARM only */
2683 /* DSP : Force offload to DSP */
2684 /* SIZE:Decision to offload or not is decided based on size */
2685 #define ${NAMESPACE}_OFFLOAD_NONE 0
2686 #define ${NAMESPACE}_OFFLOAD_DSP 1
2687 #define ${NAMESPACE}_OFFLOAD_SIZE 2
2689 /* Global functions and variables */
2690 extern void ${namespace}_error(const char* msg, int code);
2691 extern void ${namespace}_init(void);
2692 #ifdef __cplusplus
2693 extern Kernel* ${namespace}_get_kernel(int idx, const char *fname);
2694 extern Context ${namespace}_ocl_context;
2695 extern std::vector<Device> ${namespace}_ocl_devices;
2696 extern CommandQueue ${namespace}_ocl_Q;
2697 extern Program::Binaries ${namespace}_ocl_binary;
2698 extern Program ${namespace}_ocl_program;
2699 extern Kernel* ${namespace}_ocl_kernels[];
2700 #else
2701 extern cl_kernel ${namespace}_get_kernel(int idx, const char *fname);
2702 extern cl_context ${namespace}_ocl_context;
2703 extern cl_command_queue ${namespace}_ocl_Q;
2704 extern cl_program ${namespace}_ocl_program;
2705 extern cl_kernel ${namespace}_ocl_kernels[];
2706 #endif
2708 extern int ${namespace}_init_done;
2709 extern int ${namespace}_kernel_valid[];
2710 extern int ${namespace}_offload;
2712 ";
2714 foreach my $o (@offloaded) {
2715 $hdr .= "#define " . uc $namespace . "_" . uc $o . "_IDX $i\n";
2716 $i++;
2717 }
2718 $hdr .= "#define " . uc $namespace . "_" . "NUM_KERNELS $i\n\n";
2720 $hdr .= "
2721 #define NUM_PNT_EACH_DIM 16
2722 #define GEMM_OFFLOAD_TBL_SIZE (NUM_PNT_EACH_DIM*NUM_PNT_EACH_DIM*NUM_PNT_EACH_DIM)
2723 #define SYRK_OFFLOAD_TBL_SIZE (NUM_PNT_EACH_DIM*NUM_PNT_EACH_DIM)
2724 #define TRMM_OFFLOAD_TBL_SIZE (NUM_PNT_EACH_DIM*NUM_PNT_EACH_DIM)
2725 #define TRSM_OFFLOAD_TBL_SIZE (NUM_PNT_EACH_DIM*NUM_PNT_EACH_DIM)
2726 ";
2727 # BLAS-specific code
2728 if ($header =~ /blas|lapack/) {
2729 $hdr .= "
2730 /* compile time defaults */
2731 #ifndef ${NAMESPACE}_OFFLOAD
2732 #define ${NAMESPACE}_OFFLOAD \"$offload\"
2733 #endif
2735 /* macros used for BLAS/LAPACK buffer size calculations */
2736 #define BLAS_ORD(Order,a,b) ((Order==CblasRowMajor)? (a):(b))
2737 #define BLAS_TRN(Trans,a,b) ((Trans==CblasNoTrans)? (a):(b))
2738 #define BLAS_SIDE(Side,a,b) ((Side==CblasLeft)?(a):(b))
2739 #define BLAS_ORD_T(Order,Trans,a,b) (XOR((Order==CblasRowMajor),(Trans==CblasNoTrans))?(b):(a))
2740 #define BLAS_ORD_S(Order,Side,a,b) (XOR((Order==CblasRowMajor),((Side=='L')||(Side=='l')||(Side==CblasLeft)))?(b):(a))
2741 #define BLAS_ORD_TS(Order,Trans,Side,a,b) (XOR3((Order==CblasRowMajor),(Trans==CblasNoTrans),((Side=='L')||(Side=='l')||(Side==CblasLeft)))?(b):(a))
2743 ";
2744 }
2745 for (my $i=0; $i<3; $i++) {
2746 my $lvl = $i+1;
2747 $hdr .= "extern int ${NAMESPACE}_L${lvl}_OFFLOAD;\n" if ($header =~ /blas/);
2748 }
2750 $hdr .= "
2751 // allocated MSMC and L2 buffer sizes;
2752 // be careful to allocate enough so memory overrun
2753 // does not happen in BLIS/BLAS calls
2754 //#define L2_BUF_SIZE 0x84000
2755 //L2 Cache
2756 //MK: Pool Size 550976, Num Blocks 2, Block size 275424
2757 //KN: Pool Size 128, Num Blocks 0, Block size 3071640
2758 //MN: Pool Size 128, Num Blocks 0, Block size 1400832
2759 //0x86940=551232 = 550976+128+128
2760 //#define L2_BUF_SIZE 0x86940
2761 //#define L2_BUF_SIZE 0x862A0
2762 //Added 3 MNR buffers for C. and increasing Kc for Z
2763 #define L2_BUF_SIZE 0xBFE00
2764 //#define L2_BUF_SIZE 0xBF980 // MR=NR=4 for S
2767 //L3 Cache
2768 //MK: Pool Size 128, Num Blocks 0, Block size 275424
2769 //KN: Pool Size 6143536, Num Blocks 2, Block size 3071640
2770 //MN: Pool Size 128, Num Blocks 0, Block size 1400832
2771 //0x5DBF30 = 6143792=6143536+128+128
2772 //ccs map file says 5dbf40
2773 //Changed KC values to fit in the 4.5M of MSMC
2774 //4647B0
2775 // 0x4664B4
2776 // Within 4.5M
2777 //#define MSMC_BUF_SIZE 0x4647C0
2778 #define MSMC_BUF_SIZE 0x47FDC0
2779 //#define MSMC_BUF_SIZE 0x47F100 // MR=NR=4 for S
2782 //DSPBLIS
2783 //#define MSMC_BUF_SIZE 0x400000
2785 extern char ofld_tbl_sgemm[GEMM_OFFLOAD_TBL_SIZE];
2786 extern char ofld_tbl_dgemm[GEMM_OFFLOAD_TBL_SIZE];
2787 extern char ofld_tbl_cgemm[GEMM_OFFLOAD_TBL_SIZE];
2788 extern char ofld_tbl_zgemm[GEMM_OFFLOAD_TBL_SIZE];
2789 extern char ofld_tbl_ssyrk[SYRK_OFFLOAD_TBL_SIZE];
2790 extern char ofld_tbl_dsyrk[SYRK_OFFLOAD_TBL_SIZE];
2791 extern char ofld_tbl_csyrk[SYRK_OFFLOAD_TBL_SIZE];
2792 extern char ofld_tbl_zsyrk[SYRK_OFFLOAD_TBL_SIZE];
2793 extern char ofld_tbl_strmm[TRMM_OFFLOAD_TBL_SIZE];
2794 extern char ofld_tbl_dtrmm[TRMM_OFFLOAD_TBL_SIZE];
2795 extern char ofld_tbl_ctrmm[TRMM_OFFLOAD_TBL_SIZE];
2796 extern char ofld_tbl_ztrmm[TRMM_OFFLOAD_TBL_SIZE];
2797 extern char ofld_tbl_strsm[TRSM_OFFLOAD_TBL_SIZE];
2798 extern char ofld_tbl_dtrsm[TRSM_OFFLOAD_TBL_SIZE];
2799 extern char ofld_tbl_ctrsm[TRSM_OFFLOAD_TBL_SIZE];
2800 extern char ofld_tbl_ztrsm[TRSM_OFFLOAD_TBL_SIZE];
2801 extern int sgemm_offload_dsp(const enum CBLAS_ORDER Order, int M, int N, int K);
2802 extern int dgemm_offload_dsp(const enum CBLAS_ORDER Order, int M, int N, int K);
2803 extern int cgemm_offload_dsp(const enum CBLAS_ORDER Order, int M, int N, int K);
2804 extern int zgemm_offload_dsp(const enum CBLAS_ORDER Order, int M, int N, int K);
2805 extern int ssymm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2806 int M, int N);
2807 extern int dsymm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2808 int M, int N);
2809 extern int csymm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2810 int M, int N);
2811 extern int zsymm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2812 int M, int N);
2813 extern int chemm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2814 int M, int N);
2815 extern int zhemm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2816 int M, int N);
2817 extern int ssyrk_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
2818 extern int dsyrk_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
2819 extern int csyrk_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
2820 extern int zsyrk_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
2821 extern int cherk_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
2822 extern int zherk_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
2823 extern int ssyr2k_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
2824 extern int dsyr2k_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
2825 extern int csyr2k_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
2826 extern int zsyr2k_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
2827 extern int cher2k_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
2828 extern int zher2k_offload_dsp(const enum CBLAS_ORDER Order, int N, int K);
2829 extern int strmm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2830 int M, int N);
2831 extern int dtrmm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2832 int M, int N);
2833 extern int ctrmm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2834 int M, int N);
2835 extern int ztrmm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2836 int M, int N);
2837 extern int strsm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2838 int M, int N);
2839 extern int dtrsm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2840 int M, int N);
2841 extern int ctrsm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2842 int M, int N);
2843 extern int ztrsm_offload_dsp(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
2844 int M, int N);
2845 ";
2847 $hdr .= "\n#endif\n";
2848 return $hdr;
2849 }
2851 sub size_based_offload_header_prologue
2852 {
2853 my $NAMESPACE = uc($namespace);
2854 my $sz_hdr = "/* This header contains kernel-specific offload thresholds (if any) */\n" .
2855 "/* It is only used when size-based offload is enabled (002) */\n";
2856 $sz_hdr .= "
2857 /* Default min offload size. Matrices smaller than this will be executed on ARM */
2858 #ifndef ${NAMESPACE}_OFFLOAD_MIN
2859 #define ${NAMESPACE}_OFFLOAD_MIN $OFFLOAD_MIN
2860 #endif
2861 " if ($OFFLOAD_MIN ne "");
2862 $sz_hdr .= "
2863 /* Default max offload size. Matrices larger than this will be executed on ARM */
2864 #ifndef ${NAMESPACE}_OFFLOAD_MAX
2865 #define ${NAMESPACE}_OFFLOAD_MAX $OFFLOAD_MAX
2866 #endif
2867 " if ($OFFLOAD_MAX ne "");
2869 $sz_hdr .= "\n/* Below are offload thresholds for each kernel */\n";
2870 return $sz_hdr;
2871 }
2875 # Arg1: Reference to array of function names in header
2876 # Arg2: Regex of accept filter (optional). Defaults to accept ALL if empty string.
2877 # Arg3: Regex of reject filter (optional). Defaults to reject NONE if empty string.
2878 sub generate_code
2879 {
2880 my $arrayref = shift;
2881 my $accept_filter = shift;
2882 my $reject_filter = shift;
2883 my $NAMESPACE = uc($namespace);
2885 generate_top_code();
2886 foreach my $t (@{$arrayref}) {
2887 print "Found function: |$t|\n" unless ($f);
2888 if ($accept_filter) {
2889 next unless ($t =~ m/$accept_filter/);
2890 }
2891 if ($reject_filter) {
2892 next if ($t =~ m/$reject_filter/);
2893 }
2894 chomp $t;
2895 my $_fname = $t;
2896 $_fname =~ /^\s*\S+\s+(\S+)\s*\(/; $_fname = $1;
2897 my $_FNAME = uc($_fname);
2898 print "codegen for $_fname\n";
2899 print "OCL code (${namespace}_kernel.cl):\n" unless ($f);
2900 write_output(generate_kernel_from_proto(trim($t)), "${namespace}_kernel.cl");
2901 print "DSP code (append to facade.c):\n" unless ($f);
2902 write_output(generate_facade_from_proto(trim($t)), "facade.c");
2903 print "ARM code:\n" unless ($f);
2904 write_output(generate_arm_from_proto(trim($t)),"${namespace}_$offloaded[-1]" . ".c");
2906 # per-kernel size thresholds
2907 #write_output("#define ${NAMESPACE}_${_FNAME}_OFFLOAD_MIN\t${NAMESPACE}_OFFLOAD_MIN\n" .
2908 # "#define ${NAMESPACE}_${_FNAME}_OFFLOAD_MAX\t${NAMESPACE}_OFFLOAD_MAX\n",
2909 # "${namespace}_offload_sizes.h");
2910 #print "Makefile code (append to Makefile):\n" unless ($f);
2911 #write_output(generate_makefile_from_proto(trim($t)),"Makefile");
2912 }
2913 generate_bottom_code();
2914 }
2916 sub generate_top_code
2917 {
2918 print "DSP facade.c prologue:\n" unless ($f);
2919 write_output(generate_facade_prologue(),"facade.c");
2920 print "DSP facade.c generated.\n";
2921 print "DSP ${namespace}_kernel.cl prologue:\n" unless ($f);
2922 write_output(generate_kernel_prologue(),"${namespace}_kernel.cl");
2923 print "DSP ${namespace}_kernel.cl generated.\n";
2924 write_output(get_enums_and_defines(),"${namespace}_kernel.cl");
2925 print "ARM ${namespace}_initfini.c code:\n" unless ($f);
2926 write_output(generate_arm_init(), "${namespace}_initfini.c");
2927 #print "Makefile:\n" unless ($f);
2928 #write_output(generate_makefile_prologue($package, get_base_libname($armlib)), "Makefile");
2929 #print "${namespace}_offload_sizes.h:\n" unless ($f);
2930 #write_output(size_based_offload_header_prologue(), "${namespace}_offload_sizes.h");
2931 }
2933 sub generate_bottom_code
2934 {
2935 print "ARM header code (${namespace}.h):\n" unless ($f);
2936 write_output(generate_arm_header(), "${namespace}.h");
2937 #print "Makefile:\n" unless ($f);
2938 #write_output(generate_makefile_epilogue($package, get_base_libname($armlib), $libdeps), "Makefile");
2939 }
2941 sub write_output
2942 {
2943 my $outstr = shift;
2944 my $filename = shift;
2945 if (!defined($f)) {
2946 print "\n". $outstr . "\n";
2947 }
2948 else {
2949 open(OUTFILE, ">>$filename") || die "Could not open $filename for writing : $!\n";
2950 print OUTFILE $outstr . "\n";
2951 close OUTFILE;
2952 }
2953 }
2955 # checks whether depedencies are fulfilled prior to running the script
2956 sub check_deps
2957 {
2958 ( -x $CTAGS ) || die "Could not find $CTAGS. Please install ctags (apt-get install ctags OR yum install ctags)\n";
2959 }
2961 # returns an array containing function prototypes after reading in and
2962 # parsing the header
2963 sub get_func_protos()
2964 {
2965 #print "cat $header | tr '\n' ' ' | sed 's/\\([{};]\\)/\\1\\n/g' > /tmp/$$.h";
2966 system("cat $header | tr '\n' ' ' | sed 's/\\([{};]\\)/\\1\\n/g' > /tmp/$$.h") && die "Could not generate tmp file: /tmp/$$.h for writing : $!\n";
2967 open(CTAGS_OUT, "$CTAGS -x --c-kinds=fp /tmp/$$.h |") || die "Could not open pipe for reading ctags output: $!\n";
2968 local @protos ;
2969 while (<CTAGS_OUT>) {
2970 #cblas_ztrsv prototype 92 x.h void cblas_ztrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int incX);
2971 # pick all the text from the fifth field onwards
2972 s/^\s*\S+\s+\S+\s+\d+\s+\S+\s+(.*)$/$1/;
2973 s/\/\*.*\*\///g; # remove C comments
2974 s/\/\/.*//g ; # remove C++ comments
2975 #print "$_";
2976 push(@protos, $_);
2977 }
2978 close(CTAGS_OUT);
2979 unlink("/tmp/$$.h");
2980 print @protos;
2981 return \@protos;
2982 }
2984 # copy include makefiles from
2985 sub copy_make_includes
2986 {
2987 my $base = shift;
2988 print "Copying include makefiles..\n";
2989 for my $file ("make.inc","opencl-make.inc") {
2990 next unless (-f "$base/$file");
2991 link "$base/$file",$file
2992 }
2994 }
2996 #### main starts here ###
2998 $script = $0; chomp $script;
2999 $scriptargs = "@ARGV";
3001 my $scriptdir = `dirname $0`; chomp $scriptdir;
3003 my $usage = "
3005 $script [-h] [-f] [-fatbin] [-dep=</path/to/depheader] \\
3006 [-inc=\"include-pattern\"] [-ex=\"exclude pattern\"] \\
3007 <package> </path/to/header> </path/to/armlib> [dep1] [dep2]..
3009 -f : Generate actual files instead of spewing out everything to stdout (Recommended)
3010 -h : This help message
3011 -fatbin : Generate a fat binary rather than the kernel.out. Note, the fat binary approach
3012 WILL fail by running out tmp space due to the large header generated if you
3013 generate all blas functions (Recommended not to use this option)
3014 -dep=<header> This dependency header will be included in ARM sources files
3015 and DSP facade.c as well kernel .cl file
3016 -inc=<regex>:Include regex. If set, only functions mathing this will be intercepted
3017 By default, this is assumed to be empty, and all functions are in header
3018 are intercpted.
3019 -ex=<regex>: Exclude regex. If set, functions mathing this will not be intercepted.
3020 <package> : The package name, for e.g., blas. The package MUST exist under /usr/src/dsp
3021 -commentarm: Comment out the ARM exection code. This means all intercepted functions
3022 will be offloaded and the user will need to manually uncomment the code for
3023 executing any intercepted functions on ARM.
3024 -dspl1=<KB>: Set DSP L1 to the specified size in KB prior to offload. Cache size will be
3025 automatically restored to the original value on return from the offload.
3026 This argument should ONLY be used if the underlying DSP kernel expects a
3027 different L1 cache size than the OpenCL default.
3028 -dspl2=<KB>: Set DSP L2 to the specified size in KB prior to offload. Cache size will be
3029 automatically restored to the original value on return from the offload.
3030 This argument should ONLY be used if the underlying DSP kernel expects a
3031 different L2 cache size than the OpenCL default.
3032 -offload=<value>: Default offload setting. If unset, this is assumed to a value to ensure
3033 size-based offload of all intercepted functions. The value can be overridden using
3034 the environment variable <NAMESPACE>_OFFLOAD. The NAMESPACE is derived
3035 using the header name. For cblas, NAMESPACE=TI_CBLAS, and the
3036 environment variable is TI_CBLAS_OFFLOAD.
3037 <header> : Full path to header, e.g., ./cblas.h
3038 <armlib> : Full path to the ARM library. This will not be modified. A copy of this will
3039 be created in the working directory, and select (or all) functions in it will
3040 be intercepted. E.g., ../blas/acc/test/arm-blas/libcblas.a
3041 <deps> : Space separate dependency libraries needed for the DSP compile. These will be
3042 searched for in /usr/src/dsp/<package>. E.g., libtstatlas.a libatlas.a
3044 BLAS/LAPACK-specific arguments:
3045 -offmin=<nelems>: <nelems> is the number of elements in the result vector/matrix
3046 below which execution is done on ARM.
3047 e.g., -offmin=1000 will offload BLAS calls to DSP
3048 only if result matrix/vector has a size >= 1000.
3049 -offmax=<nelems>: <nelems> is the number of elements in the result vector/matrix
3050 beyond which execution is done on ARM.
3051 e.g., -offmax=100000 will execute on ARM for result matrix > 10K elements.
3052 -f2cwraplib=<path to lib> Path to library containing f2c wrapper functions (if any)
3055 EXAMPLES
3056 ========
3058 Offload gemm/gemv except cgemm/zgemm:
3059 $script -f -inc=\"cblas_.gem.\" -ex=\"cblas_[cz]gemm\" \\
3060 blas /path/to/cblas.h /path/to/libcblas.a libtstatlas.a libatlas.a
3062 Offload L1 BLAS:
3063 $script -f -inc=\"${blas_prefix}($blas_L1)\" blas /path/to/cblas.h /path/to/libcblas.a libtstatlas.a libatlas.a
3065 Offload L2 BLAS:
3066 $script -f -inc=\"${blas_prefix}($blas_L2)\" blas /path/to/cblas.h /path/to/libcblas.a libtstatlas.a libatlas.a
3068 Offload ALL BLAS:
3069 $script -f blas /path/to/cblas.h /path/to/libcblas.a libtstatlas.a libatlas.a
3071 Offload L3 with dynamic offload to DSP for 100 < result matrix size < 10000,
3072 also, set L1/L2 cache sizes to 4KB/128KB:
3073 $script -f -dspl1=4 -dspl2=128 -offmin=100 -offmax=10000 -inc=\"$blas_prefix($blas_L3)\" blas /path/to/cblas.h /path/to/libcblas.a libtstatlas.a libatlas.a
3075 Recommended settings for PARALLEL BLAS offload:
3076 $script -f -fatbin -offmax=10000000 -offmin=10000 -offload=002 blas /path/to/cblas.h /path/to/arm/libptcblas.a libtstatlas.a libatlas.a
3078 Recommended settings for SERIAL BLAS offload:
3079 $script -f -offload=002 -fatbin -offmin=10000 -offmax=10000000 blas /path/to/cblas.h /path/to/arm/libcblas.a libtstatlas.a libatlas.a
3081 Recommended settings for LAPACK offload:
3082 $script -f -dep=../../blaswrap.h,../../f2c.h -fatbin -offload=002 -offmin=10000 -offmax=10000000 -f2cwraplib=../../prebuilt/lib/libcblaswr.a blas ../../clapack.h ../../prebuilt/lib/liblapack.a libf2c.a libcblaswr.a libtmglib.a libcblas.a libatlas.a
3084 ";
3086 $::ex = $::ex || "";
3087 $::inc = $::inc || "";
3088 $::dep = $::dep || "";
3089 $::exfile = $::exfile || "";
3091 $h = $::h || "";
3092 $fatbin = $::fatbin || "";
3093 $commentarm = $::commentarm || 0;
3094 $f = $::f || "";
3096 if ($::dep) {
3097 my @deps = split(/,/, $::dep);
3098 $::dep = "";
3099 for $_d (@deps) {
3100 $::dep .= "#include \"$_d\"\n";
3101 }
3102 }
3104 $::offload = $::offload || "001"; # offload all intercepted functions
3105 $offload = $::offload;
3107 $::f2cwraplib=$::f2cwraplib || ""; # path to f2c wrapper library if any
3109 # below is a hack to introduce conditional code for execution on ARM
3110 # if resultant matrix contains more elements than the value specified
3111 # below. This flag will not be documented as it's an ugly hack and may be
3112 # removed at any time
3114 # 0 => Level 1
3115 # 1 => Level 2
3116 # 2 => Level 3
3117 $::offmin = $::offmin || "";
3118 $::offmax = $::offmax || "";
3119 $OFFLOAD_MIN = $::offmin if ($::offmin ne "");
3120 $OFFLOAD_MAX = $::offmax if ($::offmax ne "");
3122 # cache size for DSP
3123 # In KB. So -dspl1=4 means set L1 to 4KB
3124 $dspl1 = $::dspl1 || "";
3125 $dspl2 = $::dspl2 || "";
3127 # set scriptargs
3128 $scriptargs = "-dspl1=$dspl1 $scriptargs" if $dspl1;
3129 $scriptargs = "-dspl2=$dspl2 $scriptargs" if $dspl2;
3130 $scriptargs = "-inc=$::inc $scriptargs" if $::inc;
3131 $scriptargs = "-ex=$::ex $scriptargs" if $::ex;
3132 $scriptargs = "-offload=$offload $scriptargs" if $offload;
3133 $scriptargs = "-offmin=$offmin $scriptargs" if $offmin;
3134 $scriptargs = "-offmax=$offmax $scriptargs" if $offmax;
3135 $scriptargs = "-fatbin $scriptargs" if $fatbin;
3136 $scriptargs = "-l3min=$l3min $scriptargs" if $l3min;
3137 $scriptargs = "-l3max=$l3max $scriptargs" if $l3max;
3138 $scriptargs = "-f2cwraplib=$f2cwraplib $scriptargs" if $f2cwraplib;
3139 $scriptargs = "-commentarm $scriptargs" if $commentarm;
3140 $scriptargs = "-f $scriptargs" if $f;
3143 my $inc_regex = $::inc ; # defaults to empty => intercept all
3144 my $ex_regex = $::ex ; # defaults to empty => exclude none
3147 $package = shift || die $usage; chomp $package;
3148 $header = shift || die $usage; chomp $header;
3149 #$armlib = shift || die $usage; chomp $armlib; # /path/to/library.a
3150 ( -f "$header" ) || die "$header does not exist";
3151 #( -f "$armlib" ) || warn "$armlib does not exist";
3152 $libdeps = "@ARGV"; chomp $libdeps;
3155 # derive namespace using header name
3156 $namespace = `basename $header`; chomp $namespace; $namespace =~ s/^(.*)\.h/ti_$1/;
3157 $NAMESPACE = uc($namespace);
3159 check_deps();
3161 if (defined($f)) {
3162 # for $tmpfile ("facade.c","facade.obj","${namespace}_kernel.cl", "${namespace}.h") {
3163 # next unless (-f $tmpfile);
3164 # print "Removing $tmpfile\n" ; unlink($tmpfile);
3165 # }
3166 #unlink glob "*.o";
3167 unlink glob "facade.c";
3168 unlink glob "ti_cblas_initfini.c";
3169 unlink glob "ti_cblas_kernel.cl";
3170 unlink glob "ti_cblas_cblas*.c";
3171 unlink glob "ti_cblas.h";
3172 #unlink glob "*.inc";
3173 #unlink glob "*.a";
3174 }
3176 generate_code(get_func_protos(), $inc_regex, $ex_regex);
3177 copy_make_includes($scriptdir);
3178 link "$scriptdir/rename_f2c","./rename_f2c" if ($::f2cwraplib);
3179 print "Default offload setting: $offload (override using ${NAMESPACE}_OFFLOAD)\n";
3181 exit 0;