summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 205b746)
raw | patch | inline | side by side (parent: 205b746)
author | Jianzhong Xu <a0869574local@uda0869574b> | |
Mon, 14 Dec 2015 21:20:19 +0000 (16:20 -0500) | ||
committer | Jianzhong Xu <a0869574local@uda0869574b> | |
Mon, 14 Dec 2015 21:20:19 +0000 (16:20 -0500) |
173 files changed:
diff --git a/Makefile b/Makefile
index b78f243acff5c338025c5a4574cf5b4827b805a6..a6c382ec98c4f23eb20581e7f912b9cfbac8b183 100644 (file)
--- a/Makefile
+++ b/Makefile
ifeq ($(MEM_MODEL),Large)
BLIS_CFG = c66xLarge
-else ifeq ($(TARGET),Medium)
+else ifeq ($(MEM_MODEL),Medium)
BLIS_CFG = c66xMedium
-else ifeq ($(TARGET),Small)
+else ifeq ($(MEM_MODEL),Small)
BLIS_CFG = c66xSmall
endif
-build: ARMonly
-prebuild: DSPonly
-linalg: ARMplusDSP
-
-ARMonly:
- cd $(LINALG_CBLAS_DIR); make arch=ARM alllib; \
- cd ../$(LINALG_BLIS_DIR); ./configure -p install/arm cortex-a15; make -j8; make install; \
- cd ../$(LINALG_BLASACC_DIR)/src; make -f Makefile.ARM; \
- cd ../../$(LINALG_CLAPACK_DIR); make f2clib; make cblaswrap; cd SRC; make
-
-ARMplusDSP_K2H:
- cd $(LINALG_CBLAS_DIR); make arch=ARM alllib; make arch=C66 alllib; \
- cd ../$(LINALG_BLIS_DIR); ./configure -p install/c66xLarge c66x; make mem_model=Large -j8; make install; \
- ./configure -p install/arm cortex-a15; make -j8; make install; \
- cd ../$(LINALG_TICBLAS_DIR)/src; make mem_model=Large; cd ..; \
- cd ../$(LINALG_BLASACC_DIR)/src; make mem_model=Large; cd..; \
- cd ../$(LINALG_CLAPACK_DIR); make f2clib; make cblaswrap; cd SRC; make -j8
-
-ARMplusDSP_AM57x:
- cd $(LINALG_CBLAS_DIR); make arch=ARM alllib; make arch=C66 alllib; \
- cd ../$(LINALG_TICBLAS_DIR)/src; make MEM_MODEL=Small; cd ..; \
- cd ../$(LINALG_BLIS_DIR); ./configure -p install/am57x am57x; make -j8; make install; \
- ./configure -p install/arm cortex-a15; make -j8; make install; \
- cd ../$(LINALG_BLASACC_DIR); make crossAM57x; \
- cd ../$(LINALG_CLAPACK_DIR); make f2clib; make cblaswrap; cd SRC; make -j8
-
DSPlibs:
cd $(LINALG_CBLAS_DIR); make arch=C66 alllib; \
- cd ../$(LINALG_BLIS_DIR); ./configure -p install/$(BLIS_CFG) c66x; make mem_model=$(MEM_MODEL) -j8; make install; \
- cd ../$(LINALG_TICBLAS_DIR)/src; make mem_model=$(MEM_MODEL); cd ..; \
+ cd ../$(LINALG_BLIS_DIR); ./configure -p install/$(BLIS_CFG) c66x; make -j8 MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS); make install; \
+ cd ../$(LINALG_TICBLAS_DIR)/src; make MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS); cd ..;
ARMplusDSP:
cd $(LINALG_CBLAS_DIR); make arch=ARM alllib; \
cd ../$(LINALG_BLIS_DIR); ./configure -p install/arm cortex-a15; make -j8; make install; \
- cd ../$(LINALG_BLASACC_DIR)/src; make mem_model=Large; cd ..; \
+ cd ../$(LINALG_BLASACC_DIR)/src; make MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET); cd ..; \
cd ../$(LINALG_CLAPACK_DIR); make f2clib; make cblaswrap; cd SRC; make -j8
+
+cleanDSPlibs:
+ cd $(LINALG_CBLAS_DIR); make clean; \
+ cd ../$(LINALG_BLIS_DIR); ./configure -p install/$(BLIS_CFG) c66x; make -j8 clean; \
+ cd ../$(LINALG_TICBLAS_DIR)/src; make clean; cd ..;
+
#DSPonly:
BLIStest:
cd $(LINALG_CLAPACK_DIR)/TESTING/EIG; make
-cleanARMplusDSP_K2H:
+cleanARMplusDSP:
cd $(LINALG_CBLAS_DIR); make arch=ARM clean; make arch=C66 clean; \
cd ../$(LINALG_TICBLAS_DIR)/src; make clean; cd ..; \
- cd ../$(LINALG_BLIS_DIR); ./configure -p install/c66x c66x; make clean; \
+ cd ../$(LINALG_BLIS_DIR); ./configure -p install/Large c66x; make clean; \
./configure -p install/arm cortex-a15; make clean; \
cd ../$(LINALG_BLASACC_DIR); make clean; \
cd ../$(LINALG_BLIS_DIR)/testsuite; make clean; \
index 59809a855f3c277e70d2193d506cfe7e200709a7..0ce5362734499937cbf72736f2bf9b77f796f630 100644 (file)
--- a/blasblisacc/src/Makefile
+++ b/blasblisacc/src/Makefile
include ../../make.inc
-PATH:=$(TI_OCL_CGT_INSTALL)/bin:$(PATH)
+PATH:=$(CGTROOT)/bin:$(PATH)
# Defines
TI_CBLAS_FAT_BINARY = 1
ARM_PLUS_DSP_LIB_DIR = ../lib
-INCDIR := $(TI_OCL_CGT_INSTALL)/include
+INCDIR := $(CGTROOT)/include
INCDIR += -I$(OMP_DIR)/packages/ti/runtime/openmp
INCDIR += -I$(FC_DIR)/packages
INCDIR += -I$(XDC_DIR)/packages
INCDIR += -I$(XDAIS_DIR)/packages
INCDIR += -I$(LIBARCH_DIR)
INCDIR += -I$(PDK_DIR)/packages
+INCDIR += -I$(TI_OCL_INSTALL_DIR)
INCS = -I. -I$(strip $(subst ;, -I,$(subst $(space),$(space),$(INCDIR))))
OBJS = ti_cblas_initfini.o
CPP_DEBUG = -g
-CPP_FLAGS = -D_LITTLE_ENDIAN -D__ARMv7 -DSOC_K2H -I../../cblas/include -I../../blis/install/arm/include/blis/ -I$(TI_OCL_INSTALL_DIR)/include -fopenmp
-CL6X_FLAGS = $(INCS) --openmp --use_g2 -DSOC_K2H -DLIB_OPENCL
+CPP_FLAGS = -D_LITTLE_ENDIAN -D__ARMv7 -D$(TARGET) -I../../cblas/include -I../../blis/install/arm/include/blis/ -I$(TI_OCL_INSTALL_DIR)/include -fopenmp
+CL6X_FLAGS = $(INCS) --openmp --use_g2 -D$(TARGET) -DLIB_OPENCL
CLOCL_FLAGS =
OBJCOPY_ARGS=
ARM_PLUS_DSP_LIB= $(ARM_PLUS_DSP_LIB_DIR)/libcblas_armplusdsp.a
# OpenCL libraries included in make.inc
LIBS += -lpthread
-# CBLAS and BLIS directories
-CBLAS_DSP_LIB = ../../cblas/lib/C66/libcblas_C66.ae66
-TICBLAS_DSP_LIB = ../../ticblas/lib/libticblas.a66x
-CBLAS_ARM_LIB = ../../cblas/lib/ARM/libcblas_ARM.a
-LIBARCH_LIB = $(LIBARCH_DIR)/lib/libArch.a66x
-
-ifeq ($(mem_model),Large)
-BLIS_DSP_LIB = ../../blis/install/c66xLarge/lib/libblis.ae66
-else ifeq ($(mem_model),Medium)
-BLIS_DSP_LIB = ../../blis/install/c66xMedium/lib/libblis.ae66
-else ifeq ($(mem_model),Small)
-BLIS_DSP_LIB = ../../blis/install/lib/libblis.ae66
-#else ifeq ($(mem_model),Tiny)
-endif
-
OCL_BIN = ti_cblas_kernel.out
ifeq ($(TI_CBLAS_FAT_BINARY), 1)
index fdea549af1fc8564b93fe57071c73b12b391c815..0900fd7991afd6e0090b9257aa89aaf18496cdc9 100644 (file)
#ifdef __cplusplus
-extern Kernel* ti_cblas_get_kernel(int idx, const char *fname);
+extern Kernel* ti_cblas_get_kernel(const char *fname);
int ti_cblas_delete_kernel(Kernel* K);
#if 0
extern Context ti_cblas_ocl_context;
extern std::vector<Device>* ti_cblas_ocl_devices;
extern CommandQueue* ti_cblas_ocl_Q;
extern Program::Binaries* ti_cblas_ocl_binary;
-extern Program* ti_cblas_ocl_program;
+//extern Program* ti_cblas_ocl_program;
+extern Program ti_cblas_ocl_program;
#endif
#else
extern cl_kernel ti_cblas_get_kernel(int idx, const char *fname);
index 39daf708f3665a6dbc3a3b8f36e29e49d8fe4b05..8011b2b2b828bc4a4ed9419aea1957a1f45a5977 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CAXPY_IDX, "ocl_cblas_caxpy");
+ __K = ti_cblas_get_kernel("ocl_cblas_caxpy");
#ifdef __cplusplus
try
#else
index bf5b16d0cf2e88d088f1ca470a6db33a7715654c..8a8fbc11350d48623dd8f21214dfc35040c79c68 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CCOPY_IDX, "ocl_cblas_ccopy");
+ __K = ti_cblas_get_kernel("ocl_cblas_ccopy");
#ifdef __cplusplus
try
#else
diff --git a/blasblisacc/src/ti_cblas_cblas_cdotc_sub.c b/blasblisacc/src/ti_cblas_cblas_cdotc_sub.c
index 9cfc775883c857f2d934a622ec5e87f03a48b4e5..233d143908e60061ffc8d11848e364ca6fc13e7e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CDOTC_SUB_IDX, "ocl_cblas_cdotc_sub");
+ __K = ti_cblas_get_kernel("ocl_cblas_cdotc_sub");
#ifdef __cplusplus
try
#else
diff --git a/blasblisacc/src/ti_cblas_cblas_cdotu_sub.c b/blasblisacc/src/ti_cblas_cblas_cdotu_sub.c
index 68d4684a1f3c8c8fc98c3dbbdaffc1a928f858d8..20ad6d0a87a8611e81447ee7ade1862050431248 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CDOTU_SUB_IDX, "ocl_cblas_cdotu_sub");
+ __K = ti_cblas_get_kernel("ocl_cblas_cdotu_sub");
#ifdef __cplusplus
try
#else
index 097792c134ec24685a50ddada1299d4db27116b9..9f2a60470db81b7739e62f34355d40e1de4dd185 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGBMV_IDX, "ocl_cblas_cgbmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_cgbmv");
#ifdef __cplusplus
try
#else
index ffd982072e7e077bbfc186ae7790be1924e3b991..947f84cf0bf945225777dc7a469b4284ed9e7cc7 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGEMM_IDX, "ocl_cblas_cgemm");
+ __K = ti_cblas_get_kernel("ocl_cblas_cgemm");
#ifdef __cplusplus
try
#else
index 37962f7f3c59c8c5a2e7c6a41ca031cae843ff96..6f807ff768429839e5cb6272ee28f66f66d77edc 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGEMV_IDX, "ocl_cblas_cgemv");
+ __K = ti_cblas_get_kernel("ocl_cblas_cgemv");
#ifdef __cplusplus
try
#else
index e8feac03ac0126dda9c8148cedc56b5803b19d91..4da0a7708f7bb37d7a47980db4a864bc4d58c60e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGERC_IDX, "ocl_cblas_cgerc");
+ __K = ti_cblas_get_kernel("ocl_cblas_cgerc");
#ifdef __cplusplus
try
#else
index 22f8c5046a527a67d8416d5a3af12cb6264efb93..fea0731c7f39268b8e59e2b609c2e6154936ac3a 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGERU_IDX, "ocl_cblas_cgeru");
+ __K = ti_cblas_get_kernel("ocl_cblas_cgeru");
#ifdef __cplusplus
try
#else
index ad7e51bdec8af637d8bd560db330cb99e1aeeafd..66a36bcbd96737486329dc569c84a880b4ab7d2d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHBMV_IDX, "ocl_cblas_chbmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_chbmv");
#ifdef __cplusplus
try
#else
index b51fbc2953d5f3d445173f2d40f7d25fa29d9bba..c61eb41bb7c9072c6af634e1d0b9e3fcfd215172 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHEMM_IDX, "ocl_cblas_chemm");
+ __K = ti_cblas_get_kernel("ocl_cblas_chemm");
#ifdef __cplusplus
try
#else
index 8e076486e7069d395ba15c860ff9917db4051fdb..97bf4a25f613b40add4cb1b0a8aac9eba9f1243e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHEMV_IDX, "ocl_cblas_chemv");
+ __K = ti_cblas_get_kernel("ocl_cblas_chemv");
#ifdef __cplusplus
try
#else
index b3ff8c31897c073178be5e85ac9c054a3317081d..2183e42b910c4a69f01d18edd384ed0f6c25c84d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHER_IDX, "ocl_cblas_cher");
+ __K = ti_cblas_get_kernel("ocl_cblas_cher");
#ifdef __cplusplus
try
#else
index eced71f9e068f36310ff13125f347bd154d08c26..a340d3d12516245f3e4c690f5e58bb940a0eb422 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHER2_IDX, "ocl_cblas_cher2");
+ __K = ti_cblas_get_kernel("ocl_cblas_cher2");
#ifdef __cplusplus
try
#else
index 709f036129cad062b6d46434b78fcdadfe3e56ce..0bebac137b5a6317196bbc29c9b146223c69dd6d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHER2K_IDX, "ocl_cblas_cher2k");
+ __K = ti_cblas_get_kernel("ocl_cblas_cher2k");
#ifdef __cplusplus
try
#else
index d7db80b6a597cae2391e8fcd4495ea3bf1a14591..2a3a5f46ae1668ac22cbe70af16421efcd51f528 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHERK_IDX, "ocl_cblas_cherk");
+ __K = ti_cblas_get_kernel("ocl_cblas_cherk");
#ifdef __cplusplus
try
#else
index c4d1d624df19ef648ac0bbf0b8b772bab84b1d80..c48d2415f7cb3272a86868daf9a4be1c60b3007e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHPMV_IDX, "ocl_cblas_chpmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_chpmv");
#ifdef __cplusplus
try
#else
index 670016e0fd00928f84d311257bea466c629b0814..d52a6261722c7399021607f89f604d07b90e384a 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHPR_IDX, "ocl_cblas_chpr");
+ __K = ti_cblas_get_kernel("ocl_cblas_chpr");
#ifdef __cplusplus
try
#else
index 50d29f732e25eca733394abcd054e34dbf907618..75132be99884b3c412e1f04bbdffda51f7e724b9 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHPR2_IDX, "ocl_cblas_chpr2");
+ __K = ti_cblas_get_kernel("ocl_cblas_chpr2");
#ifdef __cplusplus
try
#else
index c31ba615e8be56b3463efe8bb00678c3505c5acd..af7fa3907bc26784723110b83d690bded7ba8fc6 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CROTG_IDX, "ocl_cblas_crotg");
+ __K = ti_cblas_get_kernel("ocl_cblas_crotg");
#ifdef __cplusplus
try
#else
index 40d2452480b253c43d3c1a60ff12e77b3a126ed8..9a7b1340ddd4a9198872ab99fafafcdd593fae2a 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSCAL_IDX, "ocl_cblas_cscal");
+ __K = ti_cblas_get_kernel("ocl_cblas_cscal");
#ifdef __cplusplus
try
#else
index aea68b46d1e5e28d9e266a7d500aea7b528cba77..3f9b51d35ca2a2cffa465ad04128860b697b97cd 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSSCAL_IDX, "ocl_cblas_csscal");
+ __K = ti_cblas_get_kernel("ocl_cblas_csscal");
#ifdef __cplusplus
try
#else
index 67249d32b0156aa9aeecfc6a3d62c43a8716a624..7e5039187efdfe2dcb16c53df2b5894e8f0612a1 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSWAP_IDX, "ocl_cblas_cswap");
+ __K = ti_cblas_get_kernel("ocl_cblas_cswap");
#ifdef __cplusplus
try
#else
index 6050de720b21b53d987867bbff0c1bbbc2367bc9..494b499352cf37d186dc2b17890fc10db72b5b95 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSYMM_IDX, "ocl_cblas_csymm");
+ __K = ti_cblas_get_kernel("ocl_cblas_csymm");
#ifdef __cplusplus
try
#else
index 11feff3f8ef46768d433addd1dd61f499f72b6d8..de62fc1ace0d31693a7a86f3a037ca951ac7864f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSYR2K_IDX, "ocl_cblas_csyr2k");
+ __K = ti_cblas_get_kernel("ocl_cblas_csyr2k");
#ifdef __cplusplus
try
#else
index 4840abdfc8efb414bd75151e7214fdb479d913a2..1dd65d4c114dbb2eda9944b72351a820fe7d5a78 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSYRK_IDX, "ocl_cblas_csyrk");
+ __K = ti_cblas_get_kernel("ocl_cblas_csyrk");
#ifdef __cplusplus
try
#else
index 018b07030dd14955cf541b6d1c35166008f06e4f..e17c4711e27b780e0bf1a6be1271018daff95ed4 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTBMV_IDX, "ocl_cblas_ctbmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ctbmv");
#ifdef __cplusplus
try
#else
index 6f34e3ae8c2e72642a732000e49845bb774f659f..7d76fd8671415bfcf6361e75d82bed956cdeba77 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTBSV_IDX, "ocl_cblas_ctbsv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ctbsv");
#ifdef __cplusplus
try
#else
index b2db324062eeb3f301dce6e89f8590c78ac0d142..abc37d0957bbbedb9465f0ea35b382191b95c265 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTPMV_IDX, "ocl_cblas_ctpmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ctpmv");
#ifdef __cplusplus
try
#else
index 45969d33cd990209ad89cd6d0ce64ce8d329ae6a..f1242bc5764967dca48ac544a58e3bd34f77a574 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTPSV_IDX, "ocl_cblas_ctpsv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ctpsv");
#ifdef __cplusplus
try
#else
index 54f291f3ada4bb76599a2e80e7b9145d67cb1d77..33ba1462c30730081750ee0657046291ee295e66 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTRMM_IDX, "ocl_cblas_ctrmm");
+ __K = ti_cblas_get_kernel("ocl_cblas_ctrmm");
#ifdef __cplusplus
try
#else
index 40054f941ce48333e91fa9af8267adee3d5a974e..4b22df49c0ef0d6adb8ace36166053715fb155ca 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTRMV_IDX, "ocl_cblas_ctrmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ctrmv");
#ifdef __cplusplus
try
#else
index 7804059c0351970f512ab4f496f721fc3faee424..05882ea46c030e4d91007cb5f8f164253211810d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTRSM_IDX, "ocl_cblas_ctrsm");
+ __K = ti_cblas_get_kernel("ocl_cblas_ctrsm");
#ifdef __cplusplus
try
#else
index 42d5a05d2ecf7c7099ab595206cba546270f8dd4..4f636566c82ba65c9628bc2559eb4c78200cad0b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTRSV_IDX, "ocl_cblas_ctrsv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ctrsv");
#ifdef __cplusplus
try
#else
index ac2c2af1905e9862621ec3c14007021e10283869..80ba1428076a93bfcb05235ed2a1b704c873eb21 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DASUM_IDX, "ocl_cblas_dasum");
+ __K = ti_cblas_get_kernel("ocl_cblas_dasum");
#ifdef __cplusplus
try
#else
index fee688edb32d0aa4cd9050c42644b26076cacfea..7c561bb21875719b3015fd4bdf031807f5963ebd 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DAXPY_IDX, "ocl_cblas_daxpy");
+ __K = ti_cblas_get_kernel("ocl_cblas_daxpy");
#ifdef __cplusplus
try
#else
index 58769ff7228ed96b15365979b6fe02f303876ff5..403fedbc7de5cb8e45c71cb4698b6b5755ab1c0e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DCOPY_IDX, "ocl_cblas_dcopy");
+ __K = ti_cblas_get_kernel("ocl_cblas_dcopy");
#ifdef __cplusplus
try
#else
index 5bfbb81b112d1ba164fd7d6361eb42c570627b36..24469f64c35b5a529b6b4d8fe226301bc0d597a2 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DDOT_IDX, "ocl_cblas_ddot");
+ __K = ti_cblas_get_kernel("ocl_cblas_ddot");
#ifdef __cplusplus
try
#else
index 2e03fd2241bb5a78b5798c7e475abae99e7dcdc6..c3371a00c2dd5a4705f2ae0676536f7d55cd7e03 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DGBMV_IDX, "ocl_cblas_dgbmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_dgbmv");
#ifdef __cplusplus
try
#else
index 6fa53241eb386e9229f7c1261df1b2a931ca3ea5..f670d9b7916635ddac8962e453183e798e0fa678 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DGEMM_IDX, "ocl_cblas_dgemm");
+ __K = ti_cblas_get_kernel("ocl_cblas_dgemm");
#ifdef __cplusplus
try
#else
index acb7123ff30729f4f966d080dcc8d6c28c3703fe..461afe198af0b22cc150ef152d1b504eb74700d4 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DGEMV_IDX, "ocl_cblas_dgemv");
+ __K = ti_cblas_get_kernel("ocl_cblas_dgemv");
#ifdef __cplusplus
try
#else
index c035efed70e6150e651419c058d8402616b3ae47..5a8f94254fc6b19a3b9a5665ecd5157fa796e47e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DGER_IDX, "ocl_cblas_dger");
+ __K = ti_cblas_get_kernel("ocl_cblas_dger");
#ifdef __cplusplus
try
#else
index 0be91b895ee4701bff3e183c13da99a63203b15b..70489bd6409f146a25f6d6e60b7a242f4e86ff59 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DNRM2_IDX, "ocl_cblas_dnrm2");
+ __K = ti_cblas_get_kernel("ocl_cblas_dnrm2");
#ifdef __cplusplus
try
#else
index bb4cdc07730ba5aa5dd0c901b36207725d6c2974..0c5cfa15292eab479df109e31494f8fa34472605 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DROT_IDX, "ocl_cblas_drot");
+ __K = ti_cblas_get_kernel("ocl_cblas_drot");
#ifdef __cplusplus
try
#else
index 2c20ae308e8754752271ccfd84f3e27d6a9802fe..f375f4360c026a86e75a304084fa896ae3ce501b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DROTG_IDX, "ocl_cblas_drotg");
+ __K = ti_cblas_get_kernel("ocl_cblas_drotg");
#ifdef __cplusplus
try
#else
index 1e4a374ea085398c56ccd279b9cb5fe02999a472..39c6a80137ebe952fc1fde0706976bdda2232b36 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DROTM_IDX, "ocl_cblas_drotm");
+ __K = ti_cblas_get_kernel("ocl_cblas_drotm");
#ifdef __cplusplus
try
#else
index ef4e979a576a1754a26a4a629bb3599c1383efd2..c7d101d7d74fb4a72feaabdd3559dba792765e40 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DROTMG_IDX, "ocl_cblas_drotmg");
+ __K = ti_cblas_get_kernel("ocl_cblas_drotmg");
#ifdef __cplusplus
try
#else
index d3e1988c0837a5dd87065b12660350a6b78da736..bf4c8e6498b6c648be69080a7c8a510162bc8158 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSBMV_IDX, "ocl_cblas_dsbmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_dsbmv");
#ifdef __cplusplus
try
#else
index 44c1510a00a0b7f831be31e3a06e5b495e66d70e..3e1bc031675da44de6345e5880919e42402c2e2c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSCAL_IDX, "ocl_cblas_dscal");
+ __K = ti_cblas_get_kernel("ocl_cblas_dscal");
#ifdef __cplusplus
try
#else
index 7e7ff095c7f714b7e75e9c23f4b8226f555adf11..fba396937344656a8fed5fbac478b1c0ec84f32f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSDOT_IDX, "ocl_cblas_dsdot");
+ __K = ti_cblas_get_kernel("ocl_cblas_dsdot");
#ifdef __cplusplus
try
#else
index 5239c73b879d30a40851a5f46ae1a04e33faadde..8f64ca20db4b4d6dec0be822fad4e9d694ca5a9e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSPMV_IDX, "ocl_cblas_dspmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_dspmv");
#ifdef __cplusplus
try
#else
index d8c25dcf49eabb891204b2ab0be2378d6c88655a..0c7de8123d8bed73e8a183ecdc4e19c8e1acca4b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSPR_IDX, "ocl_cblas_dspr");
+ __K = ti_cblas_get_kernel("ocl_cblas_dspr");
#ifdef __cplusplus
try
#else
index 4d6b2c049debc0b14516beb402ec8e1f3d0593c6..e8c0289ccd5cc18f321abcd7761f9843fae3d7f5 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSPR2_IDX, "ocl_cblas_dspr2");
+ __K = ti_cblas_get_kernel("ocl_cblas_dspr2");
#ifdef __cplusplus
try
#else
index 63cd15ada38181b6b5f14825263bb73857119fc4..9036f3187f471da2d4ebd0d3cf219a532b4b6152 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSWAP_IDX, "ocl_cblas_dswap");
+ __K = ti_cblas_get_kernel("ocl_cblas_dswap");
#ifdef __cplusplus
try
#else
index a6223ee92349bc7f723adac969d7e6ee376d978f..e74a029be068afed928029258e6c269e64c0d240 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYMM_IDX, "ocl_cblas_dsymm");
+ __K = ti_cblas_get_kernel("ocl_cblas_dsymm");
#ifdef __cplusplus
try
#else
index aca46f23ecadab0c668bac4510662baa53d1bddf..9f60c6fee84c1152d0a5499e67fd19f8637a1c0d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYMV_IDX, "ocl_cblas_dsymv");
+ __K = ti_cblas_get_kernel("ocl_cblas_dsymv");
#ifdef __cplusplus
try
#else
index a421bcde70a15430b00f875af77980e9f38c55cb..b85a76a3d9a8a3a43f1b26ea86fbc931b5cb18d2 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYR_IDX, "ocl_cblas_dsyr");
+ __K = ti_cblas_get_kernel("ocl_cblas_dsyr");
#ifdef __cplusplus
try
#else
index 5c64af929d45692dcaf68c44065e18404802bdde..4a0143adbffff72927c02ff62012610f8123285c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYR2_IDX, "ocl_cblas_dsyr2");
+ __K = ti_cblas_get_kernel("ocl_cblas_dsyr2");
#ifdef __cplusplus
try
#else
index 5e126a700094fd74be18c69c66c1ba50d9ca3a60..043b00ce7a5400aa5f683997d9b2ffd4690ef65c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYR2K_IDX, "ocl_cblas_dsyr2k");
+ __K = ti_cblas_get_kernel("ocl_cblas_dsyr2k");
#ifdef __cplusplus
try
#else
index a5b7d87ef9aa0d5f2d4086bbc0eb0806cd617995..b152fbcd511f40c1ebf2143922c46811b8220761 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYRK_IDX, "ocl_cblas_dsyrk");
+ __K = ti_cblas_get_kernel("ocl_cblas_dsyrk");
#ifdef __cplusplus
try
#else
index 8e1149847c15a490d37eb72c30e2cacd0ca4d89d..b14bf8b4821de9308dc8617495d61ff4909cc7f1 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTBMV_IDX, "ocl_cblas_dtbmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_dtbmv");
#ifdef __cplusplus
try
#else
index 095284708aea09ac3eaeb5c3a8e1d6a102301f5e..b81f39430128bf4a61199c8e725186f20ff59ccf 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTBSV_IDX, "ocl_cblas_dtbsv");
+ __K = ti_cblas_get_kernel("ocl_cblas_dtbsv");
#ifdef __cplusplus
try
#else
index f8fa7fcde306160ef9e6620d8d2fd2a912ea12a4..c0839c7066a109c8b0827d301d56be7592076029 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTPMV_IDX, "ocl_cblas_dtpmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_dtpmv");
#ifdef __cplusplus
try
#else
index 6856032f5357329ac0a27df39ac90dd24b561714..4a07438f7c04161b99ef9f7dbea3b7e63497cc2e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTPSV_IDX, "ocl_cblas_dtpsv");
+ __K = ti_cblas_get_kernel("ocl_cblas_dtpsv");
#ifdef __cplusplus
try
#else
index fd70c5dc4b706e5293b03e5920f4033319752ffb..910a928db1840bc7fcf5d9bb624c96c6c1505bd9 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTRMM_IDX, "ocl_cblas_dtrmm");
+ __K = ti_cblas_get_kernel("ocl_cblas_dtrmm");
#ifdef __cplusplus
try
#else
index 54755ec2ee5b645002cb2dce5ac2e7f5da8069ea..0da3b25a4b8afac3731b99074d8130c1f468c954 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTRMV_IDX, "ocl_cblas_dtrmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_dtrmv");
#ifdef __cplusplus
try
#else
index 798a88e7db5ae6895105b8877d2809c8ff3eaaea..d74cad1cfa6863291a93e599a41a0c2fdec07b79 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTRSM_IDX, "ocl_cblas_dtrsm");
+ __K = ti_cblas_get_kernel("ocl_cblas_dtrsm");
#ifdef __cplusplus
try
#else
index 15d492b19ef6d54d96ad3cd9040e2c840d5120ef..53f642eab5c73e1ea487639f69dabad5a76e924f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTRSV_IDX, "ocl_cblas_dtrsv");
+ __K = ti_cblas_get_kernel("ocl_cblas_dtrsv");
#ifdef __cplusplus
try
#else
index fd4874ce4c76eb0d6fbdab0ea5b4defb17b9bdb2..956ae211b4e5a904538fc6be34bbd702056a013b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DZASUM_IDX, "ocl_cblas_dzasum");
+ __K = ti_cblas_get_kernel("ocl_cblas_dzasum");
#ifdef __cplusplus
try
#else
index 96147f96cfc07c8bd203d7188e4540b8d413e4a3..3b25d2f1d5f452dcfa17cd6788fb1c3395154e78 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DZNRM2_IDX, "ocl_cblas_dznrm2");
+ __K = ti_cblas_get_kernel("ocl_cblas_dznrm2");
#ifdef __cplusplus
try
#else
index c5569ad9e9a73e65cdd22399c849f2fb11e96ef8..616a92d1cd342172b4c14378f0671254d9806838 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ICAMAX_IDX, "ocl_cblas_icamax");
+ __K = ti_cblas_get_kernel("ocl_cblas_icamax");
#ifdef __cplusplus
try
#else
index 3ed7745801849fcb57d95545c06b9593633c0563..073906e7e058598e6956db5dd15c854cfa3b1fdb 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_IDAMAX_IDX, "ocl_cblas_idamax");
+ __K = ti_cblas_get_kernel("ocl_cblas_idamax");
#ifdef __cplusplus
try
#else
index 19bea6ab6ddd0bd78d74e478b220f1b8c00d7c11..bc0cb8dc49a996fae44214611d161e23a3106a9e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ISAMAX_IDX, "ocl_cblas_isamax");
+ __K = ti_cblas_get_kernel("ocl_cblas_isamax");
#ifdef __cplusplus
try
#else
index 77141cf81d0e2fb1d2ee61287eebbd0a2d1f7aed..cb9c43457b54596fea6977d37fc0083826b725a7 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_IZAMAX_IDX, "ocl_cblas_izamax");
+ __K = ti_cblas_get_kernel("ocl_cblas_izamax");
#ifdef __cplusplus
try
#else
index 7892bb7221f69ac12fbeb16eaa005d4954fb8af8..b78d76e330e8d3634ebc95a756120dcb2448b82b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SASUM_IDX, "ocl_cblas_sasum");
+ __K = ti_cblas_get_kernel("ocl_cblas_sasum");
#ifdef __cplusplus
try
#else
index 3862933e632a4fd52a7255eebf01593ed6c3c88f..9ab8bb8a4a2dc21ecbd2d969eee333366ab156c5 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SAXPY_IDX, "ocl_cblas_saxpy");
+ __K = ti_cblas_get_kernel("ocl_cblas_saxpy");
#ifdef __cplusplus
try
#else
index 23c6f4f38c27cc687ed0892dd73449ef80b718bd..26a02b2523cd24e52623d295f3c9c9b307c38a73 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SCASUM_IDX, "ocl_cblas_scasum");
+ __K = ti_cblas_get_kernel("ocl_cblas_scasum");
#ifdef __cplusplus
try
#else
index a966e9ee6950f86c5e80f883be581ca7c3a1fc8f..b9b52856d6ab0cb27fdfe6e72cb46f744f55dc01 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SCNRM2_IDX, "ocl_cblas_scnrm2");
+ __K = ti_cblas_get_kernel("ocl_cblas_scnrm2");
#ifdef __cplusplus
try
#else
index dd6cab58259f0aff629480c062babebdaaad0ea3..9dad1cefe055e6b95db2729bb20eea73b8266228 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SCOPY_IDX, "ocl_cblas_scopy");
+ __K = ti_cblas_get_kernel("ocl_cblas_scopy");
#ifdef __cplusplus
try
#else
index 69fd6d44bb21eb4c8da40cf7f4c35e76dfb19e74..f7d0ef1c4e936de0b62fd14cae2d50af475320d5 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SDOT_IDX, "ocl_cblas_sdot");
+ __K = ti_cblas_get_kernel("ocl_cblas_sdot");
#ifdef __cplusplus
try
#else
index d5445ea42bd48a1e64bc44e8e22f300d7fe89b9e..14fb7872aceb0caf39a508b90c3cb5d58b04dbe4 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SDSDOT_IDX, "ocl_cblas_sdsdot");
+ __K = ti_cblas_get_kernel("ocl_cblas_sdsdot");
#ifdef __cplusplus
try
#else
index f4f2826d101e2f19550cc062d299bc315d784ddb..4f45e281b0b1db509e4ea2028b266c29e6f1e0da 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SGBMV_IDX, "ocl_cblas_sgbmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_sgbmv");
#ifdef __cplusplus
try
#else
index 72e3af3b58ab693ccfc9b31001d4f5d554b8db58..ec4c413e9b81863b340de7f2318f660035e894ae 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SGEMM_IDX, "ocl_cblas_sgemm");
+ __K = ti_cblas_get_kernel("ocl_cblas_sgemm");
#ifdef __cplusplus
try
#else
index c6e0c1daf7faf8f5b7166676f291dadbeb1fa048..49d85bef5607cd7bdc2f840e5793433682ff2e4f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SGEMV_IDX, "ocl_cblas_sgemv");
+ __K = ti_cblas_get_kernel("ocl_cblas_sgemv");
#ifdef __cplusplus
try
#else
index 1ee795f876353a1adbbfc44a0be97e0f50e1c92c..54be54a410eab493334b7c2c502a2500742fe02c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SGER_IDX, "ocl_cblas_sger");
+ __K = ti_cblas_get_kernel("ocl_cblas_sger");
#ifdef __cplusplus
try
#else
index 79a7dcbfb6da15956a44b1bb3eabbf0ca453d710..6fc9b4289d90453e094d88f24c0e53b6e90357e3 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SNRM2_IDX, "ocl_cblas_snrm2");
+ __K = ti_cblas_get_kernel("ocl_cblas_snrm2");
#ifdef __cplusplus
try
#else
index 051e99a68f1e988d49bf33412e31a652ff75826e..da897ccdb84e09472824553535d1f0cd68d7e6e6 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SROT_IDX, "ocl_cblas_srot");
+ __K = ti_cblas_get_kernel("ocl_cblas_srot");
#ifdef __cplusplus
try
#else
index 2b7a071bb9a248c351b5b0ee5f4397d599c3d94a..4fb2a93475e451e25aaf65b75cbcafad07c8fb4f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SROTG_IDX, "ocl_cblas_srotg");
+ __K = ti_cblas_get_kernel("ocl_cblas_srotg");
#ifdef __cplusplus
try
#else
index e67142c90213ae74fc2522d00b4a4381099d6b68..eb48711bf7fc4e0e9fbf6d6e1820303bc57b49d0 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SROTM_IDX, "ocl_cblas_srotm");
+ __K = ti_cblas_get_kernel("ocl_cblas_srotm");
#ifdef __cplusplus
try
#else
index 008e2d0a35a5878f61dd8cef1cede20cc1b58c63..f33cbf29537559482d6bcbf0a37dbb1f08a6c494 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SROTMG_IDX, "ocl_cblas_srotmg");
+ __K = ti_cblas_get_kernel("ocl_cblas_srotmg");
#ifdef __cplusplus
try
#else
index 08d9dd7479b36b0172ba457a90ca089824ca9cc5..b1aa3b07cf82ea03670c49ba0f3bb4268fe92901 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSBMV_IDX, "ocl_cblas_ssbmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ssbmv");
#ifdef __cplusplus
try
#else
index d3afdb9af57c433e646f69fb153bc69b3d2d4169..baeffe56e24dd4d5c955edd7c7bba5c52ba5437c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSCAL_IDX, "ocl_cblas_sscal");
+ __K = ti_cblas_get_kernel("ocl_cblas_sscal");
#ifdef __cplusplus
try
#else
index 78f82ddba2e69a90b0e49dd5ad1083ad8bd22ca0..3fd769adb98206c0709bc56bae841a0efc90aa40 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSPMV_IDX, "ocl_cblas_sspmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_sspmv");
#ifdef __cplusplus
try
#else
index 707533f1203af0ff8e44dd7565d2540094f4f931..fd767b2e86fb70e59f7b55e390cae1e2bb7cbf4d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSPR_IDX, "ocl_cblas_sspr");
+ __K = ti_cblas_get_kernel("ocl_cblas_sspr");
#ifdef __cplusplus
try
#else
index dac6d8538f1a8aa11aab90484f408d19893fa50e..aa7875ad33d3046c8a7454b0f10769d9114426a9 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSPR2_IDX, "ocl_cblas_sspr2");
+ __K = ti_cblas_get_kernel("ocl_cblas_sspr2");
#ifdef __cplusplus
try
#else
index 241d2136298fbee4b1b8061244d8a306bc031112..cfb6cae569995d21495a060ff8a562e6a510012d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSWAP_IDX, "ocl_cblas_sswap");
+ __K = ti_cblas_get_kernel("ocl_cblas_sswap");
#ifdef __cplusplus
try
#else
index c090bed21bdefa2027be33820dfda2ab4d1586b3..68b45bf71224eaa2bb9c1c9ae7198f672b68e75a 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYMM_IDX, "ocl_cblas_ssymm");
+ __K = ti_cblas_get_kernel("ocl_cblas_ssymm");
#ifdef __cplusplus
try
#else
index e92214f0d9b4b9f67c938c86df235519614774b8..39cdcee292dbdd988736ec932685ec9734c531b4 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYMV_IDX, "ocl_cblas_ssymv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ssymv");
#ifdef __cplusplus
try
#else
index 5c45b02b42ccab1327a9fdcabae28d5d7eb0e8a5..efc228b697dcdb80fd1a33b3fbd9b36a93cb91c6 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYR_IDX, "ocl_cblas_ssyr");
+ __K = ti_cblas_get_kernel("ocl_cblas_ssyr");
#ifdef __cplusplus
try
#else
index ea04df9a24f991f193fdd6f7b9cb887f2341c97d..9218c3b8fad77f7b563ed561968ad1ab24531915 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYR2_IDX, "ocl_cblas_ssyr2");
+ __K = ti_cblas_get_kernel("ocl_cblas_ssyr2");
#ifdef __cplusplus
try
#else
index 97b6b443b80c33ae0988494f948b559cd83c8647..9613eb5eb594b47bec739a2dc689216e4979084d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYR2K_IDX, "ocl_cblas_ssyr2k");
+ __K = ti_cblas_get_kernel("ocl_cblas_ssyr2k");
#ifdef __cplusplus
try
#else
index 17c8bbddeeaee3871f6cb208fc49eb4d73a5a18f..d7296d24c0768513b12059f05a87018129cbefae 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYRK_IDX, "ocl_cblas_ssyrk");
+ __K = ti_cblas_get_kernel("ocl_cblas_ssyrk");
#ifdef __cplusplus
try
#else
index 97b7922d647442cdc1f94eaa8d67aba92cf2cb2d..f7041aaf3323f5898e10afdb5067e590c7c74835 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STBMV_IDX, "ocl_cblas_stbmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_stbmv");
#ifdef __cplusplus
try
#else
index 47002da6eaa00838607cb2a56bac48e161455d86..a7e586819a667bb91babb4a68385e453611a8297 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STBSV_IDX, "ocl_cblas_stbsv");
+ __K = ti_cblas_get_kernel("ocl_cblas_stbsv");
#ifdef __cplusplus
try
#else
index a5ea0271c4383d0167bb98c8dd5c7a70bbd8a847..a9cf8363538bd8597e9b23a498fed79179d0bc17 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STPMV_IDX, "ocl_cblas_stpmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_stpmv");
#ifdef __cplusplus
try
#else
index 0b23825a29b71d1927177aff2dee0b1b14bc7b06..cc419808b2d301ec5430e442133edd6fd6b0fe31 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STPSV_IDX, "ocl_cblas_stpsv");
+ __K = ti_cblas_get_kernel("ocl_cblas_stpsv");
#ifdef __cplusplus
try
#else
index fc1d9d82400c27ccc8adb3423f5576cd573e6bc5..14384df78dd16a72e1200e93ba0edda3ca41b1ca 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STRMM_IDX, "ocl_cblas_strmm");
+ __K = ti_cblas_get_kernel("ocl_cblas_strmm");
#ifdef __cplusplus
try
#else
index 933bd345d9e178504416da29d1234f01d227607f..b9bbd6bd43bd7c8ee987598f206e7513e9234f2a 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STRMV_IDX, "ocl_cblas_strmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_strmv");
#ifdef __cplusplus
try
#else
index 56ad072c622be0bb7b1489001e296e77c4d18676..8e63aeeef5121a6ae84df8419ab0312cbc13227a 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STRSM_IDX, "ocl_cblas_strsm");
+ __K = ti_cblas_get_kernel("ocl_cblas_strsm");
#ifdef __cplusplus
try
#else
index 97aed05220eae5f502c1c6f462e67caca929906f..4855e1c1974ace9311b02cd1583b8c1869961b3b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STRSV_IDX, "ocl_cblas_strsv");
+ __K = ti_cblas_get_kernel("ocl_cblas_strsv");
#ifdef __cplusplus
try
#else
index 2ff97c8e9002195a31690106b57c6741a9bc578a..87a37da1d4894efbdf8bffb371f11d910c9f4b65 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_XERBLA_IDX, "ocl_cblas_xerbla");
+ __K = ti_cblas_get_kernel("ocl_cblas_xerbla");
#ifdef __cplusplus
try
#else
index 8450fae6dddcad9b73926122c1d31c307607af9b..134e612f92f8fdbc8efd1499367689fddcb6f741 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZAXPY_IDX, "ocl_cblas_zaxpy");
+ __K = ti_cblas_get_kernel("ocl_cblas_zaxpy");
#ifdef __cplusplus
try
#else
index 60edee0b1a1dcd0898b4e4e537e0f1881ad22f77..2b2bb9d007edcd14d6873e5cd738ec69d31b9240 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZCOPY_IDX, "ocl_cblas_zcopy");
+ __K = ti_cblas_get_kernel("ocl_cblas_zcopy");
#ifdef __cplusplus
try
#else
diff --git a/blasblisacc/src/ti_cblas_cblas_zdotc_sub.c b/blasblisacc/src/ti_cblas_cblas_zdotc_sub.c
index 80cd4d8d19fa29ed864e90aff61fb3bf55901008..98aa5505058bd1efc2f5f5d9580f4ab21afa128c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZDOTC_SUB_IDX, "ocl_cblas_zdotc_sub");
+ __K = ti_cblas_get_kernel("ocl_cblas_zdotc_sub");
#ifdef __cplusplus
try
#else
diff --git a/blasblisacc/src/ti_cblas_cblas_zdotu_sub.c b/blasblisacc/src/ti_cblas_cblas_zdotu_sub.c
index 841cf7103056e3085d99ca635e23c0ac9ab41396..5058183de23498a6ea056d90ddd516ef9c8b9b50 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZDOTU_SUB_IDX, "ocl_cblas_zdotu_sub");
+ __K = ti_cblas_get_kernel("ocl_cblas_zdotu_sub");
#ifdef __cplusplus
try
#else
index d2c7eca168abcbea0f24d50e003b9cd0ad42de62..f35b69d92f24df99c572833b587b89b3856064fe 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZDSCAL_IDX, "ocl_cblas_zdscal");
+ __K = ti_cblas_get_kernel("ocl_cblas_zdscal");
#ifdef __cplusplus
try
#else
index 0a2a0740e8ce2892c5a71309b72157bcc707ac89..bc6b0d9ba6a0a3db4db578362217c146c04dc707 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGBMV_IDX, "ocl_cblas_zgbmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_zgbmv");
#ifdef __cplusplus
try
#else
index 77e5bf914df46ef5c0d22898d3b5988180284cd8..eb8ce004f2b64bd739e80fa0e7b1ec0c064d7e39 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGEMM_IDX, "ocl_cblas_zgemm");
+ __K = ti_cblas_get_kernel("ocl_cblas_zgemm");
#ifdef __cplusplus
try
#else
index a195377e20dbc6f6f0257e60ef63d0ba9797b168..63dc74fbaeaeee368d0570e5195c7655ec46c729 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGEMV_IDX, "ocl_cblas_zgemv");
+ __K = ti_cblas_get_kernel("ocl_cblas_zgemv");
#ifdef __cplusplus
try
#else
index 341c8f1fe67c11eb94930b8a0fada68781c109de..a51a0016ae1fa63f799b0cb9911d082478b240b1 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGERC_IDX, "ocl_cblas_zgerc");
+ __K = ti_cblas_get_kernel("ocl_cblas_zgerc");
#ifdef __cplusplus
try
#else
index 94cddea2813c7f4cb1fcbfa312f41b51984b9b2c..53aa086df4f9e6c496578e493d517810883afbfc 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGERU_IDX, "ocl_cblas_zgeru");
+ __K = ti_cblas_get_kernel("ocl_cblas_zgeru");
#ifdef __cplusplus
try
#else
index 36fa157152bb4caad5c34d72e0c86987e848f27a..8402a6fdb5cb1028408b229849b7a0937b11f143 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHBMV_IDX, "ocl_cblas_zhbmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_zhbmv");
#ifdef __cplusplus
try
#else
index d206e1361fdcb13fe6151a4ac8b82dfc7384a434..1a1cb8b40c97b8a1631dcea08dbc3ce16d571fd8 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHEMM_IDX, "ocl_cblas_zhemm");
+ __K = ti_cblas_get_kernel("ocl_cblas_zhemm");
#ifdef __cplusplus
try
#else
index 6e87053d010dd9a82dc474db8a40b02f62c2a1d8..b86bd760c75e00f40a56f6f71e3f73e56bcb44ac 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHEMV_IDX, "ocl_cblas_zhemv");
+ __K = ti_cblas_get_kernel("ocl_cblas_zhemv");
#ifdef __cplusplus
try
#else
index a3dcd1b25557b440cc52078b8ce54920806b8be6..b26953c7c7704e88d1481eaec09f6af6eb898158 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHER_IDX, "ocl_cblas_zher");
+ __K = ti_cblas_get_kernel("ocl_cblas_zher");
#ifdef __cplusplus
try
#else
index 146d17ff455202d1746c5b1c112724192aff2c37..884cc561f075ee5e9d3d39802d2559f371422a98 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHER2_IDX, "ocl_cblas_zher2");
+ __K = ti_cblas_get_kernel("ocl_cblas_zher2");
#ifdef __cplusplus
try
#else
index 6f52d3ae5d260957a04446aee24309c2ca037cfd..80f76dcc4518ccb80b6476bbec23aead7dfb3f3d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHER2K_IDX, "ocl_cblas_zher2k");
+ __K = ti_cblas_get_kernel("ocl_cblas_zher2k");
#ifdef __cplusplus
try
#else
index e5437115432891ca5fa07b00195a43f301729402..aad6fdbf2654856d9e222b47404008e8f35cae6b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHERK_IDX, "ocl_cblas_zherk");
+ __K = ti_cblas_get_kernel("ocl_cblas_zherk");
#ifdef __cplusplus
try
#else
index 9a5dd8fc42da0346c0cb73bb11b8a59dc3d11b98..85d73c88a1477dde1805728dfbfe5f82b5dde3da 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHPMV_IDX, "ocl_cblas_zhpmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_zhpmv");
#ifdef __cplusplus
try
#else
index d479109e494b12efc0b9afcd9f963d6cb9f40687..5632ee733ddb4c0c136d8bd456105a77f785bfbe 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHPR_IDX, "ocl_cblas_zhpr");
+ __K = ti_cblas_get_kernel("ocl_cblas_zhpr");
#ifdef __cplusplus
try
#else
index e55ec9f3bcf193671f735c27c7e6434eb0694065..6063f828c0982ff6206ca3d28208009dd46ddab0 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHPR2_IDX, "ocl_cblas_zhpr2");
+ __K = ti_cblas_get_kernel("ocl_cblas_zhpr2");
#ifdef __cplusplus
try
#else
index 41d2041dd63bd8727e59b8ead3fe98b8c39c6f07..ef70e02461b83eea7d3f48b20269628e6b5d085e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZROTG_IDX, "ocl_cblas_zrotg");
+ __K = ti_cblas_get_kernel("ocl_cblas_zrotg");
#ifdef __cplusplus
try
#else
index c08248fa6b7bb3457d89d3fa723d7ceb3dd43018..fd6a16359b6ac422ce8c7512a22aaf85391e2cb3 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSCAL_IDX, "ocl_cblas_zscal");
+ __K = ti_cblas_get_kernel("ocl_cblas_zscal");
#ifdef __cplusplus
try
#else
index a966c27943faa13cd8cda00a678db1646a2ee556..f8b98c7debb8cdd7a9a9e9afa4467c7e518e7397 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSWAP_IDX, "ocl_cblas_zswap");
+ __K = ti_cblas_get_kernel("ocl_cblas_zswap");
#ifdef __cplusplus
try
#else
index deda9fd7c9995cd58cb5b0f7f602228acdd4125c..e6f0df99ded8c6c3c17888eeeaa34827c78377e9 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSYMM_IDX, "ocl_cblas_zsymm");
+ __K = ti_cblas_get_kernel("ocl_cblas_zsymm");
#ifdef __cplusplus
try
#else
index 62de2c1216ee1e8b84370c748851aa67baf099d7..15ab279a0305be36860d3edece24d0d75cbf4b3f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSYR2K_IDX, "ocl_cblas_zsyr2k");
+ __K = ti_cblas_get_kernel("ocl_cblas_zsyr2k");
#ifdef __cplusplus
try
#else
index 5271187201085378fb052d7f2ef8071ff0b31caa..32767e6e06cd993a3cb9907ecd39468bd7d298f7 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSYRK_IDX, "ocl_cblas_zsyrk");
+ __K = ti_cblas_get_kernel("ocl_cblas_zsyrk");
#ifdef __cplusplus
try
#else
index 5b7dc34149d73f267e505e6b2a9bcf50e45b0282..a160fe79eb03598db3855c65e25a160f19ab1e02 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTBMV_IDX, "ocl_cblas_ztbmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ztbmv");
#ifdef __cplusplus
try
#else
index fd9ba055b3443d816665ce5f420ea50b4206b7f3..bb9f6916d1f9b1de28db005b53ee4058a86344b9 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTBSV_IDX, "ocl_cblas_ztbsv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ztbsv");
#ifdef __cplusplus
try
#else
index 469a4996d0cea55b2c59c4dd90816391dc6254ce..f631ce67bd3548f6f80b78982173375d4d50e31c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTPMV_IDX, "ocl_cblas_ztpmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ztpmv");
#ifdef __cplusplus
try
#else
index 4def0efd93f10316fc3993786f16584f4c7f2cce..5c00274d80d49b6c306a4a28d9f0dc6bf63f5482 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTPSV_IDX, "ocl_cblas_ztpsv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ztpsv");
#ifdef __cplusplus
try
#else
index 803bc4bc43c0fd03d6af68b031d8cd64a74129bd..859a87bccd24a06944aaf557f81a2dd21eb28e2f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTRMM_IDX, "ocl_cblas_ztrmm");
+ __K = ti_cblas_get_kernel("ocl_cblas_ztrmm");
#ifdef __cplusplus
try
#else
index a0d12f44c92e89a983365953a0bd51ad19e539b3..d979d62ac2912aa4db704cbd890c146766405d69 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTRMV_IDX, "ocl_cblas_ztrmv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ztrmv");
#ifdef __cplusplus
try
#else
index 8a2411a5c917f88e3b78197918ffa6bebca23964..cba160981ca2e0a882c0bb50aa75e39bdec02232 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTRSM_IDX, "ocl_cblas_ztrsm");
+ __K = ti_cblas_get_kernel("ocl_cblas_ztrsm");
#ifdef __cplusplus
try
#else
index 02d2797cc74948d66524f55e0f86282d8a5137b9..46329f850506fb49408da40afb9e04d0546cd1cd 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTRSV_IDX, "ocl_cblas_ztrsv");
+ __K = ti_cblas_get_kernel("ocl_cblas_ztrsv");
#ifdef __cplusplus
try
#else
index f2dd549b161d11bab0d5df6b8cac9fcd3c724d7e..e7e2c910eaf049ed2c06075a2f6f9fd0c9d315a5 100644 (file)
#endif
/* Global variables */
-#ifdef __cplusplus
-
-#if 0
-Context ti_cblas_ocl_context;
-std::vector<Device> ti_cblas_ocl_devices;
-CommandQueue ti_cblas_ocl_Q;
-Program::Binaries ti_cblas_ocl_binary;
-Program ti_cblas_ocl_program;
-Kernel* ti_cblas_ocl_kernels[TI_CBLAS_NUM_KERNELS];
-#else
-Context* ti_cblas_ocl_context = NULL;
-std::vector<Device>* ti_cblas_ocl_devices = NULL;
-CommandQueue* ti_cblas_ocl_Q = NULL;
-Program::Binaries* ti_cblas_ocl_binary = NULL;
-Program* ti_cblas_ocl_program = NULL;
-#endif
+Context* ti_cblas_ocl_context = NULL;
+std::vector<Device>* ti_cblas_ocl_devices = NULL;
+CommandQueue* ti_cblas_ocl_Q = NULL;
+Program::Binaries* ti_cblas_ocl_binary = NULL;
+//Program* ti_cblas_ocl_program = NULL;
+Program ti_cblas_ocl_program;
-#else
-cl_context ti_cblas_ocl_context;
-cl_command_queue ti_cblas_ocl_Q;
-cl_program ti_cblas_ocl_program;
-cl_kernel ti_cblas_ocl_kernels[TI_CBLAS_NUM_KERNELS];
-#endif
int ti_cblas_init_done = 0; /* flag to check if init is complete */
int ti_cblas_disable_debug = 0; /* runtime toggle to disable debug */
int ti_cblas_offload = TI_CBLAS_OFFLOAD_SIZE;
fprintf(stderr, "ERROR: (%s,%d)\n", msg, code);
}
-#ifdef __cplusplus
extern "C"
-#endif
int ti_blis_init(void)
{
- int r_val = 1;
- TI_CBLAS_DEBUG_PRINT("Initializing BLIS ARM\n");
- bli_init();
- TI_CBLAS_DEBUG_PRINT("BLIS ARM initialized\n");
-
-#ifdef __cplusplus
- Event e;
- Kernel* __K;
-#else
- cl_kernel __K;
-#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGEMM_IDX, "ocl_bli_init");
-#ifdef __cplusplus
- try
-#else
- cl_int err = CL_SUCCESS;
-#endif
- {
- void *msmc_ptr;
- TI_CBLAS_DEBUG_PRINT("Initializing BLIS DSP\n");
- msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
-#ifdef __cplusplus
- Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
- __K->setArg(0, buf_MSMC);
+ int r_val = 1;
+ TI_CBLAS_DEBUG_PRINT("Initializing BLIS ARM\n");
+ bli_init();
+ TI_CBLAS_DEBUG_PRINT("BLIS ARM initialized\n");
-#else
- cl_mem buf_MSMC = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr, &err);
- TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 0, sizeof(buf_MSMC), &buf_MSMC);
- TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
-#endif
+ Event e;
+ Kernel* __K;
+
+ __K = ti_cblas_get_kernel("ocl_bli_init");
+
+ try
+ {
+ void *msmc_ptr;
+ TI_CBLAS_DEBUG_PRINT("Initializing BLIS DSP\n");
+ msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
+ Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
+ __K->setArg(0, buf_MSMC);
-#ifdef __cplusplus
- __K->setArg(1, __local(L2_BUF_SIZE));
-#else
- err |= clSetKernelArg(__K, 1, L2_BUF_SIZE, NULL);
-#endif
+ __K->setArg(1, __local(L2_BUF_SIZE));
-#ifdef __cplusplus
- ti_cblas_ocl_Q->enqueueTask(*__K, 0, &e);
- e.wait();
-#else
- cl_event e;
- err |= clEnqueueTask(ti_cblas_ocl_Q, __K, 0, 0, &e);
- TI_CBLAS_OCL_CHKERROR("clEnqueueTask",err);
- err |= clWaitForEvents(1, &e);
- TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
- err |= clReleaseEvent(e);
- TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
+ ti_cblas_ocl_Q->enqueueTask(*__K, 0, &e);
+ e.wait();
-#endif
- ti_cblas_mem_free(msmc_ptr);
- ti_cblas_delete_kernel(__K);
- TI_CBLAS_DEBUG_PRINT("BLIS DSP initialized\n");
-
- }
-#ifdef __cplusplus
- catch (Error err)
- {
- ti_cblas_error(err.what(),err.err());
- r_val = 1;
- return r_val;
- }
-#endif
+ ti_cblas_mem_free(msmc_ptr);
+ ti_cblas_delete_kernel(__K);
+ TI_CBLAS_DEBUG_PRINT("BLIS DSP initialized\n");
+
+ }
+
+ catch (Error err)
+ {
+ ti_cblas_error(err.what(),err.err());
+ r_val = 1;
+ return r_val;
+ }
}
-#ifdef __cplusplus
extern "C"
-#endif
int ti_blis_finalize(void)
{
- int r_val = 1;
- bli_finalize();
+ int r_val = 1;
+ bli_finalize();
-#ifdef __cplusplus
- Event e;
- Kernel* __K;
-#else
- cl_kernel __K;
-#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGEMM_IDX, "ocl_bli_finalize");
-#ifdef __cplusplus
- try
-#else
- cl_int err = CL_SUCCESS;
-#endif
- {
-#ifdef __cplusplus
- ti_cblas_ocl_Q->enqueueTask(*__K, 0, &e);
- e.wait();
-#else
- cl_event e;
- err |= clEnqueueTask(ti_cblas_ocl_Q, __K, 0, 0, &e);
- TI_CBLAS_OCL_CHKERROR("clEnqueueTask",err);
- err |= clWaitForEvents(1, &e);
- TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
- err |= clReleaseEvent(e);
- TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
+ Event e;
+ Kernel* __K;
+ __K = ti_cblas_get_kernel("ocl_bli_finalize");
-#endif
- ti_cblas_delete_kernel(__K);
- }
-#ifdef __cplusplus
- catch (Error err)
- {
- ti_cblas_error(err.what(),err.err());
- r_val = 1;
- return r_val;
- }
-#endif
+ try
+ {
+ ti_cblas_ocl_Q->enqueueTask(*__K, 0, &e);
+ e.wait();
+ ti_cblas_delete_kernel(__K);
+ }
+
+ catch (Error err)
+ {
+ ti_cblas_error(err.what(),err.err());
+ r_val = 1;
+ return r_val;
+ }
}
-#ifdef __cplusplus
extern "C"
-#endif
int ti_cblas_finalize(void)
{
- int r_val = 1;
- //printf("ti_cblas_finalize\n");
+ int r_val = 1;
+ //printf("ti_cblas_finalize\n");
- /* If ti_cblas_init_done is equal to 0,
- * then we know that ti_cblas_init was not called,
- * and so we can return early.
- */
- if(ti_cblas_init_done == 0)
- return 0;
+ /* If ti_cblas_init_done is equal to 0,
+ * then we know that ti_cblas_init was not called,
+ * and so we can return early.
+ */
+ if(ti_cblas_init_done == 0)
+ return 0;
- //r_val = ti_blis_finalize();
- /*Using same name as ti_cblas_init critical region. See notes in bli_init*/
+ //r_val = ti_blis_finalize();
+ /*Using same name as ti_cblas_init critical region. See notes in bli_init*/
#pragma omp critical (ti_cblas_init_critical)
- {
- if (ti_cblas_init_done == 1)
- {
- // Destroy Pthread
- pthread_mutex_destroy(&MUTEX);
- pthread_cond_destroy (&CV);
-
- //destroy Command queue, program, devices and context.
- if(ti_cblas_ocl_Q != NULL)
- {
- delete(ti_cblas_ocl_Q);
- ti_cblas_ocl_Q = NULL;
- }
- if(ti_cblas_ocl_program != NULL)
- {
- delete(ti_cblas_ocl_program);
- ti_cblas_ocl_program = NULL;
- }
- if(ti_cblas_ocl_binary != NULL)
- {
- delete(ti_cblas_ocl_binary);
- ti_cblas_ocl_binary = NULL;
- }
- if(ti_cblas_ocl_devices != NULL)
- {
- delete(ti_cblas_ocl_devices);
- ti_cblas_ocl_devices = NULL;
- }
- if(ti_cblas_ocl_context != NULL)
- {
- delete(ti_cblas_ocl_context);
- ti_cblas_ocl_context = NULL;
- }
- ti_cblas_init_done = 0;
- r_val = 0;
- }
- }
- return r_val;
+ {
+ if (ti_cblas_init_done == 1)
+ {
+ // Destroy Pthread
+ pthread_mutex_destroy(&MUTEX);
+ pthread_cond_destroy (&CV);
+
+ //destroy Command queue, program, devices and context.
+ if(ti_cblas_ocl_Q != NULL)
+ {
+ delete(ti_cblas_ocl_Q);
+ ti_cblas_ocl_Q = NULL;
+ }
+/* if(ti_cblas_ocl_program != NULL)
+ {
+ delete(ti_cblas_ocl_program);
+ ti_cblas_ocl_program = NULL;
+ }*/
+ if(ti_cblas_ocl_binary != NULL)
+ {
+ delete(ti_cblas_ocl_binary);
+ ti_cblas_ocl_binary = NULL;
+ }
+ if(ti_cblas_ocl_devices != NULL)
+ {
+ delete(ti_cblas_ocl_devices);
+ ti_cblas_ocl_devices = NULL;
+ }
+ if(ti_cblas_ocl_context != NULL)
+ {
+ delete(ti_cblas_ocl_context);
+ ti_cblas_ocl_context = NULL;
+ }
+ ti_cblas_init_done = 0;
+ r_val = 0;
+ }
+ }
+ return r_val;
}
void ti_cblas_auto_finalize(void)
}
}
- /* 3-digit value: 012
- * Left-most digit => L1 (0)
- * Middle-digit => L2 (1)
- * Right-most => L3 (2)
- */
- TI_CBLAS_L1_OFFLOAD = ti_cblas_offload / 100;
- int tmp_offload = ti_cblas_offload % 100;
- TI_CBLAS_L2_OFFLOAD = tmp_offload / 10;
- TI_CBLAS_L3_OFFLOAD = tmp_offload % 10;
- TI_CBLAS_DEBUG_PRINT("BLAS Offload values: L1=%d, L2=%d, L3=%d\n",
- TI_CBLAS_L1_OFFLOAD, TI_CBLAS_L2_OFFLOAD, TI_CBLAS_L3_OFFLOAD);
- if ((TI_CBLAS_L1_OFFLOAD == TI_CBLAS_OFFLOAD_SIZE)) {
- TI_CBLAS_ERROR_EXIT("Size-based offload NOT supported for BLAS Level 1 yet.\n");
- }
- if ((TI_CBLAS_L2_OFFLOAD == TI_CBLAS_OFFLOAD_SIZE)) {
- TI_CBLAS_ERROR_EXIT("Size-based offload NOT supported for BLAS Level 2 yet.\n");
- }
+ /* 3-digit value: 012
+ * Left-most digit => L1 (0)
+ * Middle-digit => L2 (1)
+ * Right-most => L3 (2)
+ */
+ TI_CBLAS_L1_OFFLOAD = ti_cblas_offload / 100;
+ int tmp_offload = ti_cblas_offload % 100;
+ TI_CBLAS_L2_OFFLOAD = tmp_offload / 10;
+ TI_CBLAS_L3_OFFLOAD = tmp_offload % 10;
+ TI_CBLAS_DEBUG_PRINT("BLAS Offload values: L1=%d, L2=%d, L3=%d\n",
+ TI_CBLAS_L1_OFFLOAD, TI_CBLAS_L2_OFFLOAD, TI_CBLAS_L3_OFFLOAD);
+ if ((TI_CBLAS_L1_OFFLOAD == TI_CBLAS_OFFLOAD_SIZE)) {
+ TI_CBLAS_ERROR_EXIT("Size-based offload NOT supported for BLAS Level 1 yet.\n");
+ }
+ if ((TI_CBLAS_L2_OFFLOAD == TI_CBLAS_OFFLOAD_SIZE)) {
+ TI_CBLAS_ERROR_EXIT("Size-based offload NOT supported for BLAS Level 2 yet.\n");
+ }
/*------------------------------------------------------------------------
- * Read the offline compiled kernel module
- *-----------------------------------------------------------------------*/
- TI_CBLAS_DEBUG_PRINT("Reading Kernels\n");
- const unsigned char* bin;
+ * Read the offline compiled kernel module
+ *-----------------------------------------------------------------------*/
+ TI_CBLAS_DEBUG_PRINT("Reading Kernels\n");
+ const unsigned char* bin;
#ifdef TI_CBLAS_FAT_BINARY
bin = (unsigned char *)ti_cblas_kernel_dsp_bin;
const size_t bin_length = ti_cblas_kernel_dsp_bin_len;
#else
const char binary[] = "./ti_cblas_kernel.out";
unsigned int bin_length;
-#ifdef __cplusplus
+
bin_length = ocl_read_binary(binary, (char*&)bin);
-#else
- FILE *fp = fopen(binary, "r");
- if (!fp) {
- TI_CBLAS_ERROR_EXIT("Could not open OpenCL pre-compiled binary %s for reading\n", binary);
- }
- struct stat fileinfo;
- stat(binary, &fileinfo);
- bin_length = fileinfo.st_size;
- bin = (char *)malloc(bin_length);
- if (!bin) {
- TI_CBLAS_ERROR_EXIT("Could not malloc of size %d for reading OpenCL binary\n", bin_length);
- }
- if (fread((char *)bin, bin_length, 1, fp) != 1) {
- TI_CBLAS_ERROR_EXIT("Could not read %d bytes of OpenCL binary\n", bin_length);
- }
- fclose(fp);
-#endif /* cplusplus */
#endif /* FAT_BINARY */
/* OpenCL init */
TI_CBLAS_DEBUG_PRINT("Initializing OpenCL\n");
-#ifdef __cplusplus
- ti_cblas_ocl_context = new Context(CL_DEVICE_TYPE_ACCELERATOR);
- ti_cblas_ocl_devices = new std::vector<Device> (ti_cblas_ocl_context->getInfo<CL_CONTEXT_DEVICES>());
- ti_cblas_ocl_binary = new Program::Binaries(1, std::make_pair(bin, bin_length));
- ti_cblas_ocl_program = new Program(*ti_cblas_ocl_context, *ti_cblas_ocl_devices, *ti_cblas_ocl_binary);
- ti_cblas_ocl_program->build(*ti_cblas_ocl_devices);
- ti_cblas_ocl_Q = new CommandQueue(*ti_cblas_ocl_context, ti_cblas_ocl_devices[0][0], CL_QUEUE_PROFILING_ENABLE);
-#else
- cl_int err;
- cl_device_id device;
- /* Create an in-order command queue by default*/
- int queue_flags = 0;
-#ifdef TI_CBLAS_PROFILE
- queue_flags |= CL_QUEUE_PROFILING_ENABLE;
-#endif
- ti_cblas_ocl_context = clCreateContextFromType(0,CL_DEVICE_TYPE_ACCELERATOR,0,0,&err);
- TI_CBLAS_OCL_CHKERROR("clCreateContextFromType",err);
- err = clGetDeviceIDs(0,CL_DEVICE_TYPE_ACCELERATOR,1,&device,0);
- TI_CBLAS_OCL_CHKERROR("clGetDeviceIDs",err);
- ti_cblas_ocl_Q = clCreateCommandQueue(ti_cblas_ocl_context, device, queue_flags, &err);
- TI_CBLAS_OCL_CHKERROR("clCreateCommandQueue",err);
- ti_cblas_ocl_program = clCreateProgramWithBinary(ti_cblas_ocl_context, 1, &device, &bin_length, &bin, NULL, &err);
- TI_CBLAS_OCL_CHKERROR("clCreateProgramWithBinary",err);
- const char *compile_options = "";
- err = clBuildProgram(ti_cblas_ocl_program, 1, &device, compile_options, 0, 0);
- TI_CBLAS_OCL_CHKERROR("clBuildProgram",err);
-
-#endif
+ ti_cblas_ocl_context = new Context(CL_DEVICE_TYPE_ACCELERATOR);
+ ti_cblas_ocl_devices = new std::vector<Device> (ti_cblas_ocl_context->getInfo<CL_CONTEXT_DEVICES>());
+ ti_cblas_ocl_binary = new Program::Binaries(1, std::make_pair(bin, bin_length));
+ //ti_cblas_ocl_program = new Program(*ti_cblas_ocl_context, *ti_cblas_ocl_devices, *ti_cblas_ocl_binary);
+ ti_cblas_ocl_program = Program(*ti_cblas_ocl_context, *ti_cblas_ocl_devices, *ti_cblas_ocl_binary);
+ //ti_cblas_ocl_program->build(*ti_cblas_ocl_devices);
+ ti_cblas_ocl_program.build(*ti_cblas_ocl_devices);
+ ti_cblas_ocl_Q = new CommandQueue(*ti_cblas_ocl_context, ti_cblas_ocl_devices[0][0], CL_QUEUE_PROFILING_ENABLE);
#ifndef TI_CBLAS_FAT_BINARY
-#ifdef __cplusplus
delete [] bin;
-#else
- free((char*)bin);
-#endif
#endif /* FAT_BINARY */
- TI_CBLAS_DEBUG_PRINT("OpenCL initialized\n");
+ TI_CBLAS_DEBUG_PRINT("OpenCL initialized\n");
+
TI_CBLAS_DEBUG_PRINT("Initializing Pthreads\n");
- /* Initializing pthreads */
- pthread_cond_init (&CV, 0);
- pthread_mutex_init(&MUTEX, 0);
- TI_CBLAS_DEBUG_PRINT("Pthreads initialized\n");
-
- TI_CBLAS_DEBUG_PRINT("Initializing BLIS\n");
- ti_blis_init();
- TI_CBLAS_DEBUG_PRINT("BLIS initialized\n");
-
- atexit(ti_cblas_auto_finalize);
-
- TI_CBLAS_PROFILE_REPORT(" Initialization took %8.2f us\n", (float) clock_diff);
- ti_cblas_init_done = 1;
- TI_CBLAS_DEBUG_PRINT("ti_cblas_init: Finished OpenCL initialization\n");
- } //end of !ti_cblas_init_done
- } // End of critical section
- return;
+
+ /* Initializing pthreads */
+ pthread_cond_init (&CV, 0);
+ pthread_mutex_init(&MUTEX, 0);
+ TI_CBLAS_DEBUG_PRINT("Pthreads initialized\n");
+
+ TI_CBLAS_DEBUG_PRINT("Initializing BLIS\n");
+ ti_blis_init();
+ TI_CBLAS_DEBUG_PRINT("BLIS initialized\n");
+
+ atexit(ti_cblas_auto_finalize);
+
+ TI_CBLAS_PROFILE_REPORT(" Initialization took %8.2f us\n", (float) clock_diff);
+ ti_cblas_init_done = 1;
+ TI_CBLAS_DEBUG_PRINT("ti_cblas_init: Finished OpenCL initialization\n");
+ } //end of !ti_cblas_init_done
+
+ } // End of critical section
+
+ return;
}
void ti_cblas_mem_free(void *ptr)
{
- pthread_mutex_lock(&MUTEX);
- __free_msmc(ptr);
- pthread_cond_broadcast(&CV);
- pthread_mutex_unlock(&MUTEX);
+ pthread_mutex_lock(&MUTEX);
+ __free_msmc(ptr);
+ pthread_cond_broadcast(&CV);
+ pthread_mutex_unlock(&MUTEX);
}
void *ti_cblas_mem_alloc(size_t size)
{
- void *ptr;
- pthread_mutex_lock(&MUTEX);
- /*-------------------------------------------------------------------------
-
- * Loop in case of false signal after broadcast.
+ void *ptr;
+ pthread_mutex_lock(&MUTEX);
+ /*-------------------------------------------------------------------------
- *------------------------------------------------------------------------*/
- while ((ptr = __malloc_msmc(size)) == 0)
+ * Loop in case of false signal after broadcast.
- pthread_cond_wait(&CV, &MUTEX);
- pthread_mutex_unlock(&MUTEX);
- return ptr;
+ *------------------------------------------------------------------------*/
+ while ((ptr = __malloc_msmc(size)) == 0)
+ pthread_cond_wait(&CV, &MUTEX);
+ pthread_mutex_unlock(&MUTEX);
+ return ptr;
}
* function with index 'idx'. Initializes the handle if it's
* not been used before, otherwise returns earlier handle
*/
-#ifdef __cplusplus
-Kernel*
-#else
-cl_kernel
-#endif
-ti_cblas_get_kernel(int idx, const char *fname)
+Kernel* ti_cblas_get_kernel(const char *fname)
{
-#if 0
- if (!ti_cblas_kernel_valid[idx]) {
-#ifdef __cplusplus
- ti_cblas_ocl_kernels[idx] = new Kernel(ti_cblas_ocl_program, fname);
-#else
- cl_int err;
- ti_cblas_ocl_kernels[idx] = clCreateKernel(ti_cblas_ocl_program,fname,&err);
- TI_CBLAS_OCL_CHKERROR("clCreateKernel",err);
-#endif
- ti_cblas_kernel_valid[idx] = 1;
- }
- return ti_cblas_ocl_kernels[idx];
-#else
-#ifdef __cplusplus
- Kernel* __K;
-#else
- cl_kernel __K;
-#endif
-#ifdef __cplusplus
- __K = new Kernel(*ti_cblas_ocl_program, fname);
-#else
- cl_int err;
- __K = clCreateKernel(ti_cblas_ocl_program,fname,&err);
- TI_CBLAS_OCL_CHKERROR("clCreateKernel",err);
-#endif
+ Kernel* __K;
- return __K;
-#endif
+ TI_CBLAS_DEBUG_PRINT("In ti_cblas_get_kernel: to get kernel %s.\n", fname);
+ __K = new Kernel(ti_cblas_ocl_program, fname);
+ TI_CBLAS_DEBUG_PRINT("ti_cblas_get_kernel: kernel %s is obtained.\n", fname);
+
+ return __K;
}
-#ifdef __cplusplus
int ti_cblas_delete_kernel(Kernel* K)
-#else
-int ti_cblas_delete_kernel(cl_kernel K)
-#endif
{
-#ifdef __cplusplus
- if(K != NULL)
- {
- delete(K);
- K=NULL;
- }
-#else
- clReleaseKernel(K);
-#endif
- return 0;
-}
-
+ if(K != NULL){
+ delete(K);
+ K = NULL;
+ }
+ return 0;
+}
index afbf5c18a66d6d41e5e930fcb705a2bd4780ba34..71cf70833aaf7c6516aa88481b58e47c16abafdc 100644 (file)
void cblas_cgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_cgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_cgbmv_facade(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_cgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_cgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_cgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_cgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_cgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
void cblas_chbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_chbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_chbmv_facade(order, Uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_chemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_chemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_chemm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_chemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_chemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
@@ -84,11 +84,11 @@ kernel void ocl_cblas_cher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO U
void cblas_cher2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *X, const int incX, global const void *Y, const int incY, global void *A, const int lda);
kernel void ocl_cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *X, const int incX, global const void *Y, const int incY, global void *A, const int lda)
{ cblas_cher2_facade(order, Uplo, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_cher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_cher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_cher2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_cherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const void *A, const int lda, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const void *A, const int lda, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_cherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const void *A, const int lda, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const void *A, const int lda, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_cherk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_chpmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *Ap, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_chpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *Ap, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
@@ -111,14 +111,14 @@ kernel void ocl_cblas_csscal(const int N, const float alpha, global void *X, con
void cblas_cswap_facade(const int N, global void *X, const int incX, global void *Y, const int incY);
kernel void ocl_cblas_cswap(const int N, global void *X, const int incX, global void *Y, const int incY)
{ cblas_cswap_facade(N, X, incX, Y, incY); }
-void cblas_csymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_csymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_csymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_csyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_csyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_csyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_csyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_csyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_csyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_ctbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const void *A, const int lda, global void *X, const int incX)
@@ -132,20 +132,20 @@ kernel void ocl_cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO
void cblas_ctpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *Ap, global void *X, const int incX);
kernel void ocl_cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *Ap, global void *X, const int incX)
{ cblas_ctpsv_facade(order, Uplo, TransA, Diag, N, Ap, X, incX); }
-void cblas_ctrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ctrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_ctrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_ctrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ctrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ctrmv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_ctrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ctrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_ctrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_ctrsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ctrsv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_dasum_facade(const int N, global const double *X, const int incX, double *retval);
-kernel void ocl_cblas_dasum(const int N, global const double *X, const int incX, double *retval)
+void cblas_dasum_facade(const int N, global const double *X, const int incX, global double *retval);
+kernel void ocl_cblas_dasum(const int N, global const double *X, const int incX, global double *retval)
{ cblas_dasum_facade(N, X, incX, retval); }
void cblas_daxpy_facade(const int N, const double alpha, global const double *X, const int incX, global double *Y, const int incY);
kernel void ocl_cblas_daxpy(const int N, const double alpha, global const double *X, const int incX, global double *Y, const int incY)
@@ -153,14 +153,14 @@ kernel void ocl_cblas_daxpy(const int N, const double alpha, global const double
void cblas_dcopy_facade(const int N, global const double *X, const int incX, global double *Y, const int incY);
kernel void ocl_cblas_dcopy(const int N, global const double *X, const int incX, global double *Y, const int incY)
{ cblas_dcopy_facade(N, X, incX, Y, incY); }
-void cblas_ddot_facade(const int N, global const double *X, const int incX, global const double *Y, const int incY, double *retval);
-kernel void ocl_cblas_ddot(const int N, global const double *X, const int incX, global const double *Y, const int incY, double *retval)
+void cblas_ddot_facade(const int N, global const double *X, const int incX, global const double *Y, const int incY, global double *retval);
+kernel void ocl_cblas_ddot(const int N, global const double *X, const int incX, global const double *Y, const int incY, global double *retval)
{ cblas_ddot_facade(N, X, incX, Y, incY, retval); }
void cblas_dgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY);
kernel void ocl_cblas_dgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY)
{ cblas_dgbmv_facade(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_dgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_dgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_dgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_dgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY);
kernel void ocl_cblas_dgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY)
@@ -168,8 +168,8 @@ kernel void ocl_cblas_dgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANS
void cblas_dger_facade(const enum CBLAS_ORDER order, const int M, const int N, const double alpha, global const double *X, const int incX, global const double *Y, const int incY, global double *A, const int lda);
kernel void ocl_cblas_dger(const enum CBLAS_ORDER order, const int M, const int N, const double alpha, global const double *X, const int incX, global const double *Y, const int incY, global double *A, const int lda)
{ cblas_dger_facade(order, M, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_dnrm2_facade(const int N, global const double *X, const int incX, double *retval);
-kernel void ocl_cblas_dnrm2(const int N, global const double *X, const int incX, double *retval)
+void cblas_dnrm2_facade(const int N, global const double *X, const int incX, global double *retval);
+kernel void ocl_cblas_dnrm2(const int N, global const double *X, const int incX, global double *retval)
{ cblas_dnrm2_facade(N, X, incX, retval); }
void cblas_drot_facade(const int N, global double *X, const int incX, global double *Y, const int incY, const double c, const double s);
kernel void ocl_cblas_drot(const int N, global double *X, const int incX, global double *Y, const int incY, const double c, const double s)
void cblas_dscal_facade(const int N, const double alpha, global double *X, const int incX);
kernel void ocl_cblas_dscal(const int N, const double alpha, global double *X, const int incX)
{ cblas_dscal_facade(N, alpha, X, incX); }
-void cblas_dsdot_facade(const int N, global const float *X, const int incX, global const float *Y, const int incY, double *retval);
-kernel void ocl_cblas_dsdot(const int N, global const float *X, const int incX, global const float *Y, const int incY, double *retval)
+void cblas_dsdot_facade(const int N, global const float *X, const int incX, global const float *Y, const int incY, global double *retval);
+kernel void ocl_cblas_dsdot(const int N, global const float *X, const int incX, global const float *Y, const int incY, global double *retval)
{ cblas_dsdot_facade(N, X, incX, Y, incY, retval); }
void cblas_dspmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *Ap, global const double *X, const int incX, const double beta, global double *Y, const int incY);
kernel void ocl_cblas_dspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *Ap, global const double *X, const int incX, const double beta, global double *Y, const int incY)
void cblas_dswap_facade(const int N, global double *X, const int incX, global double *Y, const int incY);
kernel void ocl_cblas_dswap(const int N, global double *X, const int incX, global double *Y, const int incY)
{ cblas_dswap_facade(N, X, incX, Y, incY); }
-void cblas_dsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_dsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_dsymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_dsymv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY);
kernel void ocl_cblas_dsymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY)
@@ -216,11 +216,11 @@ kernel void ocl_cblas_dsyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO U
void cblas_dsyr2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *X, const int incX, global const double *Y, const int incY, global double *A, const int lda);
kernel void ocl_cblas_dsyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *X, const int incX, global const double *Y, const int incY, global double *A, const int lda)
{ cblas_dsyr2_facade(order, Uplo, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_dsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_dsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_dsyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_dsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_dsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_dsyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_dtbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const double *A, const int lda, global double *X, const int incX);
kernel void ocl_cblas_dtbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const double *A, const int lda, global double *X, const int incX)
@@ -234,62 +234,62 @@ kernel void ocl_cblas_dtpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO
void cblas_dtpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *Ap, global double *X, const int incX);
kernel void ocl_cblas_dtpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *Ap, global double *X, const int incX)
{ cblas_dtpsv_facade(order, Uplo, TransA, Diag, N, Ap, X, incX); }
-void cblas_dtrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_dtrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_dtrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_dtrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *A, const int lda, global double *X, const int incX);
kernel void ocl_cblas_dtrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *A, const int lda, global double *X, const int incX)
{ cblas_dtrmv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_dtrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_dtrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_dtrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_dtrsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *A, const int lda, global double *X, const int incX);
kernel void ocl_cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *A, const int lda, global double *X, const int incX)
{ cblas_dtrsv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_dzasum_facade(const int N, global const void *X, const int incX, double *retval);
-kernel void ocl_cblas_dzasum(const int N, global const void *X, const int incX, double *retval)
+void cblas_dzasum_facade(const int N, global const void *X, const int incX, global double *retval);
+kernel void ocl_cblas_dzasum(const int N, global const void *X, const int incX, global double *retval)
{ cblas_dzasum_facade(N, X, incX, retval); }
-void cblas_dznrm2_facade(const int N, global const void *X, const int incX, double *retval);
-kernel void ocl_cblas_dznrm2(const int N, global const void *X, const int incX, double *retval)
+void cblas_dznrm2_facade(const int N, global const void *X, const int incX, global double *retval);
+kernel void ocl_cblas_dznrm2(const int N, global const void *X, const int incX, global double *retval)
{ cblas_dznrm2_facade(N, X, incX, retval); }
-void cblas_icamax_facade(const int N, global const void *X, const int incX, CBLAS_INDEX *retval);
-kernel void ocl_cblas_icamax(const int N, global const void *X, const int incX, CBLAS_INDEX *retval)
+void cblas_icamax_facade(const int N, global const void *X, const int incX, global CBLAS_INDEX *retval);
+kernel void ocl_cblas_icamax(const int N, global const void *X, const int incX, global CBLAS_INDEX *retval)
{ cblas_icamax_facade(N, X, incX, retval); }
-void cblas_idamax_facade(const int N, global const double *X, const int incX, CBLAS_INDEX *retval);
-kernel void ocl_cblas_idamax(const int N, global const double *X, const int incX, CBLAS_INDEX *retval)
+void cblas_idamax_facade(const int N, global const double *X, const int incX, global CBLAS_INDEX *retval);
+kernel void ocl_cblas_idamax(const int N, global const double *X, const int incX, global CBLAS_INDEX *retval)
{ cblas_idamax_facade(N, X, incX, retval); }
-void cblas_isamax_facade(const int N, global const float *X, const int incX, CBLAS_INDEX *retval);
-kernel void ocl_cblas_isamax(const int N, global const float *X, const int incX, CBLAS_INDEX *retval)
+void cblas_isamax_facade(const int N, global const float *X, const int incX, global CBLAS_INDEX *retval);
+kernel void ocl_cblas_isamax(const int N, global const float *X, const int incX, global CBLAS_INDEX *retval)
{ cblas_isamax_facade(N, X, incX, retval); }
-void cblas_izamax_facade(const int N, global const void *X, const int incX, CBLAS_INDEX *retval);
-kernel void ocl_cblas_izamax(const int N, global const void *X, const int incX, CBLAS_INDEX *retval)
+void cblas_izamax_facade(const int N, global const void *X, const int incX, global CBLAS_INDEX *retval);
+kernel void ocl_cblas_izamax(const int N, global const void *X, const int incX, global CBLAS_INDEX *retval)
{ cblas_izamax_facade(N, X, incX, retval); }
-void cblas_sasum_facade(const int N, global const float *X, const int incX, float *retval);
-kernel void ocl_cblas_sasum(const int N, global const float *X, const int incX, float *retval)
+void cblas_sasum_facade(const int N, global const float *X, const int incX, global float *retval);
+kernel void ocl_cblas_sasum(const int N, global const float *X, const int incX, global float *retval)
{ cblas_sasum_facade(N, X, incX, retval); }
void cblas_saxpy_facade(const int N, const float alpha, global const float *X, const int incX, global float *Y, const int incY);
kernel void ocl_cblas_saxpy(const int N, const float alpha, global const float *X, const int incX, global float *Y, const int incY)
{ cblas_saxpy_facade(N, alpha, X, incX, Y, incY); }
-void cblas_scasum_facade(const int N, global const void *X, const int incX, float *retval);
-kernel void ocl_cblas_scasum(const int N, global const void *X, const int incX, float *retval)
+void cblas_scasum_facade(const int N, global const void *X, const int incX, global float *retval);
+kernel void ocl_cblas_scasum(const int N, global const void *X, const int incX, global float *retval)
{ cblas_scasum_facade(N, X, incX, retval); }
-void cblas_scnrm2_facade(const int N, global const void *X, const int incX, float *retval);
-kernel void ocl_cblas_scnrm2(const int N, global const void *X, const int incX, float *retval)
+void cblas_scnrm2_facade(const int N, global const void *X, const int incX, global float *retval);
+kernel void ocl_cblas_scnrm2(const int N, global const void *X, const int incX, global float *retval)
{ cblas_scnrm2_facade(N, X, incX, retval); }
void cblas_scopy_facade(const int N, global const float *X, const int incX, global float *Y, const int incY);
kernel void ocl_cblas_scopy(const int N, global const float *X, const int incX, global float *Y, const int incY)
{ cblas_scopy_facade(N, X, incX, Y, incY); }
-void cblas_sdot_facade(const int N, global const float *X, const int incX, global const float *Y, const int incY, float *retval);
-kernel void ocl_cblas_sdot(const int N, global const float *X, const int incX, global const float *Y, const int incY, float *retval)
+void cblas_sdot_facade(const int N, global const float *X, const int incX, global const float *Y, const int incY, global float *retval);
+kernel void ocl_cblas_sdot(const int N, global const float *X, const int incX, global const float *Y, const int incY, global float *retval)
{ cblas_sdot_facade(N, X, incX, Y, incY, retval); }
-void cblas_sdsdot_facade(const int N, const float alpha, global const float *X, const int incX, global const float *Y, const int incY, float *retval);
-kernel void ocl_cblas_sdsdot(const int N, const float alpha, global const float *X, const int incX, global const float *Y, const int incY, float *retval)
+void cblas_sdsdot_facade(const int N, const float alpha, global const float *X, const int incX, global const float *Y, const int incY, global float *retval);
+kernel void ocl_cblas_sdsdot(const int N, const float alpha, global const float *X, const int incX, global const float *Y, const int incY, global float *retval)
{ cblas_sdsdot_facade(N, alpha, X, incX, Y, incY, retval); }
void cblas_sgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY);
kernel void ocl_cblas_sgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY)
{ cblas_sgbmv_facade(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_sgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_sgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_sgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_sgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY);
kernel void ocl_cblas_sgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY)
@@ -297,8 +297,8 @@ kernel void ocl_cblas_sgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANS
void cblas_sger_facade(const enum CBLAS_ORDER order, const int M, const int N, const float alpha, global const float *X, const int incX, global const float *Y, const int incY, global float *A, const int lda);
kernel void ocl_cblas_sger(const enum CBLAS_ORDER order, const int M, const int N, const float alpha, global const float *X, const int incX, global const float *Y, const int incY, global float *A, const int lda)
{ cblas_sger_facade(order, M, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_snrm2_facade(const int N, global const float *X, const int incX, float *retval);
-kernel void ocl_cblas_snrm2(const int N, global const float *X, const int incX, float *retval)
+void cblas_snrm2_facade(const int N, global const float *X, const int incX, global float *retval);
+kernel void ocl_cblas_snrm2(const int N, global const float *X, const int incX, global float *retval)
{ cblas_snrm2_facade(N, X, incX, retval); }
void cblas_srot_facade(const int N, global float *X, const int incX, global float *Y, const int incY, const float c, const float s);
kernel void ocl_cblas_srot(const int N, global float *X, const int incX, global float *Y, const int incY, const float c, const float s)
void cblas_sswap_facade(const int N, global float *X, const int incX, global float *Y, const int incY);
kernel void ocl_cblas_sswap(const int N, global float *X, const int incX, global float *Y, const int incY)
{ cblas_sswap_facade(N, X, incX, Y, incY); }
-void cblas_ssymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ssymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_ssymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_ssymv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY);
kernel void ocl_cblas_ssymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY)
@@ -342,11 +342,11 @@ kernel void ocl_cblas_ssyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO U
void cblas_ssyr2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, global const float *X, const int incX, global const float *Y, const int incY, global float *A, const int lda);
kernel void ocl_cblas_ssyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, global const float *X, const int incX, global const float *Y, const int incY, global float *A, const int lda)
{ cblas_ssyr2_facade(order, Uplo, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_ssyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ssyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_ssyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_ssyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ssyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_ssyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_stbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const float *A, const int lda, global float *X, const int incX);
kernel void ocl_cblas_stbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const float *A, const int lda, global float *X, const int incX)
@@ -360,14 +360,14 @@ kernel void ocl_cblas_stpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO
void cblas_stpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *Ap, global float *X, const int incX);
kernel void ocl_cblas_stpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *Ap, global float *X, const int incX)
{ cblas_stpsv_facade(order, Uplo, TransA, Diag, N, Ap, X, incX); }
-void cblas_strmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_strmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_strmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_strmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *A, const int lda, global float *X, const int incX);
kernel void ocl_cblas_strmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *A, const int lda, global float *X, const int incX)
{ cblas_strmv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_strsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_strsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_strsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_strsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *A, const int lda, global float *X, const int incX);
kernel void ocl_cblas_strsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *A, const int lda, global float *X, const int incX)
@@ -393,8 +393,8 @@ kernel void ocl_cblas_zdscal(const int N, const double alpha, global void *X, co
void cblas_zgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_zgbmv_facade(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_zgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_zgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_zgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
@@ -408,8 +408,8 @@ kernel void ocl_cblas_zgeru(const enum CBLAS_ORDER order, const int M, const int
void cblas_zhbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zhbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_zhbmv_facade(order, Uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_zhemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zhemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_zhemm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_zhemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zhemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
@@ -420,11 +420,11 @@ kernel void ocl_cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO U
void cblas_zher2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *X, const int incX, global const void *Y, const int incY, global void *A, const int lda);
kernel void ocl_cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *X, const int incX, global const void *Y, const int incY, global void *A, const int lda)
{ cblas_zher2_facade(order, Uplo, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_zher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_zher2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_zherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const void *A, const int lda, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const void *A, const int lda, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const void *A, const int lda, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const void *A, const int lda, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_zherk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_zhpmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *Ap, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zhpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *Ap, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
@@ -444,14 +444,14 @@ kernel void ocl_cblas_zscal(const int N, global const void *alpha, global void *
void cblas_zswap_facade(const int N, global void *X, const int incX, global void *Y, const int incY);
kernel void ocl_cblas_zswap(const int N, global void *X, const int incX, global void *Y, const int incY)
{ cblas_zswap_facade(N, X, incX, Y, incY); }
-void cblas_zsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_zsymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_zsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_zsyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_zsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_zsyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_ztbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ztbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const void *A, const int lda, global void *X, const int incX)
@@ -465,14 +465,14 @@ kernel void ocl_cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO
void cblas_ztpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *Ap, global void *X, const int incX);
kernel void ocl_cblas_ztpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *Ap, global void *X, const int incX)
{ cblas_ztpsv_facade(order, Uplo, TransA, Diag, N, Ap, X, incX); }
-void cblas_ztrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ztrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_ztrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_ztrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ztrmv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_ztrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
-kernel void ocl_cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ztrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
{ cblas_ztrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_ztrsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ztrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX)
index b8dc85432d662e18d0882e2b0860c415704fa2b0..d207e037c8dfa385f7de1d60d36fdbab11459d74 100644 (file)
}
}
+ printf("New L1D SRAM size is: %d\n", lib_get_L1D_SRAM_size());
+
/* Configure L2 if necessary */
*l2_SRAM_size_orig = lib_get_L2_SRAM_size(); /* get current L2 SRAM size */
l2_cfg_err = LIB_CACHE_SUCCESS;
if(l1d_cfg_err || l2_cfg_err) {
return(-3);
}
+
+ printf("New L2 SRAM size is: %d\n", lib_get_L2_SRAM_size());
/* get L1D and L2 SRAM base address */
l1d_SRAM_ptr = lib_get_L1D_SRAM_base();
index 7748301bba758cd1c6374ac07ba281b8c64d6c8f..df2abc1495169b6b3d8b9329812b7c2bdce751c5 100755 (executable)
#else
cl_kernel __K;
#endif
- __K = ${namespace}_get_kernel($trampdef, \"ocl_$trampname\");
+ __K = ${namespace}_get_kernel(\"ocl_$trampname\");
#ifdef __cplusplus
try
#else
extern void ${namespace}_error(const char* msg, int code);
extern void ${namespace}_init(void);
#ifdef __cplusplus
-extern Kernel* ${namespace}_get_kernel(int idx, const char *fname);
+extern Kernel* ${namespace}_get_kernel(const char *fname);
extern Context ${namespace}_ocl_context;
extern std::vector<Device> ${namespace}_ocl_devices;
extern CommandQueue ${namespace}_ocl_Q;
extern Program ${namespace}_ocl_program;
extern Kernel* ${namespace}_ocl_kernels[];
#else
-extern cl_kernel ${namespace}_get_kernel(int idx, const char *fname);
+extern cl_kernel ${namespace}_get_kernel(const char *fname);
extern cl_context ${namespace}_ocl_context;
extern cl_command_queue ${namespace}_ocl_Q;
extern cl_program ${namespace}_ocl_program;
index 1d04c6a4a588065f0c58c0448cfd2681b6471029..444819e15c0cce011796805f53f6719af2ca43d7 100755 (executable)
//#include <ti/csl/csl_cacheAux.h> // CACHE_invL1d
// for __clock64()
-#include <dsp_c.h>
-
+//#include <dsp_c.h>
// -- EDMA ---------------------------------------------------------------------
#define BLIS_ENABLE_C66X_EDMA
index 05ea46c0389ba2b8d652982722dc773e0d69776d..252f79ca219eceaafeff21488da5b29838b2139c 100755 (executable)
# NOTE: This is needed to enable posix_memalign().
CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L
CMISCFLAGS := --c99
-#CMISCFLAGS += -I$(TI_OCL_CGT_INSTALL)/include
CMISCFLAGS += -I$(OMP_DIR)/packages/ti/runtime/openmp
CMISCFLAGS += -I$(FC_DIR)/packages
CMISCFLAGS += -I$(XDC_DIR)/packages
CMISCFLAGS += -I$(BIOS_DIR)/packages
CMISCFLAGS += -I$(XDAIS_DIR)/packages
CMISCFLAGS += -I$(LIBARCH_DIR)
-CMISCFLAGS += -I$(TI_OCL_CGT_INSTALL)/include
-CMISCFLAGS += -I$(TI_OCL_INSTALL_DIR)
+CMISCFLAGS += -I$(CGTROOT)/include
CMISCFLAGS += -I$(PDK_DIR)/packages
-CMISCFLAGS += -mv6600 --use_g2 --omp -DSOC_K2H -DLIB_OPENCL #-std=c99 # -fopenmp -pg
+CMISCFLAGS += -mv6600 --use_g2 --omp
-ifeq ($(mem_model),Large)
-CMISCFLAGS += -DMEM_MODEL_LARGE
-else ifeq ($(mem_model),Small)
-CMISCFLAGS += -DMEM_MODEL_SMALL
-else ifeq ($(mem_model),Medium)
-CMISCFLAGS += -DMEM_MODEL_MEDIUM
+ifeq ($(LIBOS),LIB_OPENCL)
+CMISCFLAGS += -I$(TI_OCL_INSTALL_DIR)
endif
+ifeq ($(MEM_MODEL),Large)
+BLIS_MEM_MODEL = MEM_MODEL_LARGE
+else ifeq ($(MEM_MODEL),Medium)
+BLIS_MEM_MODEL = MEM_MODEL_MEDIUM
+else ifeq ($(MEM_MODEL),Small)
+BLIS_MEM_MODEL = MEM_MODEL_SMALL
+endif
+
+CMISCFLAGS += -D$(BLIS_MEM_MODEL) -D$(TARGET) -D$(LIBOS)
+
CDBGFLAGS := -s -k -mw
CWARNFLAGS :=
COPTFLAGS := -O2
index b09d5288fec222022bbefa67bbb84895154a3f73..af1ba8d8600fe8c28ff743b9ea117221b8762e54 100644 (file)
// Check parameters.
if ( bli_error_checking_is_enabled() )
- bli_gemm_int_check( alpha, a, b, beta, c, cntl );
+ bli_gemm_int_check( alpha, a, b, beta, c, cntl ); // creating the errors. print sizes of a,b,c
// If C has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *c ) ) return;
i = cntl_impl_type( cntl );
// Index into the variant array to extract the correct function pointer.
- f = vars[n][i];
+ f = vars[n][i]; // print out n and i
// Invoke the variant.
#if defined(BLIS_ENABLE_PROFILE)
#if defined(BLIS_ENABLE_C66X_BUILD)
TSCL = 0;
- counter_start = __clock64();
+ counter_start = lib_clock64();
#else
counter_start = (uint64_t) (bli_clock()*1.2e9);
#endif
#if defined(BLIS_ENABLE_PROFILE)
#if defined(BLIS_ENABLE_C66X_BUILD)
- counter_end = __clock64();
+ counter_end = lib_clock64();
#else
counter_end = (uint64_t) (bli_clock()*1.2e9);
#endif
index 1ffd0a8eee489bb843ebe49d6ea7de8b57765945..72712bbf69ffb5b47f09507e4bea0149d328995e 100644 (file)
/* initiate first c transfer */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_nr = __clock64(); \
+ counter_start_nr = lib_clock64(); \
} \
n_cur = ( bli_is_not_edge_f( jr_thread_id, n_iter, n_left ) ? NR : n_left ); \
if(cs_c*sizeof(ctype) < BLIS_C66X_MAXDMASTRIDE) \
/* Loop over the m dimension (MR rows at a time). */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
for ( i = ir_thread_id; i < m_iter; i += ir_num_threads ) \
{ \
/* Handle interior and edge cases separately. */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
if ( m_cur == MR && n_cur == NR ) \
{ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_gemm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
(counter_end_ker-counter_start_ker), 2*m_cur*k*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_gemm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND],\
(counter_end_mr-counter_start_mr), (uint64_t) 2*m*k*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_nr = __clock64(); \
+ counter_end_nr = lib_clock64(); \
bli_profile_data_update(bli_gemm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
(counter_end_nr-counter_start_nr), 2*m*k*n); \
} \
/* initiate first c transfer */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_nr = __clock64(); \
+ counter_start_nr = lib_clock64(); \
} \
\
for ( j = jr_thread_id; j < n_iter; j += jr_num_threads ) \
/* Loop over the m dimension (MR rows at a time). */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
for ( i = ir_thread_id; i < m_iter; i += ir_num_threads ) \
{ \
/* Handle interior and edge cases separately. */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
if ( m_cur == MR && n_cur == NR ) \
{ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_gemm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
(counter_end_ker-counter_start_ker), 2*m_cur*k*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_gemm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
(counter_end_mr-counter_start_mr), 2*m*k*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_nr = __clock64(); \
+ counter_end_nr = lib_clock64(); \
bli_profile_data_update(bli_gemm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
(counter_end_nr-counter_start_nr), 2*m*k*n); \
} \
index f8fdce5e3bfbb90d9a3fe1deb0acbb3af53bd0ec..b548fef4d368bde329d71ce8dea48056ee685797 100644 (file)
#if defined(BLIS_ENABLE_C66X_BUILD)
TSCL = 0;
- counter_start = __clock64();
+ counter_start = lib_clock64();
#else
counter_start = (uint64_t) (bli_clock()*1.2e9);
#endif
#if defined(BLIS_ENABLE_PROFILE)
#if defined(BLIS_ENABLE_C66X_BUILD)
- counter_end = __clock64();
+ counter_end = lib_clock64();
#else // if not DSP
counter_end = (uint64_t) (bli_clock()*1.2e9);
#endif
index 2ced5caee22dd2fa4ff385d4729ceb42d7cef540..2af25bbcf0c2f36d786b2479703b9fa622c738ac 100644 (file)
diagoffc_j = diagoffc - (doff_t) jr_thread_id * NR; \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_nr = __clock64(); \
+ counter_start_nr = lib_clock64(); \
} \
/*if ( diagoffc_j < 0 ) \
{ \
}\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
/* Interior loop over the m dimension (MR rows at a time). */ \
for ( i = ir_thread_id; i < m_iter_new; i += ir_num_threads ) \
{ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
/* Invoke the gemm micro-kernel. */ \
gemm_ukr_cast( k, \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
(counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
} \
{ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
(counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
(counter_end_mr-counter_start_mr), 2*mc_new*k*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_nr = __clock64(); \
+ counter_end_nr = lib_clock64(); \
bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
(counter_end_nr-counter_start_nr), 2*mc_new*k*n); \
} \
index d72fd63caf1ae4ad39ab09a7f844639f8d494d4d..12eafec36d962631133597d5823c890d48cfd13d 100644 (file)
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_nr = __clock64(); \
+ counter_start_nr = lib_clock64(); \
} \
\
if (cs_c*sizeof(ctype) < BLIS_C66X_MAXDMASTRIDE) \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
\
for ( i = ir_thread_id; i < m_iter; i += ir_num_threads ) \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
\
/* Invoke the gemm micro-kernel. */ \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
(counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
} \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
\
/* Handle interior and edge cases separately. */ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
(counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
(counter_end_mr-counter_start_mr), 2*m*k*n_cur); \
} \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_nr = __clock64(); \
+ counter_end_nr = lib_clock64(); \
bli_profile_data_update(bli_herk_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND],\
(counter_end_nr-counter_start_nr), 2*m*k*n); \
} \
index d3a3e3648bff5ebe846c105b66d159ab5ec3437e..71f38f9ed094affc089595d45f112145dba737cb 100644 (file)
#if defined(BLIS_ENABLE_C66X_BUILD)
TSCL = 0;
- counter_start = __clock64();
+ counter_start = lib_clock64();
#else
counter_start = (uint64_t) (bli_clock()*1.2e9);
#endif
#if defined(BLIS_ENABLE_PROFILE)
#if defined(BLIS_ENABLE_C66X_BUILD)
- counter_end = __clock64();
+ counter_end = lib_clock64();
#else
counter_end = (uint64_t) (bli_clock()*1.2e9);
#endif
index 1d67404ecaedff6ef3ef2ab5aab6282b59d432c6..883ae2a71980718fd6a75e4a8fb2bc3f337991e0 100644 (file)
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_nr = __clock64(); \
+ counter_start_nr = lib_clock64(); \
} \
\
n_cur = ( bli_is_not_edge_f( 0, n_iter, n_left ) ? NR : n_left ); \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
\
/* Loop over the m dimension (MR rows at a time). */ \
/* Handle interior and edge cases separately. */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
if ( m_cur == MR && n_cur == NR ) \
{ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
(counter_end_ker-counter_start_ker), 2*k_a1011*m_cur*n_cur); \
} \
/* Handle interior and edge cases separately. */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
if ( m_cur == MR && n_cur == NR ) \
{ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
(counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
} \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND],\
(counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
} \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_nr = __clock64(); \
+ counter_end_nr = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND],\
(counter_end_nr-counter_start_nr), 2*k*m*n); \
} \
index 2182d55765bf7433219b6371bf6ada7434ffdf72..e5c36e417d8d6bf4ce641549c3e5d72c9073f76e 100644 (file)
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_nr = __clock64(); \
+ counter_start_nr = lib_clock64(); \
} \
\
n_cur = ( bli_is_not_edge_f( 0, n_iter, n_left ) ? NR : n_left ); \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
\
/* Loop over the m dimension (MR rows at a time). */ \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
\
/* Handle interior and edge cases separately. */ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
(counter_end_ker-counter_start_ker), 2*k_a1112*m_cur*n_cur); \
} \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
\
/* Handle interior and edge cases separately. */ \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
(counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
} \
} /*for ( i = 0; i < m_iter; ++i )*/\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
(counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
} \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_nr = __clock64(); \
+ counter_end_nr = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
(counter_end_nr-counter_start_nr), 2*k*m*n); \
} \
index c23083569b744ce94c14e96e13a8f7e0bc823fcc..1a102e40d6890459898860b55a22ba14ff31a9f0 100644 (file)
/* Loop over the n dimension (NR columns at a time). */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_nr = __clock64(); \
+ counter_start_nr = lib_clock64(); \
} \
/* Transfering MC(=m)xNR*/ \
if (cs_c*sizeof(ctype) < BLIS_C66X_MAXDMASTRIDE) \
/* Loop over the m dimension (MR rows at a time). */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
for ( i = 0; i < m_iter; ++i ) \
{ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
(counter_end_ker-counter_start_ker),2*k_b1121*m_cur*n_cur); \
} \
/* Loop over the m dimension (MR rows at a time). */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
for ( i = 0; i < m_iter; ++i ) \
{ \
/* Handle interior and edge cases separately. */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
if ( m_cur == MR && n_cur == NR ) \
{ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
(counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
/*printf("gemm %d %d %d %ld\n", MR, NR, k, (counter_end_ker-counter_start_ker));*/ \
} /*for i*/\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
(counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_nr = __clock64(); \
+ counter_end_nr = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
(counter_end_nr-counter_start_nr), 2*k*m*n); \
} \
index 6707817ce3c7c9effbe31111ce73439cec7c114d..8d23b56e30ceaefa99c3b2d58cc11f52322c58aa 100644 (file)
/* Loop over the n dimension (NR columns at a time). */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_nr = __clock64(); \
+ counter_start_nr = lib_clock64(); \
} \
/* Transfering MC(=m)xNR*/ \
if (cs_c*sizeof(ctype) < BLIS_C66X_MAXDMASTRIDE) \
/* Loop over the m dimension (MR rows at a time). */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
for ( i = 0; i < m_iter; ++i ) \
{ \
/* Handle interior and edge cases separately. */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
if ( m_cur == MR && n_cur == NR ) \
{ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
(counter_end_ker-counter_start_ker), 2*k_b0111*m_cur*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
(counter_end_mr-counter_start_mr), 2*k_b0111*m*n_cur); \
} \
/* Loop over the m dimension (MR rows at a time). */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
for ( i = 0; i < m_iter; ++i ) \
{ \
/* Handle interior and edge cases separately. */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
if ( m_cur == MR && n_cur == NR ) \
{ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND], \
(counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
(counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_nr = __clock64(); \
+ counter_end_nr = lib_clock64(); \
bli_profile_data_update(bli_trmm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
(counter_end_nr-counter_start_nr), 2*k*m*n); \
} \
index 06cb179a7cd30020f754a980fa513fbc6c8274b0..050e962ae286542ae9d2601cb1c43fc5c3795c06 100644 (file)
#if defined(BLIS_ENABLE_C66X_BUILD)
TSCL = 0;
- counter_start = __clock64();
+ counter_start = lib_clock64();
#else
counter_start = (uint64_t) (bli_clock()*1.2e9);
#endif
#if defined(BLIS_ENABLE_PROFILE)
#if defined(BLIS_ENABLE_C66X_BUILD)
- counter_end = __clock64();
+ counter_end = lib_clock64();
#else
counter_end = (uint64_t) (bli_clock()*1.2e9);
#endif
index d6352535a093307d0851d96d0ef654d04bf8fdf8..9ae98b67ac2bbac863ed60de090c751c60816fe5 100644 (file)
n_cur = ( bli_is_not_edge_f( 0, n_iter, n_left ) ? NR : n_left ); \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_nr = __clock64(); \
+ counter_start_nr = lib_clock64(); \
} \
\
if(rs_c == 1) \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
for ( i = 0; i < m_iter; ++i ) \
{ \
/* Handle interior and edge cases separately. */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
if ( m_cur == MR && n_cur == NR ) \
{ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
(counter_end_ker-counter_start_ker), 2*k_b21*m_cur*n_cur); \
} \
} /*MR loop*/\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
(counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
} \
/* Loop over the m dimension (MR rows at a time). */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
\
for ( i = 0; i < m_iter; ++i ) \
/* Handle interior and edge cases separately. */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
if (BLIS_ENABLE_C66X_IDMA_KERVAR2 == 1) \
{ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
(counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
} \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
(counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
} \
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_nr = __clock64(); \
+ counter_end_nr = lib_clock64(); \
bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
(counter_end_nr-counter_start_nr), 2*k*m*n); \
} \
index c2b44f8801711969911b70c942a8f2bd69c654a7..14848dd60d9d1684d95b68de99c9d88a0d895013 100644 (file)
\
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_nr = __clock64(); \
+ counter_start_nr = lib_clock64(); \
} \
n_cur = ( bli_is_not_edge_f( 0, n_iter, n_left ) ? NR : n_left ); \
if(rs_c == 1) \
/* Loop over the m dimension (MR rows at a time). */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
for ( i = 0; i < m_iter; ++i ) \
{ \
/* Handle interior and edge cases separately. */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
if ( m_cur == MR && n_cur == NR ) \
{ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
(counter_end_ker-counter_start_ker), 2*k_b01*m_cur*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
(counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
} \
/* Loop over the m dimension (MR rows at a time). */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_mr = __clock64(); \
+ counter_start_mr = lib_clock64(); \
} \
for ( i = 0; i < m_iter; ++i ) \
{ \
/* Handle interior and edge cases separately. */ \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_start_ker = __clock64(); \
+ counter_start_ker = lib_clock64(); \
} \
if ( m_cur == MR && n_cur == NR ) \
{ \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_ker = __clock64(); \
+ counter_end_ker = lib_clock64(); \
bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_KER_LOOP_IND],\
(counter_end_ker-counter_start_ker), 2*k*m_cur*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_mr = __clock64(); \
+ counter_end_mr = lib_clock64(); \
bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_IR_LOOP_IND], \
(counter_end_mr-counter_start_mr), 2*k*m*n_cur); \
} \
} \
if (BLIS_ENABLE_PROFILE_KERVAR2 == 1) \
{ \
- counter_end_nr = __clock64(); \
+ counter_end_nr = lib_clock64(); \
bli_profile_data_update(bli_trsm_profile_data[bli_get_thread_num()+BLIS_MAX_NUM_THREADS*BLIS_PROFILE_JR_LOOP_IND], \
(counter_end_nr-counter_start_nr), 2*k*m*n); \
} \
diff --git a/make.inc b/make.inc
index be81de2881ecd1d20990041a2f53f23a951a0fb5..d32c84d87a8fb8b8d46c5e28387d7704665b59b5 100644 (file)
--- a/make.inc
+++ b/make.inc
#DSP_INCLUDE = -I$(TI_OCL_CGT_INSTALL)/include
-DSP_INCLUDE += -I$(TARGET_ROOTDIR)/usr/share/ti/cgt-c6x/include
-DSP_INCLUDE += -I$(TARGET_ROOTDIR)/usr/share/ti/opencl
+#DSP_INCLUDE += -I$(TARGET_ROOTDIR)/usr/share/ti/cgt-c6x/include
+#DSP_INCLUDE += -I$(TARGET_ROOTDIR)/usr/share/ti/opencl
#DSP_INCLUDE += -I$(TI_OCL_INSTALL_DIR)/include
CPP = g++
# is located on the build host and necessary ARM libraries are installed
# on that file system.
ifneq ($(MAKECMDGOALS),clean)
- ifeq ($(TARGET_ROOTDIR),)
- $(error Environment variable TARGET_ROOTDIR must be defined. Set it to point at the EVM root file system)
- endif
- ifeq ($(OMP_DIR),)
- $(error Environment variable OMP_DIR must be defined. Set it to point at the OpenMP for DSP Installation Directory, or run the setup_hpc_env script available in the MCSDK-HPC installation)
- endif
+ ifneq ($(LIBOS),LIB_RTOS)
+ ifeq ($(TARGET_ROOTDIR),)
+ $(error Environment variable TARGET_ROOTDIR must be defined. Set it to point at the EVM root file system)
+ endif
+ endif
+
+ ifeq ($(OMP_DIR),)
+ $(error Environment variable OMP_DIR must be defined. Set it to point at the OpenMP for DSP Installation Directory, or run the setup_hpc_env script available in the MCSDK-HPC installation)
+ endif
endif
# gcc ARM cross compiler will not, by default, search the host's
$$(info Using $(1) = $$($(1)))
endef
+# Paths to the CBLAS, TICBLAS, libArch and BLIS library archives
+CBLAS_DSP_LIB = ../../cblas/lib/C66/libcblas_C66.ae66
+TICBLAS_DSP_LIB = ../../ticblas/lib/libticblas.ae66
+CBLAS_ARM_LIB = ../../cblas/lib/ARM/libcblas_ARM.a
+LIBARCH_LIB = $(LIBARCH_DIR)/lib/libArch.a66x
+
+ifeq ($(MEM_MODEL),Large)
+BLIS_DSP_LIB = ../../blis/install/c66xLarge/lib/libblis.ae66
+else ifeq ($(MEM_MODEL),Medium)
+BLIS_DSP_LIB = ../../blis/install/c66xMedium/lib/libblis.ae66
+else ifeq ($(MEM_MODEL),Small)
+BLIS_DSP_LIB = ../../blis/install/c66xSmall/lib/libblis.ae66
+#else ifeq ($(MEM_MODEL),Tiny)
+endif
+
%.o: %.cpp
@echo Compiling $<
$(CPP) -c $(CPP_FLAGS) $<
diff --git a/setup_env.sh b/setup_env.sh
index 3fbb6dbb00b7a0d0085a7c58739a1b64dfda8a25..52d7de3e7f938af1c32e7b8574e529e293d24938 100644 (file)
--- a/setup_env.sh
+++ b/setup_env.sh
export TI_OCL_INSTALL="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm"
export TI_OCL_INSTALL_DIR="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/opencl"
export TI_OCL_CGT_INSTALL="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/x86_64-linux/usr/share/ti/cgt-c6x"
-export PDK_DIR=/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-pdk-tree
-export FC_DIR=/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-framework-components-tree
-export XDAIS_DIR=/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-xdais-tree
+export PDK_DIR="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-pdk-tree"
+export FC_DIR="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-framework-components-tree"
+export XDAIS_DIR="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-xdais-tree"
export XDC_DIR="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-xdctools-tree"
export BIOS_DIR="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-sysbios-tree"
-export OMP_DIR="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-omp-tree"
-export LIBARCH_DIR="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-libarch-tree"
+#export OMP_DIR="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-omp-tree"
+export OMP_DIR="/home/a0869574local/ti/openmp_dsp_2_02_00_01"
+#export LIBARCH_DIR="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-libarch-tree"
+export LIBARCH_DIR="/home/a0869574local/proclibs/libarch_intgit/libarch"
export TARGET_ROOTDIR="/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm"
#export XDC_DIR=/home/a0869574local/tisdk/build/arago-tmp-external-linaro-toolchain/sysroots/k2hk-evm/usr/share/ti/ti-xdctools-tree
diff --git a/setup_env_rtos_yocto.sh b/setup_env_rtos_yocto.sh
--- /dev/null
+++ b/setup_env_rtos_yocto.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+#
+# Build environment for the c667x-evm RTOS Yocto sysroots.
+# The exports below only persist if this file is sourced rather than executed.
+#
+# Locations default to the paths this script always used; set TISDK_RTOS_ROOT
+# and/or LINARO_GCC_DIR before sourcing to point at a different installation.
+
+: "${TISDK_RTOS_ROOT:=/home/a0869574local/yocoto/tisdk-rtos}"
+: "${LINARO_GCC_DIR:=/home/a0869574local/gcc-linaro-4.9-2015.05-x86_64_arm-linux-gnueabihf}"
+
+_sysroots="$TISDK_RTOS_ROOT/build/arago-tmp-external-linaro-toolchain/sysroots"
+_host="$_sysroots/x86_64-linux"
+_ti="$_sysroots/c667x-evm/usr/share/ti"
+
+export BIOS_DIR="$_ti/ti-sysbios-tree"
+export IPC_DIR="$_ti/ti-ipc-tree"
+export XDC_DIR="$_ti/ti-xdctools-tree"
+export OMP_DIR="$_ti/ti-omp-tree"
+export C6678_PDK_DIR="$_ti/ti-pdk-tree"
+export PDK_DIR="$_ti/ti-pdk-tree"
+export CGTROOT="$_host/usr/share/ti/cgt-c6x"
+export XDAIS_DIR="$_ti/ti-xdais-tree"
+export FC_DIR="$_ti/ti-framework-components-tree"
+#export LIBARCH_DIR="$_ti/ti-libarch-tree"
+export LIBARCH_DIR="/home/a0869574local/proclibs/libarch_intgit/libarch"
+export EDMA3_DIR="$_ti/ti-edma3lld-tree"
+
+# PATH is replaced, not extended, exactly as before. The search order is kept;
+# only repeated entries are dropped (a later duplicate never wins a lookup).
+_path="$TISDK_RTOS_ROOT/sources/oe-core/scripts"
+_path="$_path:$_host/usr/bin/arm-linux-gnueabi"
+_path="$_path:$_sysroots/c667x-evm/usr/bin/crossscripts"
+_path="$_path:$_host/usr/sbin:$_host/usr/bin:$_host/sbin:$_host/bin"
+_path="$_path:$LINARO_GCC_DIR/bin"
+_path="$_path:$TISDK_RTOS_ROOT/sources/bitbake/bin"
+_path="$_path:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games"
+export PATH="$_path"
+
+unset _sysroots _host _ti _path
diff --git a/ticblas/src/Makefile b/ticblas/src/Makefile
index 0ff8ce153b46e04635035fc33452295806bded33..60bc2d61ac51576847777a98553364c76dccc43a 100644 (file)
--- a/ticblas/src/Makefile
+++ b/ticblas/src/Makefile
include ../../make.inc
-include ./make.inc
-
-INCDIR := $(TI_OCL_CGT_INSTALL)/include
+INCDIR := $(CGTROOT)/include
INCDIR += -I$(OMP_DIR)/packages/ti/runtime/openmp
INCDIR += -I$(FC_DIR)/packages
INCDIR += -I$(XDC_DIR)/packages
INCDIR += -I$(LIBARCH_DIR)
INCDIR += -I$(PDK_DIR)/packages
-ifeq ($(mem_model),Large)
+ifeq ($(LIBOS),LIB_OPENCL)
+INCDIR += -I$(TI_OCL_INSTALL_DIR)
+endif
+
+# Map MEM_MODEL onto the matching BLIS install tree (BLIS_INC) and the
+# MEM_MODEL_* preprocessor symbol passed to the DSP compiler (BLIS_MEM_MODEL).
+ifeq ($(MEM_MODEL),Large)
BLIS_INC = ../../blis/install/c66xLarge/include/blis/
BLIS_MEM_MODEL = MEM_MODEL_LARGE
-else ifeq ($(mem_model),Medium)
+else ifeq ($(MEM_MODEL),Medium)
BLIS_INC = ../../blis/install/c66xMedium/include/blis/
BLIS_MEM_MODEL = MEM_MODEL_MEDIUM
-else ifeq ($(mem_model),Small)
+else ifeq ($(MEM_MODEL),Small)
BLIS_INC = ../../blis/install/c66xSmall/include/blis/
BLIS_MEM_MODEL = MEM_MODEL_SMALL
-#else ifeq ($(mem_model),Tiny)
+else ifeq ($(MEM_MODEL),Tiny)
BLIS_INC = ../../blis/install/c66xTiny/include/blis/
BLIS_MEM_MODEL = MEM_MODEL_TINY
+else ifneq ($(MAKECMDGOALS),clean)
+# Without a recognised model BLIS_MEM_MODEL stays empty and CL6X_FLAGS would
+# end in a bare "-D"; stop here with a clear message instead. "clean" is
+# exempt, matching the environment checks in ../../make.inc.
+$(error MEM_MODEL must be one of Large, Medium, Small or Tiny; got '$(MEM_MODEL)')
endif
INCS = -I. -I$(strip $(subst ;, -I,$(subst $(space),$(space),$(INCDIR))))
-CL6X_FLAGS = $(INCS) --openmp --use_g2 -DSOC_K2H -DLIB_OPENCL -D$(BLIS_MEM_MODEL)
-#CL6X_FLAGS = $(INCS) --openmp --use_g2
+CL6X_FLAGS = $(INCS) --openmp --use_g2 -D$(TARGET) -D$(LIBOS) -D$(BLIS_MEM_MODEL)
+
+DSP_LIB_DIR = ../lib
+DSP_LIB = $(DSP_LIB_DIR)/libticblas.ae66
+
+OBJS = ticblas.obj
+
+# None of these goals names a file its recipe creates, so declare them phony;
+# a stray file called "lib" or "clean" must not short-circuit the build.
+.PHONY: all cross lib clean
+
+all: lib
+cross: lib
+
+# Archive the DSP objects into $(DSP_LIB), creating the output directory first.
+lib: $(OBJS)
+	@echo; echo "Building DSP lib: $(DSP_LIB)"
+	mkdir -p $(DSP_LIB_DIR)
+	$(AR) -cr $(DSP_LIB) $(OBJS)
+
+# -f keeps "make clean" from failing when the archive was never built.
+clean::
+	rm -f $(DSP_LIB)
\ No newline at end of file
diff --git a/ticblas/src/make.inc b/ticblas/src/make.inc
index aa15c2838def1bb7b23aadb162babb52f64573c2..bc5b63916ade198dc95abcf577452893be07b5fe 100644 (file)
--- a/ticblas/src/make.inc
+++ b/ticblas/src/make.inc
# Defines
DSP_LIB_DIR = ../lib
-DSP_LIB = $(DSP_LIB_DIR)/libticblas.a66x
+DSP_LIB = $(DSP_LIB_DIR)/libticblas.ae66
OBJS = ticblas.obj
diff --git a/ticblas/src/ticblas.c b/ticblas/src/ticblas.c
index 8c9043195c09abddb59b26937b95dd81f69a449d..545b54ef5f07aae0d0e7125fc58f66944274bde3 100644 (file)
--- a/ticblas/src/ticblas.c
+++ b/ticblas/src/ticblas.c
#include "../ticblas.h"\r
#include "blis.h"\r
\r
-#define BLAS_LEVEL3_L1DSRAM_SIZE (28*1024)\r
-#define BLAS_LEVEL3_L2SRAM_SIZE (0xBFE00)\r
-#define BLAS_LEVEL3_MSMC_SIZE (0x47FDC0)\r
+#define BLAS_LEVEL3_L1DSRAM_SIZE (28*1024UL)\r
+\r
+/* L2 SRAM and MSMC budgets for the memory model chosen at build time. */\r
+#if defined(MEM_MODEL_LARGE)\r
+#define BLAS_LEVEL3_L2SRAM_SIZE (767*1024UL) /* 767KB */\r
+#define BLAS_LEVEL3_MSMC_SIZE (0x47FDC0) /* 4.5MB */\r
+#elif defined(MEM_MODEL_MEDIUM)\r
+#define BLAS_LEVEL3_L2SRAM_SIZE (384*1024UL) /* 384KB */\r
+#define BLAS_LEVEL3_MSMC_SIZE (0x380000) /* 3.5MB */\r
+#elif defined(MEM_MODEL_SMALL)\r
+#define BLAS_LEVEL3_L2SRAM_SIZE (256*1024UL) /* 256KB */\r
+#define BLAS_LEVEL3_MSMC_SIZE (2048*1024UL)/* 2MB */\r
+#else\r
+#error "Unsupported memory model"\r
+#endif\r
\r
#define BLAS_MEM_SIZE_VFAST BLAS_LEVEL3_L1DSRAM_SIZE \r
#define BLAS_MEM_SIZE_FAST BLAS_LEVEL3_L2SRAM_SIZE\r