summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 27d17ca)
raw | patch | inline | side by side (parent: 27d17ca)
author | Jianzhong Xu <a0869574local@uda0869574> | |
Thu, 15 Oct 2015 13:26:16 +0000 (09:26 -0400) | ||
committer | Jianzhong Xu <a0869574local@uda0869574> | |
Thu, 15 Oct 2015 13:26:16 +0000 (09:26 -0400) |
175 files changed:
diff --git a/Makefile b/Makefile
index d7de1721ad201466f0f87f353b355ddc5c2efa6e..9b9b298ebdad5136a902b6c1b2e581087d069170 100644 (file)
--- a/Makefile
+++ b/Makefile
LINALG_BLIS_DIR = blis
LINALG_CBLAS_DIR = cblas
+LINALG_TICBLAS_DIR = ticblas
LINALG_BLASACC_DIR = blasblisacc
LINALG_CLAPACK_DIR = clapack
BLIS_VERSION = $(shell cat $(LINALG_BLIS_DIR)/version)
ARMplusDSP:
cd $(LINALG_CBLAS_DIR); make arch=ARM alllib; make arch=C66 alllib; \
+ cd ../$(LINALG_TICBLAS_DIR)/src; make; cd ..; \
cd ../$(LINALG_BLIS_DIR); ./configure -p install/c66x c66x; make -j8; make install; \
./configure -p install/arm cortex-a15; make -j8; make install; \
cd ../$(LINALG_BLASACC_DIR); make cross; \
cleanARMplusDSP:
cd $(LINALG_CBLAS_DIR); make arch=ARM clean; make arch=C66 clean; \
+ cd ../$(LINALG_TICBLAS_DIR)/src; make clean; cd ..; \
cd ../$(LINALG_BLIS_DIR); ./configure -p install/c66x c66x; make clean; \
./configure -p install/arm cortex-a15; make clean; \
cd ../$(LINALG_BLASACC_DIR); make clean; \
# clean: run the clean step of each sub-library in turn: cblas (arch=ARM),
# ticblas, BLIS (re-configured for cortex-a15), blasblisacc (Makefile.ARM
# cleanARM) and clapack.
# The whole recipe is a single shell command line, so every `cd` is relative
# to the directory the previous step ended in. After the cblas step the shell
# is in <cblas>/src, so ticblas (a sibling of cblas) is two levels up; the
# ticblas step deliberately ends in <ticblas>/src so that the following
# `cd ../../<blis>` resolves correctly.
clean:
cd $(LINALG_CBLAS_DIR)/src; make arch=ARM clean; \
+ cd ../../$(LINALG_TICBLAS_DIR)/src; make clean; \
cd ../../$(LINALG_BLIS_DIR); ./configure -p install/arm cortex-a15; make clean; \
cd ../$(LINALG_BLASACC_DIR)/src; make -f Makefile.ARM cleanARM; \
cd ../../$(LINALG_CLAPACK_DIR); make clean
# DSPonly: build only the C66x pieces: cblas with arch=C66, ticblas, and BLIS
# configured and installed under install/c66x; then make the
# ti_cblas_kernel.dsp_h target in the blasblisacc src directory.
# The recipe is one shell command line; each `cd` is relative to the directory
# the previous step ended in.
DSPonly:
cd $(LINALG_CBLAS_DIR); make arch=C66 alllib; \
+ cd ../$(LINALG_TICBLAS_DIR)/src; make; cd ..; \
cd ../$(LINALG_BLIS_DIR); ./configure -p install/c66x c66x; make -j8; make install; \
cd ../$(LINALG_BLASACC_DIR)/src; make ti_cblas_kernel.dsp_h
-
+
install:
install -m 755 -d ${DESTDIR}/usr/include
install -m 755 -d ${DESTDIR}/usr/lib
index 4d49ae45c9ed6cd25c79da6be107ce3e8efdcdbf..93df7870019d4ea82bec7a1793e3f61bd87115b6 100644 (file)
--- a/blasblisacc/src/Makefile
+++ b/blasblisacc/src/Makefile
CPP_DEBUG = -g
CPP_FLAGS = -D_LITTLE_ENDIAN -D__ARMv7 -DDEVICE_K2H -I../../cblas/include -I../../blis/install/arm/include/blis/ -I$(TI_OCL_INSTALL_DIR)/include -fopenmp
-CL6X_FLAGS = $(INCS) --openmp --use_g2
+CL6X_FLAGS = $(INCS) --openmp --use_g2 -DDEVICE_K2H -DLIB_OPENCL
CLOCL_FLAGS =
OBJCOPY_ARGS=
ARM_PLUS_DSP_LIB= $(ARM_PLUS_DSP_LIB_DIR)/libcblas_armplusdsp.a
# CBLAS and BLIS directories
CBLAS_DSP_LIB = ../../cblas/lib/C66/libcblas_C66.ae66
+TICBLAS_DSP_LIB = ../../ticblas/lib/libticblas.a66x
BLIS_DSP_LIB = ../../blis/install/c66x/lib/libblis.ae66
CBLAS_ARM_LIB = ../../cblas/lib/ARM/libcblas_ARM.a
+LIBARCH_LIB = $(LIBARCH_DIR)/packages/ti/libarch/lib/libArch.a66x
OCL_BIN = ti_cblas_kernel.out
OBJS += ofld_tbl_strsm.o
OBJS += ofld_tbl_ztrsm.o
-
all: armplusdsp
cross: armplusdsp
ti_cblas_initfini.o: $(OCL_BIN)
# target for fat binary
-ti_cblas_kernel.dsp_h: ti_cblas_kernel.cl facade.obj $(CBLAS_DSP_LIB) $(BLIS_DSP_LIB)
+ti_cblas_kernel.dsp_h: ti_cblas_kernel.cl facade.obj ti_cblas_mem_config.obj $(CBLAS_DSP_LIB) $(BLIS_DSP_LIB) $(TICBLAS_DSP_LIB) $(LIBARCH_LIB)
@echo; echo Building $@
@rm -f ti_cblas_kernel.out
@echo Building fat binary header
index cf3fd3847b0c69e64d9104196a07c75f3d204c6b..ed60c2ba25d02155fb51840cace7fc6bfbec5a71 100644 (file)
--- a/blasblisacc/src/facade.c
+++ b/blasblisacc/src/facade.c
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-
-#include <stdio.h>
+//#include "stdio.h"
#include "../../cblas/include/cblas.h"
-#include "blis.h"
-#define DEVICE_K2H
-
-#include <dsp_c.h>
-
-#define getNextMultiple(x, y) ( ( ((x)+(y)-1)/(y) )* (y) )
-// L1 buffer is hardwared here
-#define L1_BUF_LOC 0x00F00000
-
-// note these pointers must be filled if used functions
-char *pool_mk_mem_L1;
-char *pool_kn_mem_L1;
-char *pool_mn_mem_L1;
-
-char *pool_mk_mem_L2;
-char *pool_kn_mem_L2;
-char *pool_mn_mem_L2;
-
-char *pool_mk_mem_L3;
-char *pool_kn_mem_L3;
-char *pool_mn_mem_L3;
-
-extern void bli_mem_init();
-
-void ti_bli_init_dsp(char *l3_buf, char *l2_buf)
-{
- bli_init();
-}
-
-void ti_bli_finalize_dsp(void)
-{
- bli_finalize();
-}
+#include "../../ticblas/ticblas.h"
+extern int bli_l3_mem_config(double *msmc_buf, size_t msmc_buf_size, size_t *l1D_SRAM_size_orig, size_t *l2_SRAM_size_orig);
+extern int bli_l3_mem_reconfig(size_t l1D_SRAM_size_orig, size_t l2_SRAM_size_orig);
void cblas_caxpy_facade(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY)
{
@@ -88,34 +57,18 @@ void cblas_cgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE
cblas_cgbmv(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY);
}
-void cblas_cgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_cgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_cgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_cgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY)
@@ -138,34 +91,18 @@ void cblas_chbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_chbmv(order, Uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY);
}
-void cblas_chemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_chemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_chemm(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_chemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY)
@@ -183,64 +120,32 @@ void cblas_cher2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_cher2(order, Uplo, N, alpha, X, incX, Y, incY, A, lda);
}
-void cblas_cher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const float beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_cher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const float beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_cher2k(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_cherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const void *A, const int lda, const float beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_cherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const void *A, const int lda, const float beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_cherk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_chpmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *Ap, const void *X, const int incX, const void *beta, void *Y, const int incY)
@@ -278,94 +183,46 @@ void cblas_cswap_facade(const int N, void *X, const int incX, void *Y, const int
cblas_cswap(N, X, incX, Y, incY);
}
-void cblas_csymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_csymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_csymm(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_csyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_csyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_csyr2k(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_csyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_csyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_csyrk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_ctbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const void *A, const int lda, void *X, const int incX)
@@ -388,34 +245,18 @@ void cblas_ctpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_ctpsv(order, Uplo, TransA, Diag, N, Ap, X, incX);
}
-void cblas_ctrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, float *l3_buf, float *l2_buf_loc)
+void cblas_ctrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_ctrmm(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_ctrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int incX)
@@ -423,34 +264,18 @@ void cblas_ctrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_ctrmv(order, Uplo, TransA, Diag, N, A, lda, X, incX);
}
-void cblas_ctrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, float *l3_buf, float *l2_buf_loc)
+void cblas_ctrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_ctrsm(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_ctrsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int incX)
@@ -482,35 +307,35 @@ void cblas_dgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE
{
cblas_dgbmv(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY);
}
-
-void cblas_dgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc, float *l3_buf, float *l2_buf_loc)
-{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+/*
+extern char *pool_mk_mem_L1;
+extern char *pool_kn_mem_L1;
+extern char *pool_mn_mem_L1;
+
+extern char *pool_mk_mem_L2;
+extern char *pool_kn_mem_L2;
+extern char *pool_mn_mem_L2;
+
+extern char *pool_mk_mem_L3;
+extern char *pool_kn_mem_L3;
+extern char *pool_mn_mem_L3;
+*/
+void cblas_dgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+{
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
+ //printf("pool_mk_mem_L1 is 0x%x, pool_kn_mem_L1 is 0x%x, pool_mn_mem_L1 is 0x%x.\n", (unsigned int)pool_mk_mem_L1, (unsigned int)pool_kn_mem_L1, (unsigned int)pool_mn_mem_L1);
+ //printf("pool_mk_mem_L2 is 0x%x, pool_kn_mem_L2 is 0x%x, pool_mn_mem_L2 is 0x%x.\n", (unsigned int)pool_mk_mem_L2, (unsigned int)pool_kn_mem_L2, (unsigned int)pool_mn_mem_L2);
+ //printf("pool_mk_mem_L3 is 0x%x, pool_kn_mem_L3 is 0x%x, pool_mn_mem_L3 is 0x%x.\n", (unsigned int)pool_mk_mem_L3, (unsigned int)pool_kn_mem_L3, (unsigned int)pool_mn_mem_L3);
+
cblas_dgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_dgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY)
@@ -583,34 +408,18 @@ void cblas_dswap_facade(const int N, double *X, const int incX, double *Y, const
cblas_dswap(N, X, incX, Y, incY);
}
-void cblas_dsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_dsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_dsymm(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_dsymv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, const double *A, const int lda, const double *X, const int incX, const double beta, double *Y, const int incY)
@@ -628,64 +437,32 @@ void cblas_dsyr2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_dsyr2(order, Uplo, N, alpha, X, incX, Y, incY, A, lda);
}
-void cblas_dsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_dsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_dsyr2k(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_dsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_dsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_dsyrk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_dtbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const double *A, const int lda, double *X, const int incX)
@@ -708,34 +485,18 @@ void cblas_dtpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_dtpsv(order, Uplo, TransA, Diag, N, Ap, X, incX);
}
-void cblas_dtrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb, float *l3_buf, float *l2_buf_loc)
+void cblas_dtrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_dtrmm(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_dtrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const double *A, const int lda, double *X, const int incX)
@@ -743,34 +504,18 @@ void cblas_dtrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_dtrmv(order, Uplo, TransA, Diag, N, A, lda, X, incX);
}
-void cblas_dtrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb, float *l3_buf, float *l2_buf_loc)
+void cblas_dtrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_dtrsm(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_dtrsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const double *A, const int lda, double *X, const int incX)
@@ -848,34 +593,18 @@ void cblas_sgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE
cblas_sgbmv(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY);
}
-void cblas_sgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_sgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_sgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY)
@@ -943,34 +672,18 @@ void cblas_sswap_facade(const int N, float *X, const int incX, float *Y, const i
cblas_sswap(N, X, incX, Y, incY);
}
-void cblas_ssymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_ssymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_ssymm(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_ssymv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, const float *A, const int lda, const float *X, const int incX, const float beta, float *Y, const int incY)
@@ -988,64 +701,32 @@ void cblas_ssyr2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_ssyr2(order, Uplo, N, alpha, X, incX, Y, incY, A, lda);
}
-void cblas_ssyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_ssyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_ssyr2k(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_ssyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_ssyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_ssyrk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_stbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const float *A, const int lda, float *X, const int incX)
@@ -1068,34 +749,18 @@ void cblas_stpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_stpsv(order, Uplo, TransA, Diag, N, Ap, X, incX);
}
-void cblas_strmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb, float *l3_buf, float *l2_buf_loc)
+void cblas_strmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_strmm(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_strmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const float *A, const int lda, float *X, const int incX)
@@ -1103,34 +768,18 @@ void cblas_strmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_strmv(order, Uplo, TransA, Diag, N, A, lda, X, incX);
}
-void cblas_strsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb, float *l3_buf, float *l2_buf_loc)
+void cblas_strsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_strsm(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_strsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const float *A, const int lda, float *X, const int incX)
@@ -1173,34 +822,18 @@ void cblas_zgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE
cblas_zgbmv(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY);
}
-void cblas_zgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_zgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_zgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_zgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY)
@@ -1223,34 +856,18 @@ void cblas_zhbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_zhbmv(order, Uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY);
}
-void cblas_zhemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_zhemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_zhemm(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_zhemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *A, const int lda, const void *X, const int incX, const void *beta, void *Y, const int incY)
@@ -1268,64 +885,32 @@ void cblas_zher2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_zher2(order, Uplo, N, alpha, X, incX, Y, incY, A, lda);
}
-void cblas_zher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const double beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_zher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const double beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_zher2k(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_zherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const void *A, const int lda, const double beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_zherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const void *A, const int lda, const double beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
cblas_zherk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
void cblas_zhpmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const void *alpha, const void *Ap, const void *X, const int incX, const void *beta, void *Y, const int incY)
@@ -1358,94 +943,46 @@ void cblas_zswap_facade(const int N, void *X, const int incX, void *Y, const int
cblas_zswap(N, X, incX, Y, incY);
}
-void cblas_zsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_zsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig; /* passed by address to bli_l3_mem_config() and handed back to bli_l3_mem_reconfig(); presumably the pre-call L1D/L2 SRAM sizes -- confirm */
+ /* Facade for cblas_zsymm: set up memory from the caller-supplied l3_buf/l3_buf_size, run the routine, then reconfigure; the status is reported through *err_code. */
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return; /* setup failed: cblas_zsymm is not called and *err_code keeps the failure status */
+ }
+ /* Setup succeeded: run the routine with the caller's arguments unchanged. */
cblas_zsymm(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig); /* overwrites *err_code with the reconfig status */
}
-void cblas_zsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_zsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig; /* passed by address to bli_l3_mem_config() and handed back to bli_l3_mem_reconfig(); presumably the pre-call L1D/L2 SRAM sizes -- confirm */
+ /* Facade for cblas_zsyr2k: set up memory from the caller-supplied l3_buf/l3_buf_size, run the routine, then reconfigure; the status is reported through *err_code. */
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return; /* setup failed: cblas_zsyr2k is not called and *err_code keeps the failure status */
+ }
+ /* Setup succeeded: run the routine with the caller's arguments unchanged. */
cblas_zsyr2k(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig); /* overwrites *err_code with the reconfig status */
}
-void cblas_zsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc, float *l3_buf, float *l2_buf_loc)
+void cblas_zsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig; /* passed by address to bli_l3_mem_config() and handed back to bli_l3_mem_reconfig(); presumably the pre-call L1D/L2 SRAM sizes -- confirm */
+ /* Facade for cblas_zsyrk: set up memory from the caller-supplied l3_buf/l3_buf_size, run the routine, then reconfigure; the status is reported through *err_code. */
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return; /* setup failed: cblas_zsyrk is not called and *err_code keeps the failure status */
+ }
+ /* Setup succeeded: run the routine with the caller's arguments unchanged. */
cblas_zsyrk(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig); /* overwrites *err_code with the reconfig status */
}
void cblas_ztbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, const void *A, const int lda, void *X, const int incX)
@@ -1468,34 +1005,18 @@ void cblas_ztpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_ztpsv(order, Uplo, TransA, Diag, N, Ap, X, incX);
}
-void cblas_ztrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, float *l3_buf, float *l2_buf_loc)
+void cblas_ztrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig; /* passed by address to bli_l3_mem_config() and handed back to bli_l3_mem_reconfig(); presumably the pre-call L1D/L2 SRAM sizes -- confirm */
+ /* Facade for cblas_ztrmm: set up memory from the caller-supplied l3_buf/l3_buf_size, run the routine, then reconfigure; the status is reported through *err_code. */
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return; /* setup failed: cblas_ztrmm is not called and *err_code keeps the failure status */
+ }
+ /* Setup succeeded: run the routine with the caller's arguments unchanged. */
cblas_ztrmm(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig); /* overwrites *err_code with the reconfig status */
}
void cblas_ztrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int incX)
@@ -1503,34 +1024,18 @@ void cblas_ztrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_ztrmv(order, Uplo, TransA, Diag, N, A, lda, X, incX);
}
-void cblas_ztrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, float *l3_buf, float *l2_buf_loc)
+void cblas_ztrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
{
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig; /* passed by address to bli_l3_mem_config() and handed back to bli_l3_mem_reconfig(); presumably the pre-call L1D/L2 SRAM sizes -- confirm */
+ /* Facade for cblas_ztrsm: set up memory from the caller-supplied l3_buf/l3_buf_size, run the routine, then reconfigure; the status is reported through *err_code. */
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return; /* setup failed: cblas_ztrsm is not called and *err_code keeps the failure status */
+ }
+ /* Setup succeeded: run the routine with the caller's arguments unchanged. */
cblas_ztrsm(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb);
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig); /* overwrites *err_code with the reconfig status */
}
void cblas_ztrsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const void *A, const int lda, void *X, const int incX)
index 512b3de01ec805dcc16adf503ac940db972b1786..e9ca10dcfb969a338d9680fcc12c4c85cb24a35c 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_cgemm[GEMM_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index 33e7a384d61192c03d14b038302493ed2c0d6d33..74c637b8c29f1af78277048e32a85d8e0b9e30b4 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_csyrk[SYRK_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index 959136b0435c881f0215cfe95d0e268f74ce064d..1d0522cfe906a8ce216d6182ca89afcdc4da839a 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_ctrmm[TRMM_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index d817eb843661e4042b93db86ed79762f83ffd4d9..21dcdaf7e2a7a418860ef9ce237f86261e913bf6 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_ctrsm[TRMM_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
index f24fc228aa363b2fde5a0123d55a7ee8da8ba607..75819ee81081f97ab372f0f4087fb7844240f1be 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_dgemm[GEMM_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index ee3a346a32154d9156b9261ee4b27ca9ace25bc2..776469b243fcba5789a8c3a4eb7169419754351e 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_dsyrk[SYRK_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index f96dec82c700530e4b02a7ff75f6cc9dda04c444..446189f43978c51e8a802b985670cda0ed2a9ad7 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_dtrmm[TRMM_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index 29cfd611247f54943909be96d0f6b874cd273d19..daeb38ba53c4633d809a81df30524e28aff58e81 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_dtrsm[TRMM_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index 4ecf176f0d708a3447bc4afa063dc603347edc52..b058b2fc71543a14d0c789e1a2cd0032bfb7accd 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_sgemm[GEMM_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index 0600ebb476a2ccb6a6310810265da06947b03880..5b89cc96949345af939f91043e4f3667e16cb38c 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_ssyrk[SYRK_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index e108f6561fc2c6ccf09a352b6e50826eab609a36..22f14e7ad1f5d14889ff04d2ffb6187e05ecfae5 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_strmm[TRMM_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index 9cbd4848161076093d95d09f4aa42a3803e757cd..68cc3ae8c6e40b9571c82dc91920a6d7d805fc96 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_strsm[TRMM_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index 594dee1d142bbc22e54a1dc76a450385a6ec7105..f7259284446da61c46a6b666b5c75e5cc6a7746a 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_zgemm[GEMM_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index e713dea37beddd640da49d37be12db9ba2bca51b..8df383eabbb90e63e03754c186af49196c64c714 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_zsyrk[SYRK_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index 6aa24fb6936eaae5fdfd2d310971736966e647b7..0d9caa8943f49acd5e7c1da22b0eb6ee20febfc2 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_ztrmm[TRMM_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
index 012cec7311442a93c98fc9c2267eb9e836fa6565..2d630e8e4c0e0dfc0fc5caa0fb2c34dfe49e42ce 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
char ofld_tbl_ztrsm[TRMM_OFFLOAD_TBL_SIZE] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
similarity index 100%
rename from blasblisacc/src/ti_cblas.h
rename to blasblisacc/src/ti_cblas_acc.h
rename from blasblisacc/src/ti_cblas.h
rename to blasblisacc/src/ti_cblas_acc.h
index bdf2fe87113ea8274de6315575d11b7e02ac84db..39daf708f3665a6dbc3a3b8f36e29e49d8fe4b05 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index fff48bf88663331db0ff00d7fded32cd3d793b07..bf5b16d0cf2e88d088f1ca470a6db33a7715654c 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
diff --git a/blasblisacc/src/ti_cblas_cblas_cdotc_sub.c b/blasblisacc/src/ti_cblas_cblas_cdotc_sub.c
index c2f00cfdadd2499b3e5e85c3b6f0d9787362d91e..9cfc775883c857f2d934a622ec5e87f03a48b4e5 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
diff --git a/blasblisacc/src/ti_cblas_cblas_cdotu_sub.c b/blasblisacc/src/ti_cblas_cblas_cdotu_sub.c
index 025ac50d4cd6260361950790e34760497c9a0cc6..68d4684a1f3c8c8fc98c3dbbdaffc1a928f858d8 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index e45a04f5be10f3016f186e7089d165956a20a642..097792c134ec24685a50ddada1299d4db27116b9 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -204,7 +205,6 @@ void cblas_cgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 4dc98d9cb7da98b84ffe177a172e053966af4bec..ffd982072e7e077bbfc186ae7790be1924e3b991 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -194,6 +195,7 @@ void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -207,11 +209,23 @@ void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
err |= clSetKernelArg(__K, 14, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(15, msmc_size); /* kernel argument 15: byte count that was passed to ti_cblas_mem_alloc() for msmc_ptr */
+#else
+ err |= clSetKernelArg(__K, 15, sizeof(msmc_size), &msmc_size);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err); /* check now: the clCreateBuffer() call that follows stores its own status into err and would discard this result */
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(15, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(16, buf_err);
#else
- err |= clSetKernelArg(__K, 15, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -225,9 +239,12 @@ void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_cgemm is %d.\n", err_code); /* report the status written into the err_code buffer; period precedes the newline so it stays on the message line */
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 6c58f7a51567c961fb16bc93de4298ef2107e2bc..37962f7f3c59c8c5a2e7c6a41ca031cae843ff96 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -192,7 +193,6 @@ void cblas_cgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index b66278317b69046e68bc40c2da64dcf040e1db31..e8feac03ac0126dda9c8148cedc56b5803b19d91 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -175,7 +176,6 @@ void cblas_cgerc(const enum CBLAS_ORDER order, const int M, const int N, const v
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 011a1ffce4e7a799b4333cc2ba4ebd208cefb706..22f8c5046a527a67d8416d5a3af12cb6264efb93 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -175,7 +176,6 @@ void cblas_cgeru(const enum CBLAS_ORDER order, const int M, const int N, const v
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 34f98a716302c4677fb363925d3bce530d434523..ad7e51bdec8af637d8bd560db330cb99e1aeeafd 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -192,7 +193,6 @@ void cblas_chbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 52648550fa727b76ffcc4e78f632a385af5a8078..b51fbc2953d5f3d445173f2d40f7d25fa29d9bba 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -188,6 +189,7 @@ void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -201,11 +203,23 @@ void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(14, msmc_size); /* kernel argument 14: byte count that was passed to ti_cblas_mem_alloc() for msmc_ptr */
+#else
+ err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err); /* check now: the clCreateBuffer() call that follows stores its own status into err and would discard this result */
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(14, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(15, buf_err);
#else
- err |= clSetKernelArg(__K, 14, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -219,9 +233,12 @@ void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_chemm is %d.\n", err_code); /* report the status written into the err_code buffer; period precedes the newline so it stays on the message line */
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 09bf9bb9d2906ab6a98a5de1542773d781291013..8e076486e7069d395ba15c860ff9917db4051fdb 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -186,7 +187,6 @@ void cblas_chemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 8f7c90152d9d87b018b36b980e4993f65b5a8538..b3ff8c31897c073178be5e85ac9c054a3317081d 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index f07e96bc354773a7e5281fecc390e6098c831098..eced71f9e068f36310ff13125f347bd154d08c26 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -175,7 +176,6 @@ void cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 4fcf2db13c09ab47a9f860bea50afdbc51e7c45b..709f036129cad062b6d46434b78fcdadfe3e56ce 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -183,6 +184,7 @@ void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -196,11 +198,23 @@ void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(14, msmc_size); /* kernel argument 14: byte count that was passed to ti_cblas_mem_alloc() for msmc_ptr */
+#else
+ err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err); /* check now: the clCreateBuffer() call that follows stores its own status into err and would discard this result */
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(14, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(15, buf_err);
#else
- err |= clSetKernelArg(__K, 14, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -214,9 +228,12 @@ void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_cher2k is %d.\n", err_code); /* report the status written into the err_code buffer; period precedes the newline so it stays on the message line */
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index b3f6c6a2e6becae9885f9682755e56ba82fec19c..d7db80b6a597cae2391e8fcd4495ea3bf1a14591 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -157,6 +158,7 @@ void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -170,11 +172,23 @@ void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
err |= clSetKernelArg(__K, 11, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(12, msmc_size); /* kernel argument 12: byte count that was passed to ti_cblas_mem_alloc() for msmc_ptr */
+#else
+ err |= clSetKernelArg(__K, 12, sizeof(msmc_size), &msmc_size);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err); /* check now: the clCreateBuffer() call that follows stores its own status into err and would discard this result */
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(12, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(13, buf_err);
#else
- err |= clSetKernelArg(__K, 12, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 13, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -188,9 +202,12 @@ void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) { /* report a non-success status from the offloaded call */
+ printf("Error code returned by offloaded cblas_cherk is %d.\n", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index dc12e2f426c21669d32ae597238b6774223084f7..c4d1d624df19ef648ac0bbf0b8b772bab84b1d80 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -180,7 +181,6 @@ void cblas_chpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 0b6e9c5a9175ac8da3cc91d9e49ef48888c4d8c7..670016e0fd00928f84d311257bea466c629b0814 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 3a92c94eb70b0eca996282cdd7988ee79eab03e6..50d29f732e25eca733394abcd054e34dbf907618 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -169,7 +170,6 @@ void cblas_chpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 3cc378cf804aaab705fc550412f3afd4cd9e2ff0..c31ba615e8be56b3463efe8bb00678c3505c5acd 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index a80951b13c73f2e3f84a2654f1e42f43d871d5d5..40d2452480b253c43d3c1a60ff12e77b3a126ed8 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 22819ea243e394789c52af3fdf5b1b749e947526..aea68b46d1e5e28d9e266a7d500aea7b528cba77 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 75db0f145055d3bb53b22182ae362bdda63162db..67249d32b0156aa9aeecfc6a3d62c43a8716a624 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 80de3a7a60d5a08393b28e8e1827a52db04e93f2..6050de720b21b53d987867bbff0c1bbbc2367bc9 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -188,6 +189,7 @@ void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -201,11 +203,23 @@ void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(14, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(14, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(15, buf_err);
#else
- err |= clSetKernelArg(__K, 14, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -219,9 +233,12 @@ void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) { /* report a non-success status from the offloaded call */
+ printf("Error code returned by offloaded cblas_csymm is %d.\n", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index a195349e15737b938bd346257a0b6ebe4c058930..11feff3f8ef46768d433addd1dd61f499f72b6d8 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -188,6 +189,7 @@ void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -201,11 +203,23 @@ void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(14, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(14, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(15, buf_err);
#else
- err |= clSetKernelArg(__K, 14, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -219,9 +233,12 @@ void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) { /* report a non-success status from the offloaded call */
+ printf("Error code returned by offloaded cblas_csyr2k is %d.\n", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index ecfcabc57bdbe5c95c77a03d8da0600f85677fa7..4840abdfc8efb414bd75151e7214fdb479d913a2 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -167,6 +168,7 @@ void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -180,11 +182,23 @@ void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
err |= clSetKernelArg(__K, 11, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(12, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 12, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(12, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(13, buf_err);
#else
- err |= clSetKernelArg(__K, 12, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 13, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -198,9 +212,12 @@ void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) { /* report a non-success status from the offloaded call */
+ printf("Error code returned by offloaded cblas_csyrk is %d.\n", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 00d7b76eee045ca9a72dd7043edba96c32f25a99..018b07030dd14955cf541b6d1c35166008f06e4f 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -161,7 +162,6 @@ void cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 93b84be7d32f86a47c5641eafc7a776894b94ec2..6f34e3ae8c2e72642a732000e49845bb774f659f 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -161,7 +162,6 @@ void cblas_ctbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index da5fc3f233c3ab76c5620c747903ad22e7570058..b2db324062eeb3f301dce6e89f8590c78ac0d142 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -149,7 +150,6 @@ void cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index a1448002901dbd75848d81ad5eaa8b1365fb625d..45969d33cd990209ad89cd6d0ce64ce8d329ae6a 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -149,7 +150,6 @@ void cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 1aeaa52c2c63e6dabbc3ba14313ae352333a3aa8..54f291f3ada4bb76599a2e80e7b9145d67cb1d77 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -168,6 +169,7 @@ void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -181,11 +183,23 @@ void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(13, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(13, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(14, buf_err);
#else
- err |= clSetKernelArg(__K, 13, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -199,9 +213,12 @@ void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) { /* report a non-success status from the offloaded call */
+ printf("Error code returned by offloaded cblas_ctrmm is %d.\n", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 0b33200b8e48b83dfbc2fe92ddcf2c25a48286df..40054f941ce48333e91fa9af8267adee3d5a974e 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -155,7 +156,6 @@ void cblas_ctrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 6ea2cc2d76d62ee8a9e43507a483dd6cd973ec8e..7804059c0351970f512ab4f496f721fc3faee424 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -168,6 +169,7 @@ void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -181,11 +183,23 @@ void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(13, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(13, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(14, buf_err);
#else
- err |= clSetKernelArg(__K, 13, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -199,9 +213,12 @@ void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) { /* report a non-success status from the offloaded call */
+ printf("Error code returned by offloaded cblas_ctrsm is %d.\n", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 8b697a5d3176c867498620cd02405a555652aa62..42d5a05d2ecf7c7099ab595206cba546270f8dd4 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -155,7 +156,6 @@ void cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index ab0fb5f80d6458e53557af38d317c2704bb5ec80..ac2c2af1905e9862621ec3c14007021e10283869 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval);
__K->setArg(3, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 3, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 7e99bb165b29bb928c62954d1e78d3041f34f800..fee688edb32d0aa4cd9050c42644b26076cacfea 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -137,7 +138,6 @@ void cblas_daxpy(const int N, const double alpha, const double *X, const int inc
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index fb9790e8248dd5e65eab602d60d7a3d68d89ce1a..58769ff7228ed96b15365979b6fe02f303876ff5 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 719e82e3d2d5782b7d24da61c0b36cd32eee1bd5..5bfbb81b112d1ba164fd7d6361eb42c570627b36 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -127,7 +128,7 @@ double cblas_ddot(const int N, const double *X, const int incX, const double *Y,
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval);
__K->setArg(5, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 5, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
@@ -144,7 +145,6 @@ double cblas_ddot(const int N, const double *X, const int incX, const double *Y,
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 468b5b4048a26492394ed51723d8db60b1ed2d6a..2e03fd2241bb5a78b5798c7e475abae99e7dcdc6 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -194,7 +195,6 @@ void cblas_dgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 976ec5a79f49f2771d33d795134fd5a2bd1ad457..6fa53241eb386e9229f7c1261df1b2a931ca3ea5 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -184,6 +185,7 @@ void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -197,11 +199,23 @@ void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
err |= clSetKernelArg(__K, 14, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(15, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 15, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(15, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(16, buf_err);
#else
- err |= clSetKernelArg(__K, 15, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -215,9 +229,12 @@ void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) { /* report a non-success status from the offloaded call */
+ printf("Error code returned by offloaded cblas_dgemm is %d.\n", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 97a1b056cb49cfcd8a3b76f8f41909726ce9faf3..acb7123ff30729f4f966d080dcc8d6c28c3703fe 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -182,7 +183,6 @@ void cblas_dgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 4da84337102ea97ff4cc2cd6198e46eebd683eeb..c035efed70e6150e651419c058d8402616b3ae47 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -170,7 +171,6 @@ void cblas_dger(const enum CBLAS_ORDER order, const int M, const int N, const do
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 18eaddd236330460eed88f7515c065d47ddbd33f..0be91b895ee4701bff3e183c13da99a63203b15b 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval);
__K->setArg(3, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 3, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 0694e9d3496b6ad8437401477ebd064bd0a0b6c8..bb4cdc07730ba5aa5dd0c901b36207725d6c2974 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -142,7 +143,6 @@ void cblas_drot(const int N, double *X, const int incX, double *Y, const int inc
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index d152518c5678e24ebd4066ab76383af20903543d..2c20ae308e8754752271ccfd84f3e27d6a9802fe 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index f8b95aabfaf385e2275e9b4e4c8a88213973405b..1e4a374ea085398c56ccd279b9cb5fe02999a472 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -141,7 +142,6 @@ void cblas_drotm(const int N, double *X, const int incX, double *Y, const int in
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index fb19c073a8f87fc945cde643b815c8a296046786..ef4e979a576a1754a26a4a629bb3599c1383efd2 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -130,7 +131,6 @@ void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index bc02f21fe4a4a35e7030d541c1eef67fa00af920..d3e1988c0837a5dd87065b12660350a6b78da736 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -182,7 +183,6 @@ void cblas_dsbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index badc23089e235fc88ee7fd1defb8096babae1cd0..44c1510a00a0b7f831be31e3a06e5b495e66d70e 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 1a03fd6d0f57f17da0c2ea6a0bd920585f87ab89..7e7ff095c7f714b7e75e9c23f4b8226f555adf11 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval);
__K->setArg(5, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 5, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index ba7eb80c39b54a0354928f3f35845d17b6333b53..5239c73b879d30a40851a5f46ae1a04e33faadde 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -170,7 +171,6 @@ void cblas_dspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 3462f3ff1f6a8825e97983527e39c8fa1d4fc435..d8c25dcf49eabb891204b2ab0be2378d6c88655a 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 677f81330cf7f9085d9e015b0e790d222bba4d77..4d6b2c049debc0b14516beb402ec8e1f3d0593c6 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -164,7 +165,6 @@ void cblas_dspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 9937fe09bb0b3e16c307d06d06aec8f4039da3cc..63cd15ada38181b6b5f14825263bb73857119fc4 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -142,7 +143,6 @@ void cblas_dswap(const int N, double *X, const int incX, double *Y, const int in
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 53f36330d70b59042bfbcc0fc66bf74ea497b052..a6223ee92349bc7f723adac969d7e6ee376d978f 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -178,6 +179,7 @@ void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -191,11 +193,23 @@ void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(14, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(14, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(15, buf_err);
#else
- err |= clSetKernelArg(__K, 14, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -209,9 +223,12 @@ void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) { /* err_code is the host int backing buf_err (the DSP return-code buffer) */
+ printf("Error code returned by offloaded cblas_dsymm is %d.\n", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 498ab222229d40b9b87ca18c5eb2dbe07d3d6093..aca46f23ecadab0c668bac4510662baa53d1bddf 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -176,7 +177,6 @@ void cblas_dsymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 408976a370fadc1cfc9b965531867be77a697067..a421bcde70a15430b00f875af77980e9f38c55cb 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 5a13d29a535f1cdb8fc12962e5f071acd20a0fa2..5c64af929d45692dcaf68c44065e18404802bdde 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -170,7 +171,6 @@ void cblas_dsyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 4cf91631127f0d4c8046c7b124b4751aa46f9ef8..5e126a700094fd74be18c69c66c1ba50d9ca3a60 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -178,6 +179,7 @@ void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -191,11 +193,23 @@ void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(14, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(14, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(15, buf_err);
#else
- err |= clSetKernelArg(__K, 14, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -209,9 +223,12 @@ void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) { /* err_code is the host int backing buf_err (the DSP return-code buffer) */
+ printf("Error code returned by offloaded cblas_dsyr2k is %d.\n", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 747d1420689d5e19130a29cdbb3c466c6d6d0ee4..a5b7d87ef9aa0d5f2d4086bbc0eb0806cd617995 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -157,6 +158,7 @@ void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -170,11 +172,23 @@ void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
err |= clSetKernelArg(__K, 11, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(12, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 12, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(12, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(13, buf_err);
#else
- err |= clSetKernelArg(__K, 12, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 13, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -188,9 +202,12 @@ void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) { /* err_code is the host int backing buf_err (the DSP return-code buffer) */
+ printf("Error code returned by offloaded cblas_dsyrk is %d.\n", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 98ca5d37cf2ce1f240804192f8e8dd05c093847f..8e1149847c15a490d37eb72c30e2cacd0ca4d89d 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -161,7 +162,6 @@ void cblas_dtbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 9e48c9d36b52ad5f95121d052cd81690f3dd33b9..095284708aea09ac3eaeb5c3a8e1d6a102301f5e 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -161,7 +162,6 @@ void cblas_dtbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 3e715b9ed822dbe8a6214e165c36a5a846fd9d29..f8fa7fcde306160ef9e6620d8d2fd2a912ea12a4 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -149,7 +150,6 @@ void cblas_dtpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 6084309a6ecdbfbc09e918a516b731c5c980d170..6856032f5357329ac0a27df39ac90dd24b561714 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -149,7 +150,6 @@ void cblas_dtpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 13c0f6e1016094dbd6ecd2aa1c3d1403c8a4422b..fd70c5dc4b706e5293b03e5920f4033319752ffb 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -163,6 +164,7 @@ void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -176,11 +178,23 @@ void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(13, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(13, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(14, buf_err);
#else
- err |= clSetKernelArg(__K, 13, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -194,9 +208,12 @@ void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) { /* err_code is the host int backing buf_err (the DSP return-code buffer) */
+ printf("Error code returned by offloaded cblas_dtrmm is %d.\n", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 339ed16d54acbe80a77934841d073e4494a57ee9..54755ec2ee5b645002cb2dce5ac2e7f5da8069ea 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -155,7 +156,6 @@ void cblas_dtrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 7724f014d016d4f1a29bb747bf342465106a91c7..798a88e7db5ae6895105b8877d2809c8ff3eaaea 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -163,6 +164,7 @@ void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -176,11 +178,23 @@ void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(13, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(13, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(14, buf_err);
#else
- err |= clSetKernelArg(__K, 13, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -194,9 +208,12 @@ void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) { /* err_code is the host int backing buf_err (the DSP return-code buffer) */
+ printf("Error code returned by offloaded cblas_dtrsm is %d.\n", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index eb5cfd4f65ea8da0b4b4952d63905682a3c1426a..15d492b19ef6d54d96ad3cd9040e2c840d5120ef 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -155,7 +156,6 @@ void cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 603aa7873cc0972f68527d0e7ef938abc3d8080b..fd4874ce4c76eb0d6fbdab0ea5b4defb17b9bdb2 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval);
__K->setArg(3, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 3, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 0295da8e6c6f5907904f3e8d746c1b8417c4dcfb..96147f96cfc07c8bd203d7188e4540b8d413e4a3 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval);
__K->setArg(3, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(double), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 3, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 0dc769fea4ee6e437ad9b168a0eeed65a4ce1128..c5569ad9e9a73e65cdd22399c849f2fb11e96ef8 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(CBLAS_INDEX), &retval);
__K->setArg(3, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(CBLAS_INDEX), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(CBLAS_INDEX), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 3, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 12d6e0e4de50a066dcc17aae4b4e5c6ce91a38b6..3ed7745801849fcb57d95545c06b9593633c0563 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(CBLAS_INDEX), &retval);
__K->setArg(3, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(CBLAS_INDEX), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(CBLAS_INDEX), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 3, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index f1cfed86d4d7ca06bb89d40a3aa4a7c10ad9e186..19bea6ab6ddd0bd78d74e478b220f1b8c00d7c11 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(CBLAS_INDEX), &retval);
__K->setArg(3, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(CBLAS_INDEX), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(CBLAS_INDEX), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 3, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 623059fd6f31dea8d1d27ae0ab33eb569c7c82e1..77141cf81d0e2fb1d2ee61287eebbd0a2d1f7aed 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(CBLAS_INDEX), &retval);
__K->setArg(3, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(CBLAS_INDEX), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(CBLAS_INDEX), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 3, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 60ba25c2145844a389dec6b0436447e6d67296b5..7892bb7221f69ac12fbeb16eaa005d4954fb8af8 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval);
__K->setArg(3, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 3, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index d00a338291c2791de77d6a5d3bc7552cdff554f9..3862933e632a4fd52a7255eebf01593ed6c3c88f 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -137,7 +138,6 @@ void cblas_saxpy(const int N, const float alpha, const float *X, const int incX,
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 4c7cc216148c698f4b02030849efddad32d2ea1d..23c6f4f38c27cc687ed0892dd73449ef80b718bd 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval);
__K->setArg(3, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 3, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 8131294913e7a8e8597b7efc8dedd4cba1075df5..a966e9ee6950f86c5e80f883be581ca7c3a1fc8f 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval);
__K->setArg(3, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 3, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 099f5ecf17f60fc1ac60cfff4a94a6d5f3881e36..dd6cab58259f0aff629480c062babebdaaad0ea3 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -131,7 +132,6 @@ void cblas_scopy(const int N, const float *X, const int incX, float *Y, const in
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 86877e91a7e4097b669a871a9fc039cdc81003bf..69fd6d44bb21eb4c8da40cf7f4c35e76dfb19e74 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -127,7 +128,7 @@ float cblas_sdot(const int N, const float *X, const int incX, const float *Y, co
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval);
__K->setArg(5, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 5, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
@@ -144,7 +145,6 @@ float cblas_sdot(const int N, const float *X, const int incX, const float *Y, co
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 09f6f84ee794e367e8936ae1fa73100aa0131e29..d5445ea42bd48a1e64bc44e8e22f300d7fe89b9e 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -133,7 +134,7 @@ float cblas_sdsdot(const int N, const float alpha, const float *X, const int inc
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval);
__K->setArg(6, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 6, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
@@ -150,7 +151,6 @@ float cblas_sdsdot(const int N, const float alpha, const float *X, const int inc
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index cd4951ed71ecdce889bbbc3f5de219f2bca0cf8f..f4f2826d101e2f19550cc062d299bc315d784ddb 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -194,7 +195,6 @@ void cblas_sgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index d97a454f17dcfea3adf84df1681cc8cd8b7d246b..72e3af3b58ab693ccfc9b31001d4f5d554b8db58 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -184,6 +185,7 @@ void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -197,11 +199,23 @@ void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
err |= clSetKernelArg(__K, 14, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(15, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 15, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(15, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(16, buf_err);
#else
- err |= clSetKernelArg(__K, 15, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -215,9 +229,12 @@ void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_sgemm is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 3b7b9eafe90f23647b6bdba5015ac57e51c95592..c6e0c1daf7faf8f5b7166676f291dadbeb1fa048 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -182,7 +183,6 @@ void cblas_sgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index dfe9aceac19a5a9ab5ed885e81dabe6d8714e6ed..1ee795f876353a1adbbfc44a0be97e0f50e1c92c 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -170,7 +171,6 @@ void cblas_sger(const enum CBLAS_ORDER order, const int M, const int N, const fl
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 798b7b0a9e87594288007fcdc487ab25199b352d..79a7dcbfb6da15956a44b1bb3eabbf0ca453d710 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
Buffer buf_retval(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval);
__K->setArg(3, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(float), &retval, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
err |= clSetKernelArg(__K, 3, sizeof(buf_retval), &buf_retval);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index d185fed4dac440ae8a567af0540763b12650eab9..051e99a68f1e988d49bf33412e31a652ff75826e 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -142,7 +143,6 @@ void cblas_srot(const int N, float *X, const int incX, float *Y, const int incY,
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 5a0594e81cef542bc94b442766526cfb2bf6520f..2b7a071bb9a248c351b5b0ee5f4397d599c3d94a 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 410b8fd61579c66c562adf11c40cdd15088bb3ae..e67142c90213ae74fc2522d00b4a4381099d6b68 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -141,7 +142,6 @@ void cblas_srotm(const int N, float *X, const int incX, float *Y, const int incY
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 6418b25d95a3f153c10cb544addfe4ec968104c8..008e2d0a35a5878f61dd8cef1cede20cc1b58c63 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 47c73765342b837ada80629d6040b821e62db56b..08d9dd7479b36b0172ba457a90ca089824ca9cc5 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -182,7 +183,6 @@ void cblas_ssbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 743be109f2ea3132607669fd80e740c1ef5ca932..d3afdb9af57c433e646f69fb153bc69b3d2d4169 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 14d13d50d12a682a60e60d8993521c80614e79dc..78f82ddba2e69a90b0e49dd5ad1083ad8bd22ca0 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -170,7 +171,6 @@ void cblas_sspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 31e59b82aef7d121a75c2cc1a9b7f83e8b020d17..707533f1203af0ff8e44dd7565d2540094f4f931 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 4c78328ab8fc7a59971ec7d37cf6c75bcd61cb97..dac6d8538f1a8aa11aab90484f408d19893fa50e 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -164,7 +165,6 @@ void cblas_sspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index d62a9e4d4f628c7f0464e0edc7bce0086d5ed44c..241d2136298fbee4b1b8061244d8a306bc031112 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -142,7 +143,6 @@ void cblas_sswap(const int N, float *X, const int incX, float *Y, const int incY
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 43a506564381154b6645b16e506814ff6f1df747..c090bed21bdefa2027be33820dfda2ab4d1586b3 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -178,6 +179,7 @@ void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -191,11 +193,23 @@ void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(14, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(14, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(15, buf_err);
#else
- err |= clSetKernelArg(__K, 14, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -209,9 +223,12 @@ void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_ssymm is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 8a84e6363287e69ede19a520c0340f5119a707cf..e92214f0d9b4b9f67c938c86df235519614774b8 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -176,7 +177,6 @@ void cblas_ssymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index bf5259ad7490864ae75b09e785e20bc145d56330..5c45b02b42ccab1327a9fdcabae28d5d7eb0e8a5 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 7aa68442afbc9a852eaa9e57577e37dabdfc13db..ea04df9a24f991f193fdd6f7b9cb887f2341c97d 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -170,7 +171,6 @@ void cblas_ssyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index c7c30f005af24db20c7a93a18b4fd19258bfd093..97b6b443b80c33ae0988494f948b559cd83c8647 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -178,6 +179,7 @@ void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -191,11 +193,23 @@ void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(14, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(14, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(15, buf_err);
#else
- err |= clSetKernelArg(__K, 14, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -209,9 +223,12 @@ void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_ssyr2k is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index efcb97bbfcf585ca8d8beba3c8bd41db66c49924..17c8bbddeeaee3871f6cb208fc49eb4d73a5a18f 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -157,6 +158,7 @@ void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -170,11 +172,23 @@ void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
err |= clSetKernelArg(__K, 11, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(12, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 12, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(12, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(13, buf_err);
#else
- err |= clSetKernelArg(__K, 12, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 13, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -188,9 +202,12 @@ void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_ssyrk is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 662600a1c1d90a7e33387a6b055afe2859fd38f7..97b7922d647442cdc1f94eaa8d67aba92cf2cb2d 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -161,7 +162,6 @@ void cblas_stbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 48b640b89c1ff7342b8551a4c94c7a3e0829fa38..47002da6eaa00838607cb2a56bac48e161455d86 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -161,7 +162,6 @@ void cblas_stbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index a1d91f8af8cc658575d7a30e843136bb9bf49912..a5ea0271c4383d0167bb98c8dd5c7a70bbd8a847 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -149,7 +150,6 @@ void cblas_stpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index e487ad6fe5722cdfd51397f9cfe1243b5fa9d10e..0b23825a29b71d1927177aff2dee0b1b14bc7b06 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -149,7 +150,6 @@ void cblas_stpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 3f1244127fa2497ca782188213ad147d022dbaca..fc1d9d82400c27ccc8adb3423f5576cd573e6bc5 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -163,6 +164,7 @@ void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -176,11 +178,23 @@ void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(13, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(13, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(14, buf_err);
#else
- err |= clSetKernelArg(__K, 13, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -194,9 +208,12 @@ void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_strmm is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index a103a7f23712cd63390893a23a10c0f3e5e90e3f..933bd345d9e178504416da29d1234f01d227607f 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -155,7 +156,6 @@ void cblas_strmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index e1d8513d608f018906bab5965684eb2ec4421e2c..56ad072c622be0bb7b1489001e296e77c4d18676 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -163,6 +164,7 @@ void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -176,11 +178,23 @@ void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(13, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(13, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(14, buf_err);
#else
- err |= clSetKernelArg(__K, 13, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -194,9 +208,12 @@ void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_strsm is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 33505905123641c3881bed7a745e350a94b575fd..97aed05220eae5f502c1c6f462e67caca929906f 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -155,7 +156,6 @@ void cblas_strsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 5643cedbfb74b3d0598590fbe96423317e319939..2ff97c8e9002195a31690106b57c6741a9bc578a 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index db210af27db374e31f4d73378adafc0a8f1f56ff..8450fae6dddcad9b73926122c1d31c307607af9b 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 7236ae986e18efc9584f7bf8405f6fc5daf33b22..60edee0b1a1dcd0898b4e4e537e0f1881ad22f77 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
diff --git a/blasblisacc/src/ti_cblas_cblas_zdotc_sub.c b/blasblisacc/src/ti_cblas_cblas_zdotc_sub.c
index 5f241935e11b8b528b8c78cb6132fca0c63003e2..80cd4d8d19fa29ed864e90aff61fb3bf55901008 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
diff --git a/blasblisacc/src/ti_cblas_cblas_zdotu_sub.c b/blasblisacc/src/ti_cblas_cblas_zdotu_sub.c
index 26d70cea8b94699302eaba17e11b63617e15d1c9..841cf7103056e3085d99ca635e23c0ac9ab41396 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index c8a2600ac0fec3bccd3ee4408fe7f5933dbc4936..d2c7eca168abcbea0f24d50e003b9cd0ad42de62 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index d333b18c28ca3232f68603c307dbcbff46b14764..0a2a0740e8ce2892c5a71309b72157bcc707ac89 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -204,7 +205,6 @@ void cblas_zgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 86cdecc527206e7a4cfacf4563256cf766ef78af..77e5bf914df46ef5c0d22898d3b5988180284cd8 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -194,6 +195,7 @@ void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -207,11 +209,23 @@ void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
err |= clSetKernelArg(__K, 14, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ __K->setArg(15, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 15, sizeof(msmc_size), &msmc_size);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
#ifdef __cplusplus
- __K->setArg(15, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(16, buf_err);
#else
- err |= clSetKernelArg(__K, 15, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -225,9 +239,12 @@ void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_zgemm is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index f3739e863df495c13536045ec142d3934baebb43..a195377e20dbc6f6f0257e60ef63d0ba9797b168 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -192,7 +193,6 @@ void cblas_zgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 867e0a566c8a1f43ae6fd439a2c1796abf44b511..341c8f1fe67c11eb94930b8a0fada68781c109de 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -175,7 +176,6 @@ void cblas_zgerc(const enum CBLAS_ORDER order, const int M, const int N, const v
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 22b64e56c1f682e50032ffe02a699ecc4a2dbb9f..94cddea2813c7f4cb1fcbfa312f41b51984b9b2c 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -175,7 +176,6 @@ void cblas_zgeru(const enum CBLAS_ORDER order, const int M, const int N, const v
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 8b0d9b9e8c7b44b466152d91aea9f0a1e0cefa9f..36fa157152bb4caad5c34d72e0c86987e848f27a 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -192,7 +193,6 @@ void cblas_zhbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index fa5d9cb9b9b4b420e871aafeb7f664a82887accf..d206e1361fdcb13fe6151a4ac8b82dfc7384a434 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -188,6 +189,7 @@ void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -201,11 +203,23 @@ void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ /* arg 14: size of the MSMC buffer (MSMC_BUF_SIZE) */
+ __K->setArg(14, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
+ /* check here: the clCreateBuffer call below stores its status into err, which would discard this result */
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ /* preset to TICBLAS_SUCCESS so the check after the kernel never reads an indeterminate value */
+ int err_code = TICBLAS_SUCCESS;
#ifdef __cplusplus
- __K->setArg(14, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(15, buf_err);
#else
- err |= clSetKernelArg(__K, 14, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -219,9 +233,12 @@ void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_zhemm is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 511eb725e375e26c6a887ba32e86b691b2275b93..6e87053d010dd9a82dc474db8a40b02f62c2a1d8 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -186,7 +187,6 @@ void cblas_zhemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 55c99e4840871d9ec5a77dbb687775342872f08f..a3dcd1b25557b440cc52078b8ce54920806b8be6 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 905730515f33b539553b38e419852662d74056b6..146d17ff455202d1746c5b1c112724192aff2c37 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -175,7 +176,6 @@ void cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 3fe3bdb781e34a18460d0ce964628688f512cee9..6f52d3ae5d260957a04446aee24309c2ca037cfd 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -183,6 +184,7 @@ void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -196,11 +198,23 @@ void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ /* arg 14: size of the MSMC buffer (MSMC_BUF_SIZE) */
+ __K->setArg(14, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
+ /* check here: the clCreateBuffer call below stores its status into err, which would discard this result */
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ /* preset to TICBLAS_SUCCESS so the check after the kernel never reads an indeterminate value */
+ int err_code = TICBLAS_SUCCESS;
#ifdef __cplusplus
- __K->setArg(14, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(15, buf_err);
#else
- err |= clSetKernelArg(__K, 14, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -214,9 +228,12 @@ void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_zher2k is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 232d839a24289af5ab982c996e306d0da317f0db..e5437115432891ca5fa07b00195a43f301729402 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -157,6 +158,7 @@ void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -170,11 +172,23 @@ void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
err |= clSetKernelArg(__K, 11, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ /* arg 12: size of the MSMC buffer (MSMC_BUF_SIZE) */
+ __K->setArg(12, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 12, sizeof(msmc_size), &msmc_size);
+ /* check here: the clCreateBuffer call below stores its status into err, which would discard this result */
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ /* preset to TICBLAS_SUCCESS so the check after the kernel never reads an indeterminate value */
+ int err_code = TICBLAS_SUCCESS;
#ifdef __cplusplus
- __K->setArg(12, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(13, buf_err);
#else
- err |= clSetKernelArg(__K, 12, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 13, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -188,9 +202,12 @@ void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_zherk is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 5cd8688d82fa798ce707579844e3d881e3cedc42..9a5dd8fc42da0346c0cb73bb11b8a59dc3d11b98 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -180,7 +181,6 @@ void cblas_zhpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 80a6348c55e70378a6362016ffabd216d55b061c..d479109e494b12efc0b9afcd9f963d6cb9f40687 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 854cd61c74cba1f9850526beee7d697008c324bb..e55ec9f3bcf193671f735c27c7e6434eb0694065 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -169,7 +170,6 @@ void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index b01f3ae49467081048c4c0948da435edd0faf9dc..41d2041dd63bd8727e59b8ead3fe98b8c39c6f07 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index c731c2bf0bff8b9b87050f836f96d94042cff0ad..c08248fa6b7bb3457d89d3fa723d7ceb3dd43018 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 8deae8d58b182a248a82b19cf305a6e0e43de146..a966c27943faa13cd8cda00a678db1646a2ee556 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 40fdba50fa7486ac351f3c0efb434c43601f5b83..deda9fd7c9995cd58cb5b0f7f602228acdd4125c 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -188,6 +189,7 @@ void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -201,11 +203,23 @@ void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ /* arg 14: size of the MSMC buffer (MSMC_BUF_SIZE) */
+ __K->setArg(14, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
+ /* check here: the clCreateBuffer call below stores its status into err, which would discard this result */
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ /* preset to TICBLAS_SUCCESS so the check after the kernel never reads an indeterminate value */
+ int err_code = TICBLAS_SUCCESS;
#ifdef __cplusplus
- __K->setArg(14, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(15, buf_err);
#else
- err |= clSetKernelArg(__K, 14, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -219,9 +233,12 @@ void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_zsymm is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 80844059f5489c2471543b788c3a6b0109967b28..62de2c1216ee1e8b84370c748851aa67baf099d7 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -188,6 +189,7 @@ void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -201,11 +203,23 @@ void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ /* arg 14: size of the MSMC buffer (MSMC_BUF_SIZE) */
+ __K->setArg(14, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
+ /* check here: the clCreateBuffer call below stores its status into err, which would discard this result */
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ /* preset to TICBLAS_SUCCESS so the check after the kernel never reads an indeterminate value */
+ int err_code = TICBLAS_SUCCESS;
#ifdef __cplusplus
- __K->setArg(14, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(15, buf_err);
#else
- err |= clSetKernelArg(__K, 14, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -219,9 +233,12 @@ void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_zsyr2k is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 99a661c2eb4d7c9cc4b8b258556295da61d44239..5271187201085378fb052d7f2ef8071ff0b31caa 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -167,6 +168,7 @@ void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -180,11 +182,23 @@ void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
err |= clSetKernelArg(__K, 11, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ /* arg 12: size of the MSMC buffer (MSMC_BUF_SIZE) */
+ __K->setArg(12, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 12, sizeof(msmc_size), &msmc_size);
+ /* check here: the clCreateBuffer call below stores its status into err, which would discard this result */
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ /* preset to TICBLAS_SUCCESS so the check after the kernel never reads an indeterminate value */
+ int err_code = TICBLAS_SUCCESS;
#ifdef __cplusplus
- __K->setArg(12, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(13, buf_err);
#else
- err |= clSetKernelArg(__K, 12, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 13, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -198,9 +212,12 @@ void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_zsyrk is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index e3d2155a9169dbb634a769da1e3b92945b624a43..5b7dc34149d73f267e505e6b2a9bcf50e45b0282 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -161,7 +162,6 @@ void cblas_ztbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 696a26c974435353c9a358d7008582421ca85b10..fd9ba055b3443d816665ce5f420ea50b4206b7f3 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -161,7 +162,6 @@ void cblas_ztbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index a40b4e497bead3e85ff9f3bd8d487bdaf99a4d7a..469a4996d0cea55b2c59c4dd90816391dc6254ce 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -149,7 +150,6 @@ void cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 15c9515d1da22e5b27c71d5e8a007f8de08f8400..4def0efd93f10316fc3993786f16584f4c7f2cce 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -149,7 +150,6 @@ void cblas_ztpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 4474751f82531e984151a79a992829f2e0280ef2..803bc4bc43c0fd03d6af68b031d8cd64a74129bd 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -168,6 +169,7 @@ void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -181,11 +183,23 @@ void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ /* arg 13: size of the MSMC buffer (MSMC_BUF_SIZE) */
+ __K->setArg(13, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
+ /* check here: the clCreateBuffer call below stores its status into err, which would discard this result */
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ /* preset to TICBLAS_SUCCESS so the check after the kernel never reads an indeterminate value */
+ int err_code = TICBLAS_SUCCESS;
#ifdef __cplusplus
- __K->setArg(13, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(14, buf_err);
#else
- err |= clSetKernelArg(__K, 13, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -199,9 +213,12 @@ void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_ztrmm is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index 1bad5fda21cf49f1fbe455941cabc482e5e6ab13..a0d12f44c92e89a983365953a0bd51ad19e539b3 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -155,7 +156,6 @@ void cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index bc9bd319789c620421d3a59dfa2e5f3c0ad238eb..8a2411a5c917f88e3b78197918ffa6bebca23964 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -168,6 +169,7 @@ void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
#endif
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
@@ -181,11 +183,23 @@ void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
+
+#ifdef __cplusplus
+ /* arg 13: size of the MSMC buffer (MSMC_BUF_SIZE) */
+ __K->setArg(13, msmc_size);
+#else
+ err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
+ /* check here: the clCreateBuffer call below stores its status into err, which would discard this result */
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
+#endif
+ /* create a buffer argument to get the return error code from the DSP */
+ /* preset to TICBLAS_SUCCESS so the check after the kernel never reads an indeterminate value */
+ int err_code = TICBLAS_SUCCESS;
#ifdef __cplusplus
- __K->setArg(13, __local(L2_BUF_SIZE));
+ Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg(14, buf_err);
#else
- err |= clSetKernelArg(__K, 13, L2_BUF_SIZE, NULL);
+ cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
+ err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
#ifdef __cplusplus
@@ -199,9 +213,12 @@ void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+ if(err_code != TICBLAS_SUCCESS) {
+ printf("Error code returned by offloaded cblas_ztrsm is %d\n.", err_code);
+ }
+
ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
index f0cd18f2e167121e62409502bac6193a04400894..02d2797cc74948d66524f55e0f86282d8a5137b9 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -155,7 +156,6 @@ void cblas_ztrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
ti_cblas_delete_kernel(__K);
index 9b3a59085359e01e800f82da4b03a9a97cb1b700..f2dd549b161d11bab0d5df6b8cac9fcd3c724d7e 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
#include <pthread.h>
#ifdef TI_CBLAS_FAT_BINARY
pthread_mutex_init(&MUTEX, 0);
TI_CBLAS_DEBUG_PRINT("Pthreads initialized\n");
- //TI_CBLAS_DEBUG_PRINT("Initializing BLIS\n");
- //ti_blis_init();
- //TI_CBLAS_DEBUG_PRINT("BLIS initialized\n");
+ TI_CBLAS_DEBUG_PRINT("Initializing BLIS\n");
+ ti_blis_init();
+ TI_CBLAS_DEBUG_PRINT("BLIS initialized\n");
atexit(ti_cblas_auto_finalize);
index 8745a47d4d13fbadaebe967883cb627fc6d1d635..afbf5c18a66d6d41e5e930fcb705a2bd4780ba34 100644 (file)
enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
+
/* Kernel wrapper: passes the global l3_buf and local l2_buf pointers unchanged to ti_bli_init_dsp, which is only declared here. */
void ti_bli_init_dsp(global char *l3_buf, local char *l2_buf);
kernel void ocl_bli_init(global char *l3_buf, local char *l2_buf)
{ ti_bli_init_dsp(l3_buf, l2_buf); }
/* Kernel wrapper: takes no arguments and simply calls ti_bli_finalize_dsp, which is only declared here. */
void ti_bli_finalize_dsp(void);
kernel void ocl_bli_finalize(void)
{ ti_bli_finalize_dsp(); }
+
/* Kernel wrapper for caxpy: forwards every argument, in order and unchanged, to cblas_caxpy_facade (declared here, defined elsewhere). */
void cblas_caxpy_facade(const int N, global const void *alpha, global const void *X, const int incX, global void *Y, const int incY);
kernel void ocl_cblas_caxpy(const int N, global const void *alpha, global const void *X, const int incX, global void *Y, const int incY)
{ cblas_caxpy_facade(N, alpha, X, incX, Y, incY); }
/* Kernel wrapper for cgbmv: forwards every argument, in order and unchanged, to cblas_cgbmv_facade (declared here, defined elsewhere). */
void cblas_cgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_cgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_cgbmv_facade(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_cgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_cgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
+/* Kernel wrapper for cgemm: forwards every argument unchanged to cblas_cgemm_facade. err_code is qualified global like the other pointer arguments, since OpenCL C kernel pointer parameters must name an address space. */
+/* NOTE(review): the OpenCL C spec also disallows size_t kernel arguments; l3_buf_size is kept as size_t because the host wrappers visible in this patch set the matching argument from a size_t - confirm host and device agree on its width. */
+void cblas_cgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
+{ cblas_cgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
/* Kernel wrapper for cgemv: forwards every argument, in order and unchanged, to cblas_cgemv_facade (declared here, defined elsewhere). */
void cblas_cgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_cgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_cgemv_facade(order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY); }
/* Kernel wrapper for chbmv: forwards every argument, in order and unchanged, to cblas_chbmv_facade (declared here, defined elsewhere). */
void cblas_chbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_chbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_chbmv_facade(order, Uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_chemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_chemm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
+/* Kernel wrapper for chemm: forwards every argument unchanged to cblas_chemm_facade. err_code is qualified global like the other pointer arguments, since OpenCL C kernel pointer parameters must name an address space. */
+/* NOTE(review): the OpenCL C spec also disallows size_t kernel arguments; l3_buf_size is kept as size_t because the host wrappers visible in this patch set the matching argument from a size_t - confirm host and device agree on its width. */
+void cblas_chemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
+kernel void ocl_cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
+{ cblas_chemm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
/* Kernel wrapper for chemv: forwards every argument, in order and unchanged, to cblas_chemv_facade (declared here, defined elsewhere). */
void cblas_chemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_chemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_chemv_facade(order, Uplo, N, alpha, A, lda, X, incX, beta, Y, incY); }
@@ -82,12 +84,12 @@ kernel void ocl_cblas_cher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO U
void cblas_cher2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *X, const int incX, global const void *Y, const int incY, global void *A, const int lda);
kernel void ocl_cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *X, const int incX, global const void *Y, const int incY, global void *A, const int lda)
{ cblas_cher2_facade(order, Uplo, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_cher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const float beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const float beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_cher2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
-void cblas_cherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const void *A, const int lda, const float beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const void *A, const int lda, const float beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_cherk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l2_buf_loc); }
+void cblas_cher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_cher2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_cherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const void *A, const int lda, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const void *A, const int lda, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_cherk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_chpmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *Ap, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_chpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *Ap, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_chpmv_facade(order, Uplo, N, alpha, Ap, X, incX, beta, Y, incY); }
@@ -109,15 +111,15 @@ kernel void ocl_cblas_csscal(const int N, const float alpha, global void *X, con
void cblas_cswap_facade(const int N, global void *X, const int incX, global void *Y, const int incY);
kernel void ocl_cblas_cswap(const int N, global void *X, const int incX, global void *Y, const int incY)
{ cblas_cswap_facade(N, X, incX, Y, incY); }
-void cblas_csymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_csymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
-void cblas_csyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_csyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
-void cblas_csyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_csyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l2_buf_loc); }
+void cblas_csymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_csymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_csyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_csyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_csyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_csyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_ctbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ctbmv_facade(order, Uplo, TransA, Diag, N, K, A, lda, X, incX); }
@@ -130,15 +132,15 @@ kernel void ocl_cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO
void cblas_ctpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *Ap, global void *X, const int incX);
kernel void ocl_cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *Ap, global void *X, const int incX)
{ cblas_ctpsv_facade(order, Uplo, TransA, Diag, N, Ap, X, incX); }
-void cblas_ctrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_ctrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l2_buf_loc); }
+void cblas_ctrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_ctrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_ctrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ctrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ctrmv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_ctrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_ctrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l2_buf_loc); }
+void cblas_ctrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_ctrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_ctrsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ctrsv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
void cblas_dgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY);
kernel void ocl_cblas_dgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY)
{ cblas_dgbmv_facade(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_dgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_dgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
+void cblas_dgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_dgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_dgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY);
kernel void ocl_cblas_dgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY)
{ cblas_dgemv_facade(order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY); }
void cblas_dswap_facade(const int N, global double *X, const int incX, global double *Y, const int incY);
kernel void ocl_cblas_dswap(const int N, global double *X, const int incX, global double *Y, const int incY)
{ cblas_dswap_facade(N, X, incX, Y, incY); }
-void cblas_dsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_dsymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
+void cblas_dsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_dsymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_dsymv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY);
kernel void ocl_cblas_dsymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY)
{ cblas_dsymv_facade(order, Uplo, N, alpha, A, lda, X, incX, beta, Y, incY); }
@@ -214,12 +216,12 @@ kernel void ocl_cblas_dsyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO U
void cblas_dsyr2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *X, const int incX, global const double *Y, const int incY, global double *A, const int lda);
kernel void ocl_cblas_dsyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *X, const int incX, global const double *Y, const int incY, global double *A, const int lda)
{ cblas_dsyr2_facade(order, Uplo, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_dsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_dsyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
-void cblas_dsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, const double beta, global double *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, const double beta, global double *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_dsyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l2_buf_loc); }
+void cblas_dsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_dsyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_dsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_dsyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_dtbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const double *A, const int lda, global double *X, const int incX);
kernel void ocl_cblas_dtbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const double *A, const int lda, global double *X, const int incX)
{ cblas_dtbmv_facade(order, Uplo, TransA, Diag, N, K, A, lda, X, incX); }
@@ -232,15 +234,15 @@ kernel void ocl_cblas_dtpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO
void cblas_dtpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *Ap, global double *X, const int incX);
kernel void ocl_cblas_dtpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *Ap, global double *X, const int incX)
{ cblas_dtpsv_facade(order, Uplo, TransA, Diag, N, Ap, X, incX); }
-void cblas_dtrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_dtrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l2_buf_loc); }
+void cblas_dtrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_dtrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_dtrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *A, const int lda, global double *X, const int incX);
kernel void ocl_cblas_dtrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *A, const int lda, global double *X, const int incX)
{ cblas_dtrmv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_dtrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_dtrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l2_buf_loc); }
+void cblas_dtrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_dtrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_dtrsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *A, const int lda, global double *X, const int incX);
kernel void ocl_cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *A, const int lda, global double *X, const int incX)
{ cblas_dtrsv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
void cblas_sgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY);
kernel void ocl_cblas_sgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY)
{ cblas_sgbmv_facade(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_sgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_sgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
+void cblas_sgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_sgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_sgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY);
kernel void ocl_cblas_sgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY)
{ cblas_sgemv_facade(order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY); }
void cblas_sswap_facade(const int N, global float *X, const int incX, global float *Y, const int incY);
kernel void ocl_cblas_sswap(const int N, global float *X, const int incX, global float *Y, const int incY)
{ cblas_sswap_facade(N, X, incX, Y, incY); }
-void cblas_ssymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_ssymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
+void cblas_ssymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_ssymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_ssymv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY);
kernel void ocl_cblas_ssymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY)
{ cblas_ssymv_facade(order, Uplo, N, alpha, A, lda, X, incX, beta, Y, incY); }
@@ -340,12 +342,12 @@ kernel void ocl_cblas_ssyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO U
void cblas_ssyr2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, global const float *X, const int incX, global const float *Y, const int incY, global float *A, const int lda);
kernel void ocl_cblas_ssyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, global const float *X, const int incX, global const float *Y, const int incY, global float *A, const int lda)
{ cblas_ssyr2_facade(order, Uplo, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_ssyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_ssyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
-void cblas_ssyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, const float beta, global float *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, const float beta, global float *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_ssyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l2_buf_loc); }
+void cblas_ssyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_ssyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_ssyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_ssyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_stbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const float *A, const int lda, global float *X, const int incX);
kernel void ocl_cblas_stbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const float *A, const int lda, global float *X, const int incX)
{ cblas_stbmv_facade(order, Uplo, TransA, Diag, N, K, A, lda, X, incX); }
@@ -358,15 +360,15 @@ kernel void ocl_cblas_stpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO
void cblas_stpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *Ap, global float *X, const int incX);
kernel void ocl_cblas_stpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *Ap, global float *X, const int incX)
{ cblas_stpsv_facade(order, Uplo, TransA, Diag, N, Ap, X, incX); }
-void cblas_strmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_strmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l2_buf_loc); }
+void cblas_strmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_strmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_strmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *A, const int lda, global float *X, const int incX);
kernel void ocl_cblas_strmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *A, const int lda, global float *X, const int incX)
{ cblas_strmv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_strsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_strsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l2_buf_loc); }
+void cblas_strsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_strsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_strsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *A, const int lda, global float *X, const int incX);
kernel void ocl_cblas_strsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *A, const int lda, global float *X, const int incX)
{ cblas_strsv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
@@ -391,9 +393,9 @@ kernel void ocl_cblas_zdscal(const int N, const double alpha, global void *X, co
void cblas_zgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_zgbmv_facade(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_zgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_zgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
+void cblas_zgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_zgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_zgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_zgemv_facade(order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY); }
@@ -406,9 +408,9 @@ kernel void ocl_cblas_zgeru(const enum CBLAS_ORDER order, const int M, const int
void cblas_zhbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zhbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_zhbmv_facade(order, Uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_zhemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_zhemm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
+void cblas_zhemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_zhemm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_zhemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zhemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_zhemv_facade(order, Uplo, N, alpha, A, lda, X, incX, beta, Y, incY); }
@@ -418,12 +420,12 @@ kernel void ocl_cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO U
void cblas_zher2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *X, const int incX, global const void *Y, const int incY, global void *A, const int lda);
kernel void ocl_cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *X, const int incX, global const void *Y, const int incY, global void *A, const int lda)
{ cblas_zher2_facade(order, Uplo, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_zher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const double beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const double beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_zher2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
-void cblas_zherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const void *A, const int lda, const double beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const void *A, const int lda, const double beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_zherk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l2_buf_loc); }
+void cblas_zher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_zher2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_zherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const void *A, const int lda, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const void *A, const int lda, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_zherk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_zhpmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *Ap, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zhpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *Ap, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_zhpmv_facade(order, Uplo, N, alpha, Ap, X, incX, beta, Y, incY); }
@@ -442,15 +444,15 @@ kernel void ocl_cblas_zscal(const int N, global const void *alpha, global void *
void cblas_zswap_facade(const int N, global void *X, const int incX, global void *Y, const int incY);
kernel void ocl_cblas_zswap(const int N, global void *X, const int incX, global void *Y, const int incY)
{ cblas_zswap_facade(N, X, incX, Y, incY); }
-void cblas_zsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_zsymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
-void cblas_zsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_zsyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l2_buf_loc); }
-void cblas_zsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_zsyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l2_buf_loc); }
+void cblas_zsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_zsymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_zsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_zsyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_zsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_zsyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
void cblas_ztbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ztbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ztbmv_facade(order, Uplo, TransA, Diag, N, K, A, lda, X, incX); }
@@ -463,15 +465,15 @@ kernel void ocl_cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO
void cblas_ztpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *Ap, global void *X, const int incX);
kernel void ocl_cblas_ztpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *Ap, global void *X, const int incX)
{ cblas_ztpsv_facade(order, Uplo, TransA, Diag, N, Ap, X, incX); }
-void cblas_ztrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_ztrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l2_buf_loc); }
+void cblas_ztrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_ztrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
void cblas_ztrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ztrmv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_ztrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, local double *l2_buf_loc);
-kernel void ocl_cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, local double *l2_buf_loc)
-{ cblas_ztrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l2_buf_loc); }
+void cblas_ztrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code);
+kernel void ocl_cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, int *err_code)
+{ cblas_ztrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
/* Kernel wrapper for ZTRSV: passes all of its arguments, unchanged and in order, to cblas_ztrsv_facade (only declared here). */
void cblas_ztrsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ztrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ztrsv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
diff --git a/blasblisacc/src/ti_cblas_mem_config.c b/blasblisacc/src/ti_cblas_mem_config.c
--- /dev/null
@@ -0,0 +1,128 @@
+/******************************************************************************
+ * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+//#include "stdio.h"
+#include "blis.h"
+#include "../../ticblas/ticblas.h"
+#include <ti/libarch/libarch.h>
+
+#define BLIS_L3_DDR_SIZE_ZERO (0)
+
+/*==============================================================================
+ * This function checks that the memory needed by the level 3 BLAS routines is
+ * available, reconfigures the L1D and L2 SRAM sizes when the current ones do
+ * not exceed the requirement, and passes the L1D/L2/MSMC regions on to
+ * tiCblasInit().
+ *
+ *   msmc_buf, msmc_buf_size : MSMC buffer supplied by the caller and its size
+ *   l1D_SRAM_size_orig      : out, L1D SRAM size read before any change
+ *   l2_SRAM_size_orig       : out, L2 SRAM size read before any change
+ *
+ * Returns -2 when a requirement exceeds the available/provided memory,
+ * -3 when an L1D or L2 configuration call fails on any OpenMP thread,
+ * otherwise the value returned by tiCblasInit().
+ *
+ * NOTE(review): the two output sizes are written only after the -2 check has
+ * passed, and nothing in this function restores the SRAM sizes when it returns
+ * -3 or a tiCblasInit() error - confirm that callers handle both cases.
+ *============================================================================*/
+int bli_l3_mem_config(double *msmc_buf, size_t msmc_buf_size, size_t *l1D_SRAM_size_orig, size_t *l2_SRAM_size_orig)
+{
+    size_t smem_size_vfast, smem_size_fast, smem_size_med, smem_size_slow;
+    void *l1d_SRAM_ptr, *l2_SRAM_ptr;
+    int l1d_cfg_err, l2_cfg_err;
+
+    /* First, verify that the provided/available memory meets the requirements */
+    tiCblasGetSizes(&smem_size_vfast, &smem_size_fast, &smem_size_med, &smem_size_slow);
+
+    if( (smem_size_vfast > lib_get_L1D_total_size())   // total available L1D
+      ||(smem_size_fast  > lib_get_L2_total_size())    // total available L2
+      ||(smem_size_med   > msmc_buf_size)              // provided MSMC memory
+      ||(smem_size_slow  > BLIS_L3_DDR_SIZE_ZERO)      // DDR not used
+      ) {
+        return(-2);
+    }
+
+    /* Configure L1D if necessary */
+    *l1D_SRAM_size_orig = lib_get_L1D_SRAM_size();     // get current L1D SRAM size
+    l1d_cfg_err = LIB_CACHE_SUCCESS;
+    if(*l1D_SRAM_size_orig <= smem_size_vfast) {       // configure L1D if needs more SRAM
+        #pragma omp parallel
+        {
+            /* Keep the status per thread and publish only failures: letting
+               every thread store into the shared variable is a data race in
+               which a later success can overwrite an earlier failure. */
+            int thread_err = lib_L1D_config_SRAM(smem_size_vfast);
+            if(thread_err != LIB_CACHE_SUCCESS) {
+                #pragma omp critical
+                {
+                    l1d_cfg_err = thread_err;
+                }
+            }
+        }
+    }
+
+    /* Configure L2 if necessary */
+    *l2_SRAM_size_orig = lib_get_L2_SRAM_size();       // get current L2 SRAM size
+    l2_cfg_err = LIB_CACHE_SUCCESS;
+    if(*l2_SRAM_size_orig <= smem_size_fast) {         // configure L2 if needs more SRAM
+        #pragma omp parallel
+        {
+            /* same failure-only publication as for L1D above */
+            int thread_err = lib_L2_config_SRAM(smem_size_fast);
+            if(thread_err != LIB_CACHE_SUCCESS) {
+                #pragma omp critical
+                {
+                    l2_cfg_err = thread_err;
+                }
+            }
+        }
+    }
+
+    if(l1d_cfg_err || l2_cfg_err) {
+        return(-3);
+    }
+
+    /* get L1D and L2 SRAM base address */
+    l1d_SRAM_ptr = lib_get_L1D_SRAM_base();
+    l2_SRAM_ptr  = lib_get_L2_SRAM_base();
+
+    /* pass allocated memories for heap initialization (no DDR region is given) */
+    return(tiCblasInit(l1d_SRAM_ptr, smem_size_vfast,
+                       l2_SRAM_ptr,  smem_size_fast,
+                       msmc_buf,     msmc_buf_size,
+                       NULL,         BLIS_L3_DDR_SIZE_ZERO));
+}
+
+/*==============================================================================
+ * This function reconfigures L1D and L2 after processing is finished.
+ *
+ *   l1D_SRAM_size_orig, l2_SRAM_size_orig : SRAM sizes to restore (the values
+ *                                           bli_l3_mem_config() reported)
+ *
+ * A level is reconfigured only when its current SRAM size differs from the
+ * requested one. Returns -4 when a configuration call fails on any OpenMP
+ * thread, TICBLAS_SUCCESS otherwise.
+ *============================================================================*/
+int bli_l3_mem_reconfig(size_t l1D_SRAM_size_orig, size_t l2_SRAM_size_orig)
+{
+    int l1d_cfg_err, l2_cfg_err;
+
+    /* configure L1D back */
+    l1d_cfg_err = LIB_CACHE_SUCCESS;
+    if(l1D_SRAM_size_orig != lib_get_L1D_SRAM_size()) {
+        #pragma omp parallel
+        {
+            /* Publish only failures: an unsynchronised store from every thread
+               is a data race that can hide a failure behind a later success. */
+            int thread_err = lib_L1D_config_SRAM(l1D_SRAM_size_orig);
+            if(thread_err != LIB_CACHE_SUCCESS) {
+                #pragma omp critical
+                {
+                    l1d_cfg_err = thread_err;
+                }
+            }
+        }
+    }
+
+    /* configure L2 back (same "only when it differs" test as for L1D) */
+    l2_cfg_err = LIB_CACHE_SUCCESS;
+    if(l2_SRAM_size_orig != lib_get_L2_SRAM_size()) {
+        #pragma omp parallel
+        {
+            int thread_err = lib_L2_config_SRAM(l2_SRAM_size_orig);
+            if(thread_err != LIB_CACHE_SUCCESS) {
+                #pragma omp critical
+                {
+                    l2_cfg_err = thread_err;
+                }
+            }
+        }
+    }
+
+    /* report a failure of either restore step */
+    if(l1d_cfg_err || l2_cfg_err) {
+        return(-4);
+    }
+
+    return(TICBLAS_SUCCESS);
+} /* bli_l3_mem_reconfig */
+
+/* DSP-side start-up hook: runs bli_init(). Both buffer arguments are accepted but not used in this function. */
+void ti_bli_init_dsp(char *l3_buf, char *l2_buf)
+{
+    bli_init();
+}
+
+/* DSP-side shutdown hook: runs bli_finalize(). */
+void ti_bli_finalize_dsp(void)
+{
+    bli_finalize();
+}
index 0c4ada17164a5785c384385d5c43b27314e366cb..d8651cf4d17ae31454804650c4133e80de3bd2a7 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
#define OFFLOAD_TO_DSP 1
#define NO_OFFLOAD_TO_DSP 0
index c5302c55f9a8297260e860e95b6e693b9bc3f302..68f35526da4a9d9db5c75392198976977671f609 100755 (executable)
my $hdr_init = generate_header_specific_init_code();
my $armcode = <<"END_ARM_INIT";
$source_code_header
-#include "${namespace}.h"
+#include "${namespace}_acc.h"
#ifdef ${NAMESPACE}_FAT_BINARY
#include "${namespace}_kernel.dsp_h"
my $armcode = <<"ARM_FROM_PROTO";
$source_code_header
-#include "${namespace}.h"
+#include "${namespace}_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
# $i_plus_1 = $i+1;
$armcode .= "
void *msmc_ptr;
+ size_t msmc_size = MSMC_BUF_SIZE;
msmc_ptr = ${namespace}_mem_alloc(MSMC_BUF_SIZE);
#ifdef __cplusplus
Buffer buf_MSMC(*${namespace}_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
TI_CBLAS_OCL_CHKERROR(\"clSetKernelArg\",err);
#endif
";
-##ifdef __cplusplus
-# __K->setArg($i_plus_1, __local(L2_BUF_SIZE));
-##else
-# err |= clSetKernelArg(__K, $i_plus_1, L2_BUF_SIZE, NULL);
-##endif
-#";
+ $i++;
+ $i_plus_1 = $i+1;
+ $armcode .= "
+#ifdef __cplusplus
+ __K->setArg($i, msmc_size);
+#else
+ err |= clSetKernelArg(__K, $i, sizeof(msmc_size), &msmc_size);
+#endif
+
+ /* create a buffer argument to get the return error code from the DSP */
+ int err_code;
+#ifdef __cplusplus
+ Buffer buf_err(*${namespace}_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
+ __K->setArg($i_plus_1, buf_err);
+#else
+ cl_mem buf_err = clCreateBuffer(*${namespace}_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
+ ${NAMESPACE}_OCL_CHKERROR(\"clCreateBuffer\",err);
+ err |= clSetKernelArg(__K, $i_plus_1, sizeof(buf_err), &buf_err);
+ ${NAMESPACE}_OCL_CHKERROR(\"clSetKernelArg\",err);
+#endif
+";
+ $i+=2;
}
+
if ($tramptype !~ /^void$/i) {
$armcode .= "
/* create a buffer argument to get the return value from the DSP */
Buffer buf_retval(*${namespace}_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof($tramptype), &retval);
__K->setArg($i, buf_retval);
#else
- cl_mem buf_retval = clCreateBuffer(${namespace}_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof($tramptype), &retval, &err);
+ cl_mem buf_retval = clCreateBuffer(*${namespace}_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof($tramptype), &retval, &err);
${NAMESPACE}_OCL_CHKERROR(\"clCreateBuffer\",err);
err |= clSetKernelArg(__K, $i, sizeof(buf_retval), &buf_retval);
${NAMESPACE}_OCL_CHKERROR(\"clSetKernelArg\",err);
#endif
";
}
+
$armcode .= "
#ifdef __cplusplus
${namespace}_ocl_Q->enqueueTask(*__K, 0, &e);
${NAMESPACE}_OCL_CHKERROR(\"clWaitForEvents\",err);
err |= clReleaseEvent(e);
${NAMESPACE}_OCL_CHKERROR(\"clReleaseEvent\",err);
-
#endif
";
if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
+# Level 3 kernels hand back a status in err_code: emit host code that reports a
+# non-success value and then releases the MSMC buffer. The message ends with
+# ".\n" so the period stays on the same line as the code.
$armcode .= "
+ if(err_code != TICBLAS_SUCCESS) {
+ printf(\"Error code returned by offloaded $trampname is %d.\\n\", err_code);
+ }
+
${namespace}_mem_free(msmc_ptr);
";
}
print "In generate_kernel_from_proto, this is a level 3 function - " . $trampname . "\n";
# $oclcode .= ", global double *l3_buf, local double *l2_buf_loc";
# $trampproto .= ", global double *l3_buf, local double *l2_buf_loc";
- $oclcode .= ", global double *l3_buf";
- $trampproto .= ", global double *l3_buf";
+ # err_code is bound to a buffer object on the host side, so the emitted kernel
+ # and facade prototypes declare it in the global address space, like l3_buf.
+ $oclcode .= ", global double *l3_buf, size_t l3_buf_size, global int *err_code";
+ $trampproto .= ", global double *l3_buf, size_t l3_buf_size, global int *err_code";
}
}
}
if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
# $oclcode .= ", l3_buf, l2_buf_loc";
- $oclcode .= ", l3_buf";
+ $oclcode .= ", l3_buf, l3_buf_size, err_code";
}
$oclcode .= "); }";
$oclcode = $trampproto . "\n" . $oclcode;
{
my $facade_prologue = <<"FACADE_PROLOGUE";
$source_code_header
-#include <stdio.h>
#include "../../cblas/include/cblas.h"
-#include "blis.h"
-#define DEVICE_K2H
-
-#include <dsp_c.h>
-
-#define getNextMultiple(x, y) ( ( ((x)+(y)-1)/(y) )* (y) )
-// L1 buffer is hardwared here
-#define L1_BUF_LOC 0x00F00000
-
-// note these pointers must be filled if used functions
-char *pool_mk_mem_L1;
-char *pool_kn_mem_L1;
-char *pool_mn_mem_L1;
+#include "../../ticblas/ticblas.h"
-char *pool_mk_mem_L2;
-char *pool_kn_mem_L2;
-char *pool_mn_mem_L2;
-
-char *pool_mk_mem_L3;
-char *pool_kn_mem_L3;
-char *pool_mn_mem_L3;
-
-extern void bli_mem_init();
-
-void ti_bli_init_dsp(char *l3_buf, char *l2_buf)
-{
- bli_init();
-}
-
-void ti_bli_finalize_dsp(void)
-{
- bli_finalize();
-}
+extern int bli_l3_mem_config(double *msmc_buf, size_t msmc_buf_size, size_t *l1D_SRAM_size_orig, size_t *l2_SRAM_size_orig);
+extern int bli_l3_mem_reconfig(size_t l1D_SRAM_size_orig, size_t l2_SRAM_size_orig);
FACADE_PROLOGUE
return $facade_prologue;
if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
# $dspcode .= ", float *l3_buf, float *l2_buf_loc";
# $trampproto .= ", float *l3_buf, float *l2_buf_loc" ;
- $dspcode .= ", float *l3_buf";
- $trampproto .= ", float *l3_buf" ;
+ $dspcode .= ", double *l3_buf, size_t l3_buf_size, int *err_code";
+ $trampproto .= ", double *l3_buf, size_t l3_buf_size, int *err_code";
}
$trampproto .= ");";
if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
# print "facade code to setup cache for level 3 function ". $trampname ."\n";
$dspcode .= "
- pool_mk_mem_L1 = (char *) getNextMultiple((int) L1_BUF_LOC, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L1 = (char *) getNextMultiple(((int) pool_mk_mem_L1) + BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L1 = (char *) getNextMultiple(((int) pool_kn_mem_L1) + BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L2 = (char *) getNextMultiple((int) l2_buf_loc, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L2 = (char *) getNextMultiple(((int) pool_mk_mem_L2) + BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L2 = (char *) getNextMultiple(((int) pool_kn_mem_L2) + BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);
-
- pool_mk_mem_L3 = (char *) getNextMultiple((int) l3_buf, BLIS_CACHE_LINE_SIZE);
- pool_kn_mem_L3 = (char *) getNextMultiple(((int) pool_mk_mem_L3) + BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
- pool_mn_mem_L3 = (char *) getNextMultiple(((int) pool_kn_mem_L3) + BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);
-
- bli_mem_init();
-
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_4k();
- }
+ size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ if(*err_code != TICBLAS_SUCCESS) {
+ return;
+ }
+
";
}
else {
if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
# print "facade code to return default cache for level 3 function ". $trampname ."\n";
$dspcode .= "
- #pragma omp parallel
- {
- __cache_l1d_flush();
- __cache_l1d_all();
- }
+ *err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
";
}
else {
unlink glob "ti_cblas_initfini.c";
unlink glob "ti_cblas_kernel.cl";
unlink glob "ti_cblas_cblas*.c";
- unlink glob "ti_cblas.h";
+ unlink glob "ti_cblas_acc.h";
#unlink glob "*.inc";
#unlink glob "*.a";
}
index 7257fdb1a9f29f3997914f58e7ec5a0dd38964b1..3c3c2a727338fb4163d911cb93f78880f47e95a1 100755 (executable)
CMISCFLAGS += -I$(LINUX_DEVKIT_ROOT)/usr/include
$(info Using $(UNAME_M))
endif
-CMISCFLAGS += -mv6600 --use_g2 --omp #-std=c99 # -fopenmp -pg
+CMISCFLAGS += -mv6600 --use_g2 --omp -DDEVICE_K2H -DLIB_OPENCL #-std=c99 # -fopenmp -pg
CDBGFLAGS := -s
diff --git a/ticblas/src/Makefile b/ticblas/src/Makefile
--- /dev/null
+++ b/ticblas/src/Makefile
@@ -0,0 +1,44 @@
+
+include ../../make.inc
+
+include ./make.inc
+
+TI_INSTALL_DIR?=/usr/src/dsp
+
+PATH:=$(TI_OCL_CGT_INSTALL)/bin:$(PATH)
+
+# FIND_DSP_PKG - template meant to be expanded with $(call ...) and fed to $(eval ...).
+#   argument 1: name of the variable to define and export
+#   argument 2: directory glob searched under TI_INSTALL_DIR
+#   argument 3: sub-path that must exist inside the package directory
+# A value already set for the variable is kept (?=). Otherwise the wildcard
+# matches are sorted, the last one is taken and the trailing "/<argument 3>"
+# is stripped. Parsing stops with an error when the variable ends up empty or
+# when "<value>/<argument 3>" does not exist; the chosen value is then printed.
+define FIND_DSP_PKG
+ export $(1)?=$$(patsubst %/$(3),%,$$(lastword $$(sort $$(wildcard $$(TI_INSTALL_DIR)/$(2)/$(3)))))
+ ifeq ($$($(1)),)
+ $$(error ERROR - $(1) is not defined and could not be found in $(TI_INSTALL_DIR)/ )
+ else
+ ifeq ($$(wildcard $$($(1))/$(3)),)
+ $$(error ERROR - "$(1) = $$($(1))" Is not valid!)
+ endif
+ endif
+ $$(info Using $(1) = $$($(1)))
+endef
+
+# Machine name of the build host, used below to decide whether the PDK is looked up.
+UNAME_M :=$(shell uname -m)
+
+# INCLUDE Directory
+# Each call below defines and exports one <NAME>_DIR variable through FIND_DSP_PKG.
+$(eval $(call FIND_DSP_PKG,OMP_DIR,openmp_dsp*,packages))
+# The PDK is only searched for when "uname -m" contains "86".
+# NOTE(review): this sets C6636_PDK_DIR, while INCDIR below references $(PDK_DIR) - confirm where PDK_DIR comes from.
+ifneq (,$(findstring 86, $(UNAME_M)))
+$(eval $(call FIND_DSP_PKG,C6636_PDK_DIR,pdk_keystone2*,packages))
+endif
+
+$(eval $(call FIND_DSP_PKG,FC_DIR,framework_components*,packages))
+$(eval $(call FIND_DSP_PKG,LIBARCH_DIR,libarch*,packages))
+$(eval $(call FIND_DSP_PKG,XDAIS_DIR,xdais*,packages))
+$(eval $(call FIND_DSP_PKG,XDC_DIR,xdc*,packages))
+
+# Include search path. The first entries are ';'-separated bare directories
+# (INCS turns each ';' into " -I"); the package directories that follow
+# already carry their own -I prefix.
+INCDIR := $(TI_OCL_CGT_INSTALL)/include;$(TARGET_ROOTDIR)/usr/include;../../blis/install/c66x/include/blis/;$(OMP_DIR)/packages/ti/runtime/openmp;$(PDK_DIR)/packages \
+          -I$(FC_DIR)/packages \
+          -I$(XDC_DIR)/packages \
+          -I$(XDAIS_DIR)/packages \
+          -I$(LIBARCH_DIR)/packages
+
+# Final -I list: the current directory first, then everything from INCDIR.
+INCS = -I. -I$(strip $(subst ;, -I,$(subst $(space),$(space),$(INCDIR))))
+
+# Compiler flags for the DSP build.
+CL6X_FLAGS = $(INCS) --openmp --use_g2 -DDEVICE_K2H -DLIB_OPENCL
+#CL6X_FLAGS = $(INCS) --openmp --use_g2
diff --git a/ticblas/src/make.inc b/ticblas/src/make.inc
--- /dev/null
+++ b/ticblas/src/make.inc
@@ -0,0 +1,17 @@
+
+# Defines
+DSP_LIB_DIR = ../lib
+DSP_LIB = $(DSP_LIB_DIR)/libticblas.a66x
+
+OBJS = ticblas.obj
+
+# None of these names is a file produced by the build.
+.PHONY: all cross lib clean
+
+all: lib
+cross: lib
+
+# "lib" is only an alias; the archive itself is the real target, so it is
+# re-created only when an object file is newer than it.
+lib: $(DSP_LIB)
+
+$(DSP_LIB): $(OBJS)
+	@echo; echo "Building DSP lib: $@"
+	mkdir -p $(@D)
+	$(AR) -cr $@ $^
+
+# -f keeps "make clean" from failing on an already clean tree; the object
+# files are build products of this directory and are removed as well.
+clean::
+	rm -f $(DSP_LIB) $(OBJS)
\ No newline at end of file
diff --git a/ticblas/src/ticblas.c b/ticblas/src/ticblas.c
--- /dev/null
+++ b/ticblas/src/ticblas.c
@@ -0,0 +1,147 @@
+/******************************************************************************\r
+ * Copyright (c) 2015, Texas Instruments Incorporated - http://www.ti.com\r
+ * All rights reserved.\r
+ *\r
+ * Redistribution and use in source and binary forms, with or without\r
+ * modification, are permitted provided that the following conditions are met:\r
+ * * Redistributions of source code must retain the above copyright\r
+ * notice, this list of conditions and the following disclaimer.\r
+ * * Redistributions in binary form must reproduce the above copyright\r
+ * notice, this list of conditions and the following disclaimer in the\r
+ * documentation and/or other materials provided with the distribution.\r
+ * * Neither the name of Texas Instruments Incorporated nor the\r
+ * names of its contributors may be used to endorse or promote products\r
+ * derived from this software without specific prior written permission.\r
+ *\r
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"\r
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\r
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\r
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\r
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\r
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\r
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\r
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF\r
+ * THE POSSIBILITY OF SUCH DAMAGE.\r
+ *****************************************************************************/\r
+#include <ti/libarch/libarch.h>\r
+#include "../ticblas.h"\r
+#include "blis.h"\r
+\r
+#define BLAS_LEVEL3_L1DSRAM_SIZE (28*1024)\r
+#define BLAS_LEVEL3_L2SRAM_SIZE (0xBFE00)\r
+#define BLAS_LEVEL3_MSMC_SIZE (0x47FDC0)\r
+\r
+#define BLAS_MEM_SIZE_VFAST BLAS_LEVEL3_L1DSRAM_SIZE \r
+#define BLAS_MEM_SIZE_FAST BLAS_LEVEL3_L2SRAM_SIZE\r
+#define BLAS_MEM_SIZE_MEDIUM BLAS_LEVEL3_MSMC_SIZE\r
+#define BLAS_MEM_SIZE_SLOW (0)\r
+\r
+\r
+/* Define memory descriptors for memory management */\r
+lib_memdscr_t blas_mem_vfast;\r
+lib_memdscr_t blas_mem_fast;\r
+lib_memdscr_t blas_mem_medium;\r
+lib_memdscr_t blas_mem_slow;\r
+\r
+/* Define a memory descriptor array */\r
+lib_memdscr_t * blas_memdscr_tab[LIB_MEMTYPE_N] = {\r
+ &blas_mem_vfast,\r
+ &blas_mem_fast,\r
+ &blas_mem_medium,\r
+ &blas_mem_slow\r
+};\r
+\r
+// note these pointers must be filled if used functions\r
+char *pool_mk_mem_L1;\r
+char *pool_kn_mem_L1;\r
+char *pool_mn_mem_L1;\r
+\r
+char *pool_mk_mem_L2;\r
+char *pool_kn_mem_L2;\r
+char *pool_mn_mem_L2;\r
+\r
+char *pool_mk_mem_L3;\r
+char *pool_kn_mem_L3;\r
+char *pool_mn_mem_L3;\r
+\r
+extern void bli_mem_init();\r
+\r
+/*==============================================================================\r
+ * Accessor for the memory descriptor table: hands back the address of the\r
+ * first entry of blas_memdscr_tab as an untyped pointer.\r
+ *============================================================================*/\r
+void * blasGetMemHandle()\r
+{\r
+    return (void *)blas_memdscr_tab;\r
+} /* blasGetMemHandle */\r
+\r
+/*==============================================================================\r
+ * Reports the scratch memory requirement for each of the 4 memory types of\r
+ * the library framework. Every output receives the matching\r
+ * BLAS_MEM_SIZE_* constant of this file.\r
+ *============================================================================*/\r
+void tiCblasGetSizes(size_t *smem_size_vfast, size_t *smem_size_fast,\r
+                     size_t *smem_size_medium, size_t *smem_size_slow)\r
+{\r
+    /* very fast scratch memory */\r
+    *smem_size_vfast  = BLAS_MEM_SIZE_VFAST;\r
+    /* fast scratch memory */\r
+    *smem_size_fast   = BLAS_MEM_SIZE_FAST;\r
+    /* medium speed scratch memory */\r
+    *smem_size_medium = BLAS_MEM_SIZE_MEDIUM;\r
+    /* slow scratch memory */\r
+    *smem_size_slow   = BLAS_MEM_SIZE_SLOW;\r
+} /* tiCblasGetSizes */\r
+\r
+/*==============================================================================\r
+ * Initializes the very fast, fast and medium heaps through the library\r
+ * framework API and carves the nine BLIS pools (MK/KN/MN for L1, L2 and L3)\r
+ * out of them, then calls bli_mem_init().\r
+ *\r
+ * Returns TICBLAS_INIT_ERROR when one of the three regions is NULL or smaller\r
+ * than its BLAS_MEM_SIZE_* requirement, or when any pool allocation returns\r
+ * NULL; TICBLAS_SUCCESS otherwise. The slow region arguments are not used.\r
+ *============================================================================*/\r
+int tiCblasInit(void * mem_vfast_base, size_t mem_vfast_size,\r
+                void * mem_fast_base, size_t mem_fast_size,\r
+                void * mem_medium_base, size_t mem_medium_size,\r
+                void * mem_slow_base, size_t mem_slow_size)\r
+{\r
+    lib_memdscr_t **heaps = blasGetMemHandle();\r
+\r
+    /* Guard clauses: each supplied region must exist and be large enough */\r
+    if( (mem_vfast_base == NULL) || (mem_vfast_size < BLAS_MEM_SIZE_VFAST) ) {\r
+        return(TICBLAS_INIT_ERROR);\r
+    }\r
+    if( (mem_fast_base == NULL) || (mem_fast_size < BLAS_MEM_SIZE_FAST) ) {\r
+        return(TICBLAS_INIT_ERROR);\r
+    }\r
+    if( (mem_medium_base == NULL) || (mem_medium_size < BLAS_MEM_SIZE_MEDIUM) ) {\r
+        return(TICBLAS_INIT_ERROR);\r
+    }\r
+\r
+    /* Register the three regions with the framework heaps */\r
+    lib_smem_vinit(heaps, mem_vfast_base, mem_vfast_size);\r
+    lib_smem_finit(heaps, mem_fast_base, mem_fast_size);\r
+    lib_smem_minit(heaps, mem_medium_base, mem_medium_size);\r
+\r
+    /* L1 pools come from the very fast heap */\r
+    pool_mk_mem_L1 = lib_smem_valloc(heaps, BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);\r
+    pool_kn_mem_L1 = lib_smem_valloc(heaps, BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);\r
+    pool_mn_mem_L1 = lib_smem_valloc(heaps, BLIS_MN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);\r
+\r
+    /* L2 pools come from the fast heap */\r
+    pool_mk_mem_L2 = lib_smem_falloc(heaps, BLIS_MK_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);\r
+    pool_kn_mem_L2 = lib_smem_falloc(heaps, BLIS_KN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);\r
+    pool_mn_mem_L2 = lib_smem_falloc(heaps, BLIS_MN_POOL_SIZE_L2, BLIS_CACHE_LINE_SIZE);\r
+\r
+    /* L3 pools come from the medium heap */\r
+    pool_mk_mem_L3 = lib_smem_malloc(heaps, BLIS_MK_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);\r
+    pool_kn_mem_L3 = lib_smem_malloc(heaps, BLIS_KN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);\r
+    pool_mn_mem_L3 = lib_smem_malloc(heaps, BLIS_MN_POOL_SIZE_L3, BLIS_CACHE_LINE_SIZE);\r
+\r
+    /* Any failed allocation aborts the initialization */\r
+    if( !pool_mk_mem_L1 || !pool_kn_mem_L1 || !pool_mn_mem_L1\r
+      ||!pool_mk_mem_L2 || !pool_kn_mem_L2 || !pool_mn_mem_L2\r
+      ||!pool_mk_mem_L3 || !pool_kn_mem_L3 || !pool_mn_mem_L3 ) {\r
+        return(TICBLAS_INIT_ERROR);\r
+    }\r
+\r
+    bli_mem_init();\r
+    return(TICBLAS_SUCCESS);\r
+} /* tiCblasInit */\r
+\r
+/* Nothing after this line */\r
diff --git a/ticblas/ticblas.h b/ticblas/ticblas.h
--- /dev/null
+++ b/ticblas/ticblas.h
@@ -0,0 +1,42 @@
+/******************************************************************************\r
+ * Copyright (c) 2015, Texas Instruments Incorporated - http://www.ti.com\r
+ * All rights reserved.\r
+ *\r
+ * Redistribution and use in source and binary forms, with or without\r
+ * modification, are permitted provided that the following conditions are met:\r
+ * * Redistributions of source code must retain the above copyright\r
+ * notice, this list of conditions and the following disclaimer.\r
+ * * Redistributions in binary form must reproduce the above copyright\r
+ * notice, this list of conditions and the following disclaimer in the\r
+ * documentation and/or other materials provided with the distribution.\r
+ * * Neither the name of Texas Instruments Incorporated nor the\r
+ * names of its contributors may be used to endorse or promote products\r
+ * derived from this software without specific prior written permission.\r
+ *\r
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"\r
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\r
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE\r
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\r
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF\r
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS\r
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN\r
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)\r
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF\r
+ * THE POSSIBILITY OF SUCH DAMAGE.\r
+ *****************************************************************************/\r
+#ifndef TICBLAS_H\r
+#define TICBLAS_H\r
+\r
+#include <stddef.h>   /* size_t, so the header does not depend on include order */\r
+\r
+/* Status codes returned by tiCblasInit() */\r
+#define TICBLAS_SUCCESS    (0)\r
+#define TICBLAS_INIT_ERROR (-1)\r
+\r
+/* Reports the scratch memory requirement of each of the four memory types\r
+ * (very fast, fast, medium, slow) through the four output pointers. */\r
+void tiCblasGetSizes(size_t *smem_size_vfast,  size_t *smem_size_fast,\r
+                     size_t *smem_size_medium, size_t *smem_size_slow);\r
+\r
+/* Initializes the library heaps from the supplied memory regions (base\r
+ * address and size per memory type). Returns TICBLAS_SUCCESS or\r
+ * TICBLAS_INIT_ERROR. */\r
+int tiCblasInit(void * mem_vfast_base,  size_t mem_vfast_size,\r
+                void * mem_fast_base,   size_t mem_fast_size,\r
+                void * mem_medium_base, size_t mem_medium_size,\r
+                void * mem_slow_base,   size_t mem_slow_size);\r
+\r
+#endif /* TICBLAS_H */\r