summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 8f9b629)
raw | patch | inline | side by side (parent: 8f9b629)
author | Jianzhong Xu <xuj@ti.com> | |
Mon, 25 Jan 2016 20:56:49 +0000 (20:56 +0000) | ||
committer | Jianzhong Xu <xuj@ti.com> | |
Mon, 25 Jan 2016 20:56:49 +0000 (20:56 +0000) |
162 files changed:
index 3beb479988a780340ea2f372c4c350d5210285b0..547d98328ae13fad4ada8e194bedd8736449e132 100644 (file)
--- a/blasblisacc/src/facade.c
+++ b/blasblisacc/src/facade.c
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-
+
#include "../../cblas/include/cblas.h"
#include "../../ticblas/ticblas.h"
extern char *pool_mn_mem_L3;
#endif
-extern int bli_l3_mem_config(double *msmc_buf, size_t msmc_buf_size, size_t *l1D_SRAM_size_orig, size_t *l2_SRAM_size_orig);
+extern int bli_l3_mem_config(void *msmc_buf, size_t msmc_buf_size, void *ddr_buf, size_t ddr_buf_size, size_t *l1D_SRAM_size_orig, size_t *l2_SRAM_size_orig);
extern int bli_l3_mem_reconfig(size_t l1D_SRAM_size_orig, size_t l2_SRAM_size_orig);
+
void cblas_caxpy_facade(const int N, const void *alpha, const void *X, const int incX, void *Y, const int incY)
{
cblas_caxpy(N, alpha, X, incX, Y, incY);
@@ -71,11 +72,11 @@ void cblas_cgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE
cblas_cgbmv(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY);
}
-void cblas_cgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_cgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -105,11 +106,11 @@ void cblas_chbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_chbmv(order, Uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY);
}
-void cblas_chemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_chemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -134,11 +135,11 @@ void cblas_cher2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_cher2(order, Uplo, N, alpha, X, incX, Y, incY, A, lda);
}
-void cblas_cher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const float beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_cher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const float beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -148,11 +149,11 @@ void cblas_cher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Upl
*err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_cherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const void *A, const int lda, const float beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_cherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const void *A, const int lda, const float beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -197,11 +198,11 @@ void cblas_cswap_facade(const int N, void *X, const int incX, void *Y, const int
cblas_cswap(N, X, incX, Y, incY);
}
-void cblas_csymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_csymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -211,11 +212,11 @@ void cblas_csymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side
*err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_csyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_csyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -225,11 +226,11 @@ void cblas_csyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Upl
*err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_csyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_csyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -259,11 +260,11 @@ void cblas_ctpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_ctpsv(order, Uplo, TransA, Diag, N, Ap, X, incX);
}
-void cblas_ctrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ctrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -278,11 +279,11 @@ void cblas_ctrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_ctrmv(order, Uplo, TransA, Diag, N, A, lda, X, incX);
}
-void cblas_ctrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ctrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -322,21 +323,15 @@ void cblas_dgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE
cblas_dgbmv(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY);
}
-void cblas_dgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_dgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
-#ifdef TI_CBLAS_DEBUG
- printf("pool_mk_mem_L1 is 0x%x, pool_kn_mem_L1 is 0x%x, pool_mn_mem_L1 is 0x%x.\n", (unsigned int)pool_mk_mem_L1, (unsigned int)pool_kn_mem_L1, (unsigned int)pool_mn_mem_L1);
- printf("pool_mk_mem_L2 is 0x%x, pool_kn_mem_L2 is 0x%x, pool_mn_mem_L2 is 0x%x.\n", (unsigned int)pool_mk_mem_L2, (unsigned int)pool_kn_mem_L2, (unsigned int)pool_mn_mem_L2);
- printf("pool_mk_mem_L3 is 0x%x, pool_kn_mem_L3 is 0x%x, pool_mn_mem_L3 is 0x%x.\n", (unsigned int)pool_mk_mem_L3, (unsigned int)pool_kn_mem_L3, (unsigned int)pool_mn_mem_L3);
-#endif
-
cblas_dgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);
*err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
@@ -412,11 +407,11 @@ void cblas_dswap_facade(const int N, double *X, const int incX, double *Y, const
cblas_dswap(N, X, incX, Y, incY);
}
-void cblas_dsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_dsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -441,11 +436,11 @@ void cblas_dsyr2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_dsyr2(order, Uplo, N, alpha, X, incX, Y, incY, A, lda);
}
-void cblas_dsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_dsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -455,11 +450,11 @@ void cblas_dsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Upl
*err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_dsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_dsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const double *A, const int lda, const double beta, double *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -489,11 +484,11 @@ void cblas_dtpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_dtpsv(order, Uplo, TransA, Diag, N, Ap, X, incX);
}
-void cblas_dtrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_dtrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -508,11 +503,11 @@ void cblas_dtrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_dtrmv(order, Uplo, TransA, Diag, N, A, lda, X, incX);
}
-void cblas_dtrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_dtrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, const double *A, const int lda, double *B, const int ldb, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -597,11 +592,11 @@ void cblas_sgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE
cblas_sgbmv(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY);
}
-void cblas_sgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_sgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -676,11 +671,11 @@ void cblas_sswap_facade(const int N, float *X, const int incX, float *Y, const i
cblas_sswap(N, X, incX, Y, incY);
}
-void cblas_ssymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ssymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -705,11 +700,11 @@ void cblas_ssyr2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_ssyr2(order, Uplo, N, alpha, X, incX, Y, incY, A, lda);
}
-void cblas_ssyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ssyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float *B, const int ldb, const float beta, float *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -719,11 +714,11 @@ void cblas_ssyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Upl
*err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_ssyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ssyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, const float *A, const int lda, const float beta, float *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -753,11 +748,11 @@ void cblas_stpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_stpsv(order, Uplo, TransA, Diag, N, Ap, X, incX);
}
-void cblas_strmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_strmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -772,11 +767,11 @@ void cblas_strmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_strmv(order, Uplo, TransA, Diag, N, A, lda, X, incX);
}
-void cblas_strsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_strsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, const float *A, const int lda, float *B, const int ldb, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -826,11 +821,11 @@ void cblas_zgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE
cblas_zgbmv(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY);
}
-void cblas_zgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -860,11 +855,11 @@ void cblas_zhbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_zhbmv(order, Uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY);
}
-void cblas_zhemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zhemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -889,11 +884,11 @@ void cblas_zher2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_zher2(order, Uplo, N, alpha, X, incX, Y, incY, A, lda);
}
-void cblas_zher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const double beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const double beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -903,11 +898,11 @@ void cblas_zher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Upl
*err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_zherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const void *A, const int lda, const double beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, const void *A, const int lda, const double beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -947,11 +942,11 @@ void cblas_zswap_facade(const int N, void *X, const int incX, void *Y, const int
cblas_zswap(N, X, incX, Y, incY);
}
-void cblas_zsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -961,11 +956,11 @@ void cblas_zsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side
*err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_zsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *B, const int ldb, const void *beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -975,11 +970,11 @@ void cblas_zsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Upl
*err_code = bli_l3_mem_reconfig(l1D_SRAM_size_orig, l2_SRAM_size_orig);
}
-void cblas_zsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_zsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const void *alpha, const void *A, const int lda, const void *beta, void *C, const int ldc, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -1009,11 +1004,11 @@ void cblas_ztpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_ztpsv(order, Uplo, TransA, Diag, N, Ap, X, incX);
}
-void cblas_ztrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ztrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
@@ -1028,11 +1023,11 @@ void cblas_ztrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo
cblas_ztrmv(order, Uplo, TransA, Diag, N, A, lda, X, incX);
}
-void cblas_ztrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, double *l3_buf, size_t l3_buf_size, int *err_code)
+void cblas_ztrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const void *alpha, const void *A, const int lda, void *B, const int ldb, void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code)
{
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
index fdea549af1fc8564b93fe57071c73b12b391c815..70a040c7758529e14f487b8e89a092300dd61e3c 100644 (file)
#define MSMC_BUF_SIZE 0x47FDC0
//#define MSMC_BUF_SIZE 0x47F100 // MR=NR=4 for S
+#define DDR_BUF_SIZE (16384)
//DSPBLIS
//#define MSMC_BUF_SIZE 0x400000
index 39daf708f3665a6dbc3a3b8f36e29e49d8fe4b05..9f20b26c5c959bd22f81091306731494796cd593 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CAXPY_IDX, "ocl_cblas_caxpy");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CAXPY_IDX, "ocl_cblas_caxpy");
+
#ifdef __cplusplus
try
#else
index bf5b16d0cf2e88d088f1ca470a6db33a7715654c..bf48d8435b8ebb2d106c242cd1e87bb2797835e3 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CCOPY_IDX, "ocl_cblas_ccopy");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CCOPY_IDX, "ocl_cblas_ccopy");
+
#ifdef __cplusplus
try
#else
diff --git a/blasblisacc/src/ti_cblas_cblas_cdotc_sub.c b/blasblisacc/src/ti_cblas_cblas_cdotc_sub.c
index 9cfc775883c857f2d934a622ec5e87f03a48b4e5..c54530ef6cebdcdc221d47d613b5b5b405ba2ce3 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CDOTC_SUB_IDX, "ocl_cblas_cdotc_sub");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CDOTC_SUB_IDX, "ocl_cblas_cdotc_sub");
+
#ifdef __cplusplus
try
#else
diff --git a/blasblisacc/src/ti_cblas_cblas_cdotu_sub.c b/blasblisacc/src/ti_cblas_cblas_cdotu_sub.c
index 68d4684a1f3c8c8fc98c3dbbdaffc1a928f858d8..4070c52189bb4baecbceff2636f44f78a3583df5 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CDOTU_SUB_IDX, "ocl_cblas_cdotu_sub");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CDOTU_SUB_IDX, "ocl_cblas_cdotu_sub");
+
#ifdef __cplusplus
try
#else
index 097792c134ec24685a50ddada1299d4db27116b9..f425fa1f73742d0754dbdc86339d717072ac2e0f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGBMV_IDX, "ocl_cblas_cgbmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGBMV_IDX, "ocl_cblas_cgbmv");
+
#ifdef __cplusplus
try
#else
index ffd982072e7e077bbfc186ae7790be1924e3b991..3eb73efaa015ccac8fb7a71cb33dd5c1b3daf95c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGEMM_IDX, "ocl_cblas_cgemm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGEMM_IDX, "ocl_cblas_cgemm");
+
#ifdef __cplusplus
try
#else
@@ -209,22 +210,29 @@ void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
err |= clSetKernelArg(__K, 14, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(15, msmc_size);
#else
err |= clSetKernelArg(__K, 15, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(16, buf_DDR);
+ __K->setArg(17, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(16, buf_err);
+ __K->setArg(18, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 18, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -246,6 +254,7 @@ void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 37962f7f3c59c8c5a2e7c6a41ca031cae843ff96..05fe4cfa38d6ea3c18874a5be894dc2632ef0b4c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGEMV_IDX, "ocl_cblas_cgemv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGEMV_IDX, "ocl_cblas_cgemv");
+
#ifdef __cplusplus
try
#else
index e8feac03ac0126dda9c8148cedc56b5803b19d91..d16e435a1c22065338c6d2de95c0db1ee822aac5 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGERC_IDX, "ocl_cblas_cgerc");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGERC_IDX, "ocl_cblas_cgerc");
+
#ifdef __cplusplus
try
#else
index 22f8c5046a527a67d8416d5a3af12cb6264efb93..ebf8c199e75cd4246eedc8f70ac34187efc691e0 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGERU_IDX, "ocl_cblas_cgeru");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CGERU_IDX, "ocl_cblas_cgeru");
+
#ifdef __cplusplus
try
#else
index ad7e51bdec8af637d8bd560db330cb99e1aeeafd..98ad84a78b75120d2ef3dbddf55405c452afc3fc 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHBMV_IDX, "ocl_cblas_chbmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHBMV_IDX, "ocl_cblas_chbmv");
+
#ifdef __cplusplus
try
#else
index b51fbc2953d5f3d445173f2d40f7d25fa29d9bba..f8348692443c0900bd911592622e6b64326ad181 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHEMM_IDX, "ocl_cblas_chemm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHEMM_IDX, "ocl_cblas_chemm");
+
#ifdef __cplusplus
try
#else
@@ -203,22 +204,29 @@ void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(14, msmc_size);
#else
err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(15, buf_DDR);
+ __K->setArg(16, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(15, buf_err);
+ __K->setArg(17, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 17, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -240,6 +248,7 @@ void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 8e076486e7069d395ba15c860ff9917db4051fdb..b81a5ca893498ff1851356a8dc0588dee46b23fd 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHEMV_IDX, "ocl_cblas_chemv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHEMV_IDX, "ocl_cblas_chemv");
+
#ifdef __cplusplus
try
#else
index b3ff8c31897c073178be5e85ac9c054a3317081d..f0ea8143e8f473bd6e4efb3d7bbc585a0329057e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHER_IDX, "ocl_cblas_cher");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHER_IDX, "ocl_cblas_cher");
+
#ifdef __cplusplus
try
#else
index eced71f9e068f36310ff13125f347bd154d08c26..12ba685746c159533dc326afb43a520f3641febe 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHER2_IDX, "ocl_cblas_cher2");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHER2_IDX, "ocl_cblas_cher2");
+
#ifdef __cplusplus
try
#else
index 709f036129cad062b6d46434b78fcdadfe3e56ce..ac6e37f4e20db12efe3e74d170d9e7c791a2b4bd 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHER2K_IDX, "ocl_cblas_cher2k");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHER2K_IDX, "ocl_cblas_cher2k");
+
#ifdef __cplusplus
try
#else
@@ -198,22 +199,29 @@ void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(14, msmc_size);
#else
err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(15, buf_DDR);
+ __K->setArg(16, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(15, buf_err);
+ __K->setArg(17, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 17, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -235,6 +243,7 @@ void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index d7db80b6a597cae2391e8fcd4495ea3bf1a14591..bce49b1cd305abee7baa64eefc6f4b064cf26b26 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHERK_IDX, "ocl_cblas_cherk");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHERK_IDX, "ocl_cblas_cherk");
+
#ifdef __cplusplus
try
#else
@@ -172,22 +173,29 @@ void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
err |= clSetKernelArg(__K, 11, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(12, msmc_size);
#else
err |= clSetKernelArg(__K, 12, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(13, buf_DDR);
+ __K->setArg(14, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(13, buf_err);
+ __K->setArg(15, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 13, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -209,6 +217,7 @@ void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index c4d1d624df19ef648ac0bbf0b8b772bab84b1d80..3de67e443f5714b7acf185940575112751242127 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHPMV_IDX, "ocl_cblas_chpmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHPMV_IDX, "ocl_cblas_chpmv");
+
#ifdef __cplusplus
try
#else
index 670016e0fd00928f84d311257bea466c629b0814..ec3155a58e59e5df7d40026b3d1ddece8e422b29 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHPR_IDX, "ocl_cblas_chpr");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHPR_IDX, "ocl_cblas_chpr");
+
#ifdef __cplusplus
try
#else
index 50d29f732e25eca733394abcd054e34dbf907618..e64057cbf75f07fab9af75620d3e99c189f82c17 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHPR2_IDX, "ocl_cblas_chpr2");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CHPR2_IDX, "ocl_cblas_chpr2");
+
#ifdef __cplusplus
try
#else
index c31ba615e8be56b3463efe8bb00678c3505c5acd..190ec8f1cca21f5d78802f88b28ec476ebdc5533 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CROTG_IDX, "ocl_cblas_crotg");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CROTG_IDX, "ocl_cblas_crotg");
+
#ifdef __cplusplus
try
#else
index 40d2452480b253c43d3c1a60ff12e77b3a126ed8..4c930606f994d222ea6a9cbd085763678db343b0 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSCAL_IDX, "ocl_cblas_cscal");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSCAL_IDX, "ocl_cblas_cscal");
+
#ifdef __cplusplus
try
#else
index aea68b46d1e5e28d9e266a7d500aea7b528cba77..a548c62aca96cd7f9884405a665c336488c6c2b5 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSSCAL_IDX, "ocl_cblas_csscal");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSSCAL_IDX, "ocl_cblas_csscal");
+
#ifdef __cplusplus
try
#else
index 67249d32b0156aa9aeecfc6a3d62c43a8716a624..e6d206a42c89e5be55b324d02633db0abc9cae19 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSWAP_IDX, "ocl_cblas_cswap");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSWAP_IDX, "ocl_cblas_cswap");
+
#ifdef __cplusplus
try
#else
index 6050de720b21b53d987867bbff0c1bbbc2367bc9..14785a23e6d2ac1ab59e6db6ff1f785ecc5a7eee 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSYMM_IDX, "ocl_cblas_csymm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSYMM_IDX, "ocl_cblas_csymm");
+
#ifdef __cplusplus
try
#else
@@ -203,22 +204,29 @@ void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(14, msmc_size);
#else
err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(15, buf_DDR);
+ __K->setArg(16, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(15, buf_err);
+ __K->setArg(17, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 17, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -240,6 +248,7 @@ void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 11feff3f8ef46768d433addd1dd61f499f72b6d8..2e26948440d0a54513459e29a6e9949b5b1b7205 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSYR2K_IDX, "ocl_cblas_csyr2k");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSYR2K_IDX, "ocl_cblas_csyr2k");
+
#ifdef __cplusplus
try
#else
@@ -203,22 +204,29 @@ void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(14, msmc_size);
#else
err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(15, buf_DDR);
+ __K->setArg(16, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(15, buf_err);
+ __K->setArg(17, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 17, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -240,6 +248,7 @@ void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 4840abdfc8efb414bd75151e7214fdb479d913a2..c76555749e462a2415c7b12e9c1b6a08fced6c7b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSYRK_IDX, "ocl_cblas_csyrk");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CSYRK_IDX, "ocl_cblas_csyrk");
+
#ifdef __cplusplus
try
#else
@@ -182,22 +183,29 @@ void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
err |= clSetKernelArg(__K, 11, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(12, msmc_size);
#else
err |= clSetKernelArg(__K, 12, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(13, buf_DDR);
+ __K->setArg(14, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(13, buf_err);
+ __K->setArg(15, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 13, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -219,6 +227,7 @@ void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 018b07030dd14955cf541b6d1c35166008f06e4f..467cf853e1394d09354185b127fd28a83c0ad82b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTBMV_IDX, "ocl_cblas_ctbmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTBMV_IDX, "ocl_cblas_ctbmv");
+
#ifdef __cplusplus
try
#else
index 6f34e3ae8c2e72642a732000e49845bb774f659f..9c9c9a92552ba3d01c88a3a187b6ac3c14d1ea6a 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTBSV_IDX, "ocl_cblas_ctbsv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTBSV_IDX, "ocl_cblas_ctbsv");
+
#ifdef __cplusplus
try
#else
index b2db324062eeb3f301dce6e89f8590c78ac0d142..251ce80a821841bc1125932513e2330a2abe5f18 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTPMV_IDX, "ocl_cblas_ctpmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTPMV_IDX, "ocl_cblas_ctpmv");
+
#ifdef __cplusplus
try
#else
index 45969d33cd990209ad89cd6d0ce64ce8d329ae6a..42b6a0bed910d142f0f870fa2ca2d1572e9f9f93 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTPSV_IDX, "ocl_cblas_ctpsv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTPSV_IDX, "ocl_cblas_ctpsv");
+
#ifdef __cplusplus
try
#else
index 54f291f3ada4bb76599a2e80e7b9145d67cb1d77..50d3607515a703784a85b67db26f5228ba228480 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTRMM_IDX, "ocl_cblas_ctrmm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTRMM_IDX, "ocl_cblas_ctrmm");
+
#ifdef __cplusplus
try
#else
@@ -183,22 +184,29 @@ void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(13, msmc_size);
#else
err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(14, buf_DDR);
+ __K->setArg(15, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(14, buf_err);
+ __K->setArg(16, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -220,6 +228,7 @@ void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 40054f941ce48333e91fa9af8267adee3d5a974e..c501935c1d3d91f0c2199a1c8cfb2abb66576f86 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTRMV_IDX, "ocl_cblas_ctrmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTRMV_IDX, "ocl_cblas_ctrmv");
+
#ifdef __cplusplus
try
#else
index 7804059c0351970f512ab4f496f721fc3faee424..c03d8a0acb7997d19aaf56c2f5d31703f80bb753 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTRSM_IDX, "ocl_cblas_ctrsm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTRSM_IDX, "ocl_cblas_ctrsm");
+
#ifdef __cplusplus
try
#else
@@ -183,22 +184,29 @@ void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(13, msmc_size);
#else
err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(14, buf_DDR);
+ __K->setArg(15, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(14, buf_err);
+ __K->setArg(16, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -220,6 +228,7 @@ void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 42d5a05d2ecf7c7099ab595206cba546270f8dd4..39c3ee4f3b6c4d28c4d6f467fea3cf0ce70d45b1 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTRSV_IDX, "ocl_cblas_ctrsv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_CTRSV_IDX, "ocl_cblas_ctrsv");
+
#ifdef __cplusplus
try
#else
index ac2c2af1905e9862621ec3c14007021e10283869..60527bd78fd82c1773f9bac6fb77abe3c4f2ed72 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DASUM_IDX, "ocl_cblas_dasum");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DASUM_IDX, "ocl_cblas_dasum");
+
#ifdef __cplusplus
try
#else
index fee688edb32d0aa4cd9050c42644b26076cacfea..b673a31464acfc370cf5b15290ee488eb943a252 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DAXPY_IDX, "ocl_cblas_daxpy");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DAXPY_IDX, "ocl_cblas_daxpy");
+
#ifdef __cplusplus
try
#else
index 58769ff7228ed96b15365979b6fe02f303876ff5..fa8877c17f807fc69412fa8eee340c4467c86171 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DCOPY_IDX, "ocl_cblas_dcopy");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DCOPY_IDX, "ocl_cblas_dcopy");
+
#ifdef __cplusplus
try
#else
index 5bfbb81b112d1ba164fd7d6361eb42c570627b36..de913e558c46dab97c2865f864490e5eb80db47c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DDOT_IDX, "ocl_cblas_ddot");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DDOT_IDX, "ocl_cblas_ddot");
+
#ifdef __cplusplus
try
#else
index 2e03fd2241bb5a78b5798c7e475abae99e7dcdc6..4eb8ca18ff0bb15ef5f4bcb502a08e9987d5a9ba 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DGBMV_IDX, "ocl_cblas_dgbmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DGBMV_IDX, "ocl_cblas_dgbmv");
+
#ifdef __cplusplus
try
#else
index 6fa53241eb386e9229f7c1261df1b2a931ca3ea5..4cfa7e56da6ae7414a4d2f43e454e62849416228 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DGEMM_IDX, "ocl_cblas_dgemm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DGEMM_IDX, "ocl_cblas_dgemm");
+
#ifdef __cplusplus
try
#else
@@ -199,22 +200,29 @@ void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
err |= clSetKernelArg(__K, 14, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(15, msmc_size);
#else
err |= clSetKernelArg(__K, 15, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(16, buf_DDR);
+ __K->setArg(17, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(16, buf_err);
+ __K->setArg(18, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 18, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -236,6 +244,7 @@ void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index acb7123ff30729f4f966d080dcc8d6c28c3703fe..c624f6cca3a40c9c5fa0597d910ce015da291c80 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DGEMV_IDX, "ocl_cblas_dgemv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DGEMV_IDX, "ocl_cblas_dgemv");
+
#ifdef __cplusplus
try
#else
index c035efed70e6150e651419c058d8402616b3ae47..0492d0aa90393e9e91e4c8f47eed80e36a021a9d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DGER_IDX, "ocl_cblas_dger");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DGER_IDX, "ocl_cblas_dger");
+
#ifdef __cplusplus
try
#else
index 0be91b895ee4701bff3e183c13da99a63203b15b..bc200fa6b73a8483437b8be129743f34f50a0fe6 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DNRM2_IDX, "ocl_cblas_dnrm2");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DNRM2_IDX, "ocl_cblas_dnrm2");
+
#ifdef __cplusplus
try
#else
index bb4cdc07730ba5aa5dd0c901b36207725d6c2974..86bacba7ff9f9a8ccbc3e38c331251f153d8d0fe 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DROT_IDX, "ocl_cblas_drot");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DROT_IDX, "ocl_cblas_drot");
+
#ifdef __cplusplus
try
#else
index 2c20ae308e8754752271ccfd84f3e27d6a9802fe..c535574abfa0498693584096376268f81f14520e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DROTG_IDX, "ocl_cblas_drotg");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DROTG_IDX, "ocl_cblas_drotg");
+
#ifdef __cplusplus
try
#else
index 1e4a374ea085398c56ccd279b9cb5fe02999a472..0cb941fd2abe30879be6c2c59814e806e1a3d741 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DROTM_IDX, "ocl_cblas_drotm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DROTM_IDX, "ocl_cblas_drotm");
+
#ifdef __cplusplus
try
#else
index ef4e979a576a1754a26a4a629bb3599c1383efd2..0cf5e8fa10c592ad895766bd4fdd07af8f603af5 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DROTMG_IDX, "ocl_cblas_drotmg");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DROTMG_IDX, "ocl_cblas_drotmg");
+
#ifdef __cplusplus
try
#else
index d3e1988c0837a5dd87065b12660350a6b78da736..920b40361577f965bfc38fb63842a17fa4d7a31c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSBMV_IDX, "ocl_cblas_dsbmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSBMV_IDX, "ocl_cblas_dsbmv");
+
#ifdef __cplusplus
try
#else
index 44c1510a00a0b7f831be31e3a06e5b495e66d70e..d75049f65afa31e77da37c9623bcde8b2a2fb6fb 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSCAL_IDX, "ocl_cblas_dscal");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSCAL_IDX, "ocl_cblas_dscal");
+
#ifdef __cplusplus
try
#else
index 7e7ff095c7f714b7e75e9c23f4b8226f555adf11..c976fefc2d1db46dff46a00058ce01f1755065c1 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSDOT_IDX, "ocl_cblas_dsdot");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSDOT_IDX, "ocl_cblas_dsdot");
+
#ifdef __cplusplus
try
#else
index 5239c73b879d30a40851a5f46ae1a04e33faadde..48a0e452f080642d37d9fd16dd0e9f567247bdb6 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSPMV_IDX, "ocl_cblas_dspmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSPMV_IDX, "ocl_cblas_dspmv");
+
#ifdef __cplusplus
try
#else
index d8c25dcf49eabb891204b2ab0be2378d6c88655a..93c94843f2675e378546df796d7ff164c7ff11cd 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSPR_IDX, "ocl_cblas_dspr");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSPR_IDX, "ocl_cblas_dspr");
+
#ifdef __cplusplus
try
#else
index 4d6b2c049debc0b14516beb402ec8e1f3d0593c6..89aff959b3476b815b36e62be51e06c638c1ce63 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSPR2_IDX, "ocl_cblas_dspr2");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSPR2_IDX, "ocl_cblas_dspr2");
+
#ifdef __cplusplus
try
#else
index 63cd15ada38181b6b5f14825263bb73857119fc4..b3e692928613b5f5f34a048a47df2bd8a27d52d2 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSWAP_IDX, "ocl_cblas_dswap");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSWAP_IDX, "ocl_cblas_dswap");
+
#ifdef __cplusplus
try
#else
index a6223ee92349bc7f723adac969d7e6ee376d978f..52dd00838ae0bc335c17b15fcdff3215a8055161 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYMM_IDX, "ocl_cblas_dsymm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYMM_IDX, "ocl_cblas_dsymm");
+
#ifdef __cplusplus
try
#else
@@ -193,22 +194,29 @@ void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(14, msmc_size);
#else
err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(15, buf_DDR);
+ __K->setArg(16, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(15, buf_err);
+ __K->setArg(17, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 17, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -230,6 +238,7 @@ void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index aca46f23ecadab0c668bac4510662baa53d1bddf..7e6a9627ff427826615c8aaf8a4c4a20c359b6ed 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYMV_IDX, "ocl_cblas_dsymv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYMV_IDX, "ocl_cblas_dsymv");
+
#ifdef __cplusplus
try
#else
index a421bcde70a15430b00f875af77980e9f38c55cb..1d42f0bd40df24f47c96d9a097cb85c9721731fd 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYR_IDX, "ocl_cblas_dsyr");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYR_IDX, "ocl_cblas_dsyr");
+
#ifdef __cplusplus
try
#else
index 5c64af929d45692dcaf68c44065e18404802bdde..5fa094ba29d3f72ba026ea29774e3460e2ec878f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYR2_IDX, "ocl_cblas_dsyr2");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYR2_IDX, "ocl_cblas_dsyr2");
+
#ifdef __cplusplus
try
#else
index 5e126a700094fd74be18c69c66c1ba50d9ca3a60..4c5f281b5d62fa41efb43e50c29f274fafaad2c7 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYR2K_IDX, "ocl_cblas_dsyr2k");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYR2K_IDX, "ocl_cblas_dsyr2k");
+
#ifdef __cplusplus
try
#else
@@ -193,22 +194,29 @@ void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(14, msmc_size);
#else
err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(15, buf_DDR);
+ __K->setArg(16, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(15, buf_err);
+ __K->setArg(17, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 17, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -230,6 +238,7 @@ void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index a5b7d87ef9aa0d5f2d4086bbc0eb0806cd617995..e07661799bfb34b4af00823f737fd24490f8ef35 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYRK_IDX, "ocl_cblas_dsyrk");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DSYRK_IDX, "ocl_cblas_dsyrk");
+
#ifdef __cplusplus
try
#else
@@ -172,22 +173,29 @@ void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
err |= clSetKernelArg(__K, 11, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(12, msmc_size);
#else
err |= clSetKernelArg(__K, 12, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(13, buf_DDR);
+ __K->setArg(14, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(13, buf_err);
+ __K->setArg(15, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 13, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -209,6 +217,7 @@ void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 8e1149847c15a490d37eb72c30e2cacd0ca4d89d..4856f3cb8a92bf0d1332cb118a6dee1470fcf8e0 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTBMV_IDX, "ocl_cblas_dtbmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTBMV_IDX, "ocl_cblas_dtbmv");
+
#ifdef __cplusplus
try
#else
index 095284708aea09ac3eaeb5c3a8e1d6a102301f5e..f9aaf2d9d20c7277c82d9053cb0bd6f668c4afc2 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTBSV_IDX, "ocl_cblas_dtbsv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTBSV_IDX, "ocl_cblas_dtbsv");
+
#ifdef __cplusplus
try
#else
index f8fa7fcde306160ef9e6620d8d2fd2a912ea12a4..d3740ee3cfecdc52912e75338f954cb094434303 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTPMV_IDX, "ocl_cblas_dtpmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTPMV_IDX, "ocl_cblas_dtpmv");
+
#ifdef __cplusplus
try
#else
index 6856032f5357329ac0a27df39ac90dd24b561714..030eea6765d132fdf1ef7a895afe68bbc5c82ec5 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTPSV_IDX, "ocl_cblas_dtpsv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTPSV_IDX, "ocl_cblas_dtpsv");
+
#ifdef __cplusplus
try
#else
index fd70c5dc4b706e5293b03e5920f4033319752ffb..f2107933378711c35ae27855d4096eb5b0d668f2 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTRMM_IDX, "ocl_cblas_dtrmm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTRMM_IDX, "ocl_cblas_dtrmm");
+
#ifdef __cplusplus
try
#else
@@ -178,22 +179,29 @@ void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(13, msmc_size);
#else
err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(14, buf_DDR);
+ __K->setArg(15, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(14, buf_err);
+ __K->setArg(16, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -215,6 +223,7 @@ void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 54755ec2ee5b645002cb2dce5ac2e7f5da8069ea..20d08c8d4b57582572ee6e67c76f9a41ae3df512 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTRMV_IDX, "ocl_cblas_dtrmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTRMV_IDX, "ocl_cblas_dtrmv");
+
#ifdef __cplusplus
try
#else
index 798a88e7db5ae6895105b8877d2809c8ff3eaaea..0dbfa28d843a8de0bdff5137f86cb8ce0372428c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTRSM_IDX, "ocl_cblas_dtrsm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTRSM_IDX, "ocl_cblas_dtrsm");
+
#ifdef __cplusplus
try
#else
@@ -178,22 +179,29 @@ void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(13, msmc_size);
#else
err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(14, buf_DDR);
+ __K->setArg(15, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(14, buf_err);
+ __K->setArg(16, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -215,6 +223,7 @@ void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 15d492b19ef6d54d96ad3cd9040e2c840d5120ef..d9229c0ae5af6723db06d1b0e26ecb4e96050690 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTRSV_IDX, "ocl_cblas_dtrsv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DTRSV_IDX, "ocl_cblas_dtrsv");
+
#ifdef __cplusplus
try
#else
index fd4874ce4c76eb0d6fbdab0ea5b4defb17b9bdb2..6d5641bc9ea69bf9dbd245966dbc7069d4abf18b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DZASUM_IDX, "ocl_cblas_dzasum");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DZASUM_IDX, "ocl_cblas_dzasum");
+
#ifdef __cplusplus
try
#else
index 96147f96cfc07c8bd203d7188e4540b8d413e4a3..4557b2ef40342b27aa1b4cd4747397da85bdf5a7 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DZNRM2_IDX, "ocl_cblas_dznrm2");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_DZNRM2_IDX, "ocl_cblas_dznrm2");
+
#ifdef __cplusplus
try
#else
index c5569ad9e9a73e65cdd22399c849f2fb11e96ef8..ce9e10c21c5f582e110dd01f75d7f28b0ad98f5f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ICAMAX_IDX, "ocl_cblas_icamax");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ICAMAX_IDX, "ocl_cblas_icamax");
+
#ifdef __cplusplus
try
#else
index 3ed7745801849fcb57d95545c06b9593633c0563..d6bfedfdb6c8e5c7f032ab6819dac7fca6e9d388 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_IDAMAX_IDX, "ocl_cblas_idamax");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_IDAMAX_IDX, "ocl_cblas_idamax");
+
#ifdef __cplusplus
try
#else
index 19bea6ab6ddd0bd78d74e478b220f1b8c00d7c11..5beae2407db6d0578d18afe45592d7786978e9c1 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ISAMAX_IDX, "ocl_cblas_isamax");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ISAMAX_IDX, "ocl_cblas_isamax");
+
#ifdef __cplusplus
try
#else
index 77141cf81d0e2fb1d2ee61287eebbd0a2d1f7aed..c80492381b2fbef510666d734f23fb14de910c89 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_IZAMAX_IDX, "ocl_cblas_izamax");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_IZAMAX_IDX, "ocl_cblas_izamax");
+
#ifdef __cplusplus
try
#else
index 7892bb7221f69ac12fbeb16eaa005d4954fb8af8..6f4172d8596c2196c515c80186af5cb4690d0abb 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SASUM_IDX, "ocl_cblas_sasum");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SASUM_IDX, "ocl_cblas_sasum");
+
#ifdef __cplusplus
try
#else
index 3862933e632a4fd52a7255eebf01593ed6c3c88f..9c7af6ef5e3df994d1ebee6e3962b9e3724531a2 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SAXPY_IDX, "ocl_cblas_saxpy");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SAXPY_IDX, "ocl_cblas_saxpy");
+
#ifdef __cplusplus
try
#else
index 23c6f4f38c27cc687ed0892dd73449ef80b718bd..3f0a9a34a382de498be87f36cb82e93a37fb9014 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SCASUM_IDX, "ocl_cblas_scasum");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SCASUM_IDX, "ocl_cblas_scasum");
+
#ifdef __cplusplus
try
#else
index a966e9ee6950f86c5e80f883be581ca7c3a1fc8f..43f0b2f8daee71bb7fbc2da09bd650cb34985650 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SCNRM2_IDX, "ocl_cblas_scnrm2");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SCNRM2_IDX, "ocl_cblas_scnrm2");
+
#ifdef __cplusplus
try
#else
index dd6cab58259f0aff629480c062babebdaaad0ea3..7e225e88b36f50ae35a1526305af6ca5a1a1dc3d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SCOPY_IDX, "ocl_cblas_scopy");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SCOPY_IDX, "ocl_cblas_scopy");
+
#ifdef __cplusplus
try
#else
index 69fd6d44bb21eb4c8da40cf7f4c35e76dfb19e74..9cd85250c62450db3c743038082f3fba35f47f43 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SDOT_IDX, "ocl_cblas_sdot");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SDOT_IDX, "ocl_cblas_sdot");
+
#ifdef __cplusplus
try
#else
index d5445ea42bd48a1e64bc44e8e22f300d7fe89b9e..268fb76d9fe697dbef58b7a2ede06fe3e8eef091 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SDSDOT_IDX, "ocl_cblas_sdsdot");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SDSDOT_IDX, "ocl_cblas_sdsdot");
+
#ifdef __cplusplus
try
#else
index f4f2826d101e2f19550cc062d299bc315d784ddb..956ee4110936c7bddf2c7d52798e3438249e2b22 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SGBMV_IDX, "ocl_cblas_sgbmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SGBMV_IDX, "ocl_cblas_sgbmv");
+
#ifdef __cplusplus
try
#else
index 72e3af3b58ab693ccfc9b31001d4f5d554b8db58..3385f80c8c17b0fe1a9321ca22ea50284d7a81d0 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SGEMM_IDX, "ocl_cblas_sgemm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SGEMM_IDX, "ocl_cblas_sgemm");
+
#ifdef __cplusplus
try
#else
@@ -199,22 +200,29 @@ void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
err |= clSetKernelArg(__K, 14, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(15, msmc_size);
#else
err |= clSetKernelArg(__K, 15, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(16, buf_DDR);
+ __K->setArg(17, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(16, buf_err);
+ __K->setArg(18, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 18, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -236,6 +244,7 @@ void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index c6e0c1daf7faf8f5b7166676f291dadbeb1fa048..9acaf531852879ddc1df0546b8da64c99afbad4f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SGEMV_IDX, "ocl_cblas_sgemv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SGEMV_IDX, "ocl_cblas_sgemv");
+
#ifdef __cplusplus
try
#else
index 1ee795f876353a1adbbfc44a0be97e0f50e1c92c..fcf4ca2447b629688d2508ee97467595506a823f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SGER_IDX, "ocl_cblas_sger");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SGER_IDX, "ocl_cblas_sger");
+
#ifdef __cplusplus
try
#else
index 79a7dcbfb6da15956a44b1bb3eabbf0ca453d710..013acfabcdf148eb1bcc16480e45f5d9ee9b81f2 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SNRM2_IDX, "ocl_cblas_snrm2");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SNRM2_IDX, "ocl_cblas_snrm2");
+
#ifdef __cplusplus
try
#else
index 051e99a68f1e988d49bf33412e31a652ff75826e..cafda115a4d0d08b9352f6f0864a0812bb416330 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SROT_IDX, "ocl_cblas_srot");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SROT_IDX, "ocl_cblas_srot");
+
#ifdef __cplusplus
try
#else
index 2b7a071bb9a248c351b5b0ee5f4397d599c3d94a..dbe5e2d6c86d533809a642281dca108f4878cce8 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SROTG_IDX, "ocl_cblas_srotg");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SROTG_IDX, "ocl_cblas_srotg");
+
#ifdef __cplusplus
try
#else
index e67142c90213ae74fc2522d00b4a4381099d6b68..42a2bf43dbfc23695ec3e34b15b19a5ec7b7ac7b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SROTM_IDX, "ocl_cblas_srotm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SROTM_IDX, "ocl_cblas_srotm");
+
#ifdef __cplusplus
try
#else
index 008e2d0a35a5878f61dd8cef1cede20cc1b58c63..892f0646b2af12ee17521023641ef47b87f687fd 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SROTMG_IDX, "ocl_cblas_srotmg");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SROTMG_IDX, "ocl_cblas_srotmg");
+
#ifdef __cplusplus
try
#else
index 08d9dd7479b36b0172ba457a90ca089824ca9cc5..8e7f770cfccd9abf1cd3a93936bdd58f1289241b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSBMV_IDX, "ocl_cblas_ssbmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSBMV_IDX, "ocl_cblas_ssbmv");
+
#ifdef __cplusplus
try
#else
index d3afdb9af57c433e646f69fb153bc69b3d2d4169..d375b548ca045d954c31030bb01a3ef76b1d8941 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSCAL_IDX, "ocl_cblas_sscal");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSCAL_IDX, "ocl_cblas_sscal");
+
#ifdef __cplusplus
try
#else
index 78f82ddba2e69a90b0e49dd5ad1083ad8bd22ca0..d292322a2c27abfa3ffca743e9fb8305c7e6b31e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSPMV_IDX, "ocl_cblas_sspmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSPMV_IDX, "ocl_cblas_sspmv");
+
#ifdef __cplusplus
try
#else
index 707533f1203af0ff8e44dd7565d2540094f4f931..229cc92b922d28d3fa8096ac72c80a631caae19d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSPR_IDX, "ocl_cblas_sspr");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSPR_IDX, "ocl_cblas_sspr");
+
#ifdef __cplusplus
try
#else
index dac6d8538f1a8aa11aab90484f408d19893fa50e..30c784ea1b50e838cfb8977fc2a79a642b1118ac 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSPR2_IDX, "ocl_cblas_sspr2");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSPR2_IDX, "ocl_cblas_sspr2");
+
#ifdef __cplusplus
try
#else
index 241d2136298fbee4b1b8061244d8a306bc031112..2f7865eecf9215983fac2111e695c37074930a53 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSWAP_IDX, "ocl_cblas_sswap");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSWAP_IDX, "ocl_cblas_sswap");
+
#ifdef __cplusplus
try
#else
index c090bed21bdefa2027be33820dfda2ab4d1586b3..f3852044622292eeb09776f982424e8d953e4617 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYMM_IDX, "ocl_cblas_ssymm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYMM_IDX, "ocl_cblas_ssymm");
+
#ifdef __cplusplus
try
#else
@@ -193,22 +194,29 @@ void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(14, msmc_size);
#else
err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(15, buf_DDR);
+ __K->setArg(16, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(15, buf_err);
+ __K->setArg(17, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 17, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -230,6 +238,7 @@ void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index e92214f0d9b4b9f67c938c86df235519614774b8..6aedd21b2099f9c68f579f2b095647bebc6c6da6 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYMV_IDX, "ocl_cblas_ssymv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYMV_IDX, "ocl_cblas_ssymv");
+
#ifdef __cplusplus
try
#else
index 5c45b02b42ccab1327a9fdcabae28d5d7eb0e8a5..64166fb7805017b77f8915fd5eaefb28dea769c6 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYR_IDX, "ocl_cblas_ssyr");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYR_IDX, "ocl_cblas_ssyr");
+
#ifdef __cplusplus
try
#else
index ea04df9a24f991f193fdd6f7b9cb887f2341c97d..d5edeac4e9449fe712ae4f487bdf0742bab36d92 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYR2_IDX, "ocl_cblas_ssyr2");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYR2_IDX, "ocl_cblas_ssyr2");
+
#ifdef __cplusplus
try
#else
index 97b6b443b80c33ae0988494f948b559cd83c8647..8564f71b855ce2d164fa90786928001a17692661 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYR2K_IDX, "ocl_cblas_ssyr2k");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYR2K_IDX, "ocl_cblas_ssyr2k");
+
#ifdef __cplusplus
try
#else
@@ -193,22 +194,29 @@ void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(14, msmc_size);
#else
err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(15, buf_DDR);
+ __K->setArg(16, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(15, buf_err);
+ __K->setArg(17, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 17, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -230,6 +238,7 @@ void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 17c8bbddeeaee3871f6cb208fc49eb4d73a5a18f..a9f8a2db47ddd19ef695eb5039aaf9e83e22b656 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYRK_IDX, "ocl_cblas_ssyrk");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSYRK_IDX, "ocl_cblas_ssyrk");
+
#ifdef __cplusplus
try
#else
@@ -172,22 +173,29 @@ void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
err |= clSetKernelArg(__K, 11, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(12, msmc_size);
#else
err |= clSetKernelArg(__K, 12, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(13, buf_DDR);
+ __K->setArg(14, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(13, buf_err);
+ __K->setArg(15, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 13, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -209,6 +217,7 @@ void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 97b7922d647442cdc1f94eaa8d67aba92cf2cb2d..60fe80300ab035b668a733666a499ca83e86844e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STBMV_IDX, "ocl_cblas_stbmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STBMV_IDX, "ocl_cblas_stbmv");
+
#ifdef __cplusplus
try
#else
index 47002da6eaa00838607cb2a56bac48e161455d86..7ac2d478afac27e1f380af80382e6de53045f293 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STBSV_IDX, "ocl_cblas_stbsv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STBSV_IDX, "ocl_cblas_stbsv");
+
#ifdef __cplusplus
try
#else
index a5ea0271c4383d0167bb98c8dd5c7a70bbd8a847..0e0a326afeca05ba803d6f0944cc47f5b386e047 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STPMV_IDX, "ocl_cblas_stpmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STPMV_IDX, "ocl_cblas_stpmv");
+
#ifdef __cplusplus
try
#else
index 0b23825a29b71d1927177aff2dee0b1b14bc7b06..e54e5650deb312b09f00c746d4247f5f0e8d8780 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STPSV_IDX, "ocl_cblas_stpsv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STPSV_IDX, "ocl_cblas_stpsv");
+
#ifdef __cplusplus
try
#else
index fc1d9d82400c27ccc8adb3423f5576cd573e6bc5..4e80830d0e4ac23c9e1133e6098afd336ed2ebce 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STRMM_IDX, "ocl_cblas_strmm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STRMM_IDX, "ocl_cblas_strmm");
+
#ifdef __cplusplus
try
#else
@@ -178,22 +179,29 @@ void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(13, msmc_size);
#else
err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(14, buf_DDR);
+ __K->setArg(15, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(14, buf_err);
+ __K->setArg(16, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -215,6 +223,7 @@ void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 933bd345d9e178504416da29d1234f01d227607f..9860ef5389428bac5ac5bc344f55043531ef2e83 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STRMV_IDX, "ocl_cblas_strmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STRMV_IDX, "ocl_cblas_strmv");
+
#ifdef __cplusplus
try
#else
index 56ad072c622be0bb7b1489001e296e77c4d18676..4bf872ca5f052ca5c8db46826e154982c7e698d6 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STRSM_IDX, "ocl_cblas_strsm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STRSM_IDX, "ocl_cblas_strsm");
+
#ifdef __cplusplus
try
#else
@@ -178,22 +179,29 @@ void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(13, msmc_size);
#else
err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(14, buf_DDR);
+ __K->setArg(15, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(14, buf_err);
+ __K->setArg(16, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -215,6 +223,7 @@ void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 97aed05220eae5f502c1c6f462e67caca929906f..1a7e2096f48a89a55075e2816760173a67ab0b20 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STRSV_IDX, "ocl_cblas_strsv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_STRSV_IDX, "ocl_cblas_strsv");
+
#ifdef __cplusplus
try
#else
index 2ff97c8e9002195a31690106b57c6741a9bc578a..f7da8da5f5bfcbee9eaa11924c8a55b556cae538 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_XERBLA_IDX, "ocl_cblas_xerbla");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_XERBLA_IDX, "ocl_cblas_xerbla");
+
#ifdef __cplusplus
try
#else
index 8450fae6dddcad9b73926122c1d31c307607af9b..c4899aa2aa4e361b238f6d07d600028dddc9e8d2 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZAXPY_IDX, "ocl_cblas_zaxpy");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZAXPY_IDX, "ocl_cblas_zaxpy");
+
#ifdef __cplusplus
try
#else
index 60edee0b1a1dcd0898b4e4e537e0f1881ad22f77..36b75550ec81044c32c1e0f358d180b5d1d07b35 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZCOPY_IDX, "ocl_cblas_zcopy");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZCOPY_IDX, "ocl_cblas_zcopy");
+
#ifdef __cplusplus
try
#else
diff --git a/blasblisacc/src/ti_cblas_cblas_zdotc_sub.c b/blasblisacc/src/ti_cblas_cblas_zdotc_sub.c
index 80cd4d8d19fa29ed864e90aff61fb3bf55901008..7bc1336cd31f3d5576c4676db93f5301f9cc6b27 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZDOTC_SUB_IDX, "ocl_cblas_zdotc_sub");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZDOTC_SUB_IDX, "ocl_cblas_zdotc_sub");
+
#ifdef __cplusplus
try
#else
diff --git a/blasblisacc/src/ti_cblas_cblas_zdotu_sub.c b/blasblisacc/src/ti_cblas_cblas_zdotu_sub.c
index 841cf7103056e3085d99ca635e23c0ac9ab41396..0327eebe150194bb5ea43796d293af7103a1eb41 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZDOTU_SUB_IDX, "ocl_cblas_zdotu_sub");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZDOTU_SUB_IDX, "ocl_cblas_zdotu_sub");
+
#ifdef __cplusplus
try
#else
index d2c7eca168abcbea0f24d50e003b9cd0ad42de62..838c7dea90c45dbcb4ee8e60e6f01c4fb0a8430d 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZDSCAL_IDX, "ocl_cblas_zdscal");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZDSCAL_IDX, "ocl_cblas_zdscal");
+
#ifdef __cplusplus
try
#else
index 0a2a0740e8ce2892c5a71309b72157bcc707ac89..049ca5811653975b0bce644c61be20cb0356cf9e 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGBMV_IDX, "ocl_cblas_zgbmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGBMV_IDX, "ocl_cblas_zgbmv");
+
#ifdef __cplusplus
try
#else
index 77e5bf914df46ef5c0d22898d3b5988180284cd8..fc11cb1c7672bc45fc2fbbea949939375d677332 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGEMM_IDX, "ocl_cblas_zgemm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGEMM_IDX, "ocl_cblas_zgemm");
+
#ifdef __cplusplus
try
#else
@@ -209,22 +210,29 @@ void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
err |= clSetKernelArg(__K, 14, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(15, msmc_size);
#else
err |= clSetKernelArg(__K, 15, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(16, buf_DDR);
+ __K->setArg(17, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(16, buf_err);
+ __K->setArg(18, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 18, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -246,6 +254,7 @@ void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index a195377e20dbc6f6f0257e60ef63d0ba9797b168..2e1dbc1d6209cd2e3c8c340bfca0d54a9c32c716 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGEMV_IDX, "ocl_cblas_zgemv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGEMV_IDX, "ocl_cblas_zgemv");
+
#ifdef __cplusplus
try
#else
index 341c8f1fe67c11eb94930b8a0fada68781c109de..6292ddccffd4e7509d3e2515c6362177a1937c88 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGERC_IDX, "ocl_cblas_zgerc");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGERC_IDX, "ocl_cblas_zgerc");
+
#ifdef __cplusplus
try
#else
index 94cddea2813c7f4cb1fcbfa312f41b51984b9b2c..8302431314f7ef2f0452302e7771b983121dd136 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGERU_IDX, "ocl_cblas_zgeru");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZGERU_IDX, "ocl_cblas_zgeru");
+
#ifdef __cplusplus
try
#else
index 36fa157152bb4caad5c34d72e0c86987e848f27a..6afd4cbd2c2c27421dceed1e9d16227fe91fda32 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHBMV_IDX, "ocl_cblas_zhbmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHBMV_IDX, "ocl_cblas_zhbmv");
+
#ifdef __cplusplus
try
#else
index d206e1361fdcb13fe6151a4ac8b82dfc7384a434..193b0f345d6d6d8390e39525947a7e6f7961a5db 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHEMM_IDX, "ocl_cblas_zhemm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHEMM_IDX, "ocl_cblas_zhemm");
+
#ifdef __cplusplus
try
#else
@@ -203,22 +204,29 @@ void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(14, msmc_size);
#else
err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(15, buf_DDR);
+ __K->setArg(16, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(15, buf_err);
+ __K->setArg(17, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 17, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -240,6 +248,7 @@ void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 6e87053d010dd9a82dc474db8a40b02f62c2a1d8..283bc8d77feec964a2f6205685d33073c0172551 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHEMV_IDX, "ocl_cblas_zhemv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHEMV_IDX, "ocl_cblas_zhemv");
+
#ifdef __cplusplus
try
#else
index a3dcd1b25557b440cc52078b8ce54920806b8be6..bf0dec2e521f2e40394ebd26ed609a0794301e70 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHER_IDX, "ocl_cblas_zher");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHER_IDX, "ocl_cblas_zher");
+
#ifdef __cplusplus
try
#else
index 146d17ff455202d1746c5b1c112724192aff2c37..9b72f952c717e8b87f0e45464343b7e4a45500c5 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHER2_IDX, "ocl_cblas_zher2");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHER2_IDX, "ocl_cblas_zher2");
+
#ifdef __cplusplus
try
#else
index 6f52d3ae5d260957a04446aee24309c2ca037cfd..94442469dbf76005979ff4b53146ddbf3c9f4ef8 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHER2K_IDX, "ocl_cblas_zher2k");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHER2K_IDX, "ocl_cblas_zher2k");
+
#ifdef __cplusplus
try
#else
@@ -198,22 +199,29 @@ void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(14, msmc_size);
#else
err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(15, buf_DDR);
+ __K->setArg(16, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(15, buf_err);
+ __K->setArg(17, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 17, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -235,6 +243,7 @@ void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index e5437115432891ca5fa07b00195a43f301729402..651fab2d45c10ff4ff177a89ac8089699d99aaec 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHERK_IDX, "ocl_cblas_zherk");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHERK_IDX, "ocl_cblas_zherk");
+
#ifdef __cplusplus
try
#else
@@ -172,22 +173,29 @@ void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
err |= clSetKernelArg(__K, 11, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(12, msmc_size);
#else
err |= clSetKernelArg(__K, 12, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(13, buf_DDR);
+ __K->setArg(14, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(13, buf_err);
+ __K->setArg(15, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 13, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -209,6 +217,7 @@ void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 9a5dd8fc42da0346c0cb73bb11b8a59dc3d11b98..8c14a93696a54038c91b093be502852b680fd6d9 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHPMV_IDX, "ocl_cblas_zhpmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHPMV_IDX, "ocl_cblas_zhpmv");
+
#ifdef __cplusplus
try
#else
index d479109e494b12efc0b9afcd9f963d6cb9f40687..ff810ec1ded88adb08f1f8aca3d66df6308abfc1 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHPR_IDX, "ocl_cblas_zhpr");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHPR_IDX, "ocl_cblas_zhpr");
+
#ifdef __cplusplus
try
#else
index e55ec9f3bcf193671f735c27c7e6434eb0694065..33c9fb6149e06308ed18df853a188c5b1ddba158 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHPR2_IDX, "ocl_cblas_zhpr2");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZHPR2_IDX, "ocl_cblas_zhpr2");
+
#ifdef __cplusplus
try
#else
index 41d2041dd63bd8727e59b8ead3fe98b8c39c6f07..f13b5c94c59ca73113957901051ee8933b2fc773 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZROTG_IDX, "ocl_cblas_zrotg");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZROTG_IDX, "ocl_cblas_zrotg");
+
#ifdef __cplusplus
try
#else
index c08248fa6b7bb3457d89d3fa723d7ceb3dd43018..c54445b49c073aeab180876b1bf32d1dc18e0f4f 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSCAL_IDX, "ocl_cblas_zscal");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSCAL_IDX, "ocl_cblas_zscal");
+
#ifdef __cplusplus
try
#else
index a966c27943faa13cd8cda00a678db1646a2ee556..d4855ae08da00f6abf1519179d23430905a8827c 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSWAP_IDX, "ocl_cblas_zswap");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSWAP_IDX, "ocl_cblas_zswap");
+
#ifdef __cplusplus
try
#else
index deda9fd7c9995cd58cb5b0f7f602228acdd4125c..cd9731d6cb66008a89a604a38f9e28c3189229bd 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSYMM_IDX, "ocl_cblas_zsymm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSYMM_IDX, "ocl_cblas_zsymm");
+
#ifdef __cplusplus
try
#else
@@ -203,22 +204,29 @@ void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(14, msmc_size);
#else
err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(15, buf_DDR);
+ __K->setArg(16, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(15, buf_err);
+ __K->setArg(17, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 17, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -240,6 +248,7 @@ void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 62de2c1216ee1e8b84370c748851aa67baf099d7..5c83eedc82d6d7212f1fd816652debc8dce66dc8 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSYR2K_IDX, "ocl_cblas_zsyr2k");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSYR2K_IDX, "ocl_cblas_zsyr2k");
+
#ifdef __cplusplus
try
#else
@@ -203,22 +204,29 @@ void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
err |= clSetKernelArg(__K, 13, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(14, msmc_size);
#else
err |= clSetKernelArg(__K, 14, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(15, buf_DDR);
+ __K->setArg(16, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(15, buf_err);
+ __K->setArg(17, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 17, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -240,6 +248,7 @@ void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, cons
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 5271187201085378fb052d7f2ef8071ff0b31caa..49622bba4be258772826ffaff90d3e66818d9739 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSYRK_IDX, "ocl_cblas_zsyrk");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZSYRK_IDX, "ocl_cblas_zsyrk");
+
#ifdef __cplusplus
try
#else
@@ -182,22 +183,29 @@ void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
err |= clSetKernelArg(__K, 11, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(12, msmc_size);
#else
err |= clSetKernelArg(__K, 12, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(13, buf_DDR);
+ __K->setArg(14, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(13, buf_err);
+ __K->setArg(15, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 13, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 15, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -219,6 +227,7 @@ void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 5b7dc34149d73f267e505e6b2a9bcf50e45b0282..f17892612f30f69f0a03cbc6555baf5727aa8989 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTBMV_IDX, "ocl_cblas_ztbmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTBMV_IDX, "ocl_cblas_ztbmv");
+
#ifdef __cplusplus
try
#else
index fd9ba055b3443d816665ce5f420ea50b4206b7f3..6202e8d8ed3c0fcbb7026b62638994eb4fb575a6 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTBSV_IDX, "ocl_cblas_ztbsv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTBSV_IDX, "ocl_cblas_ztbsv");
+
#ifdef __cplusplus
try
#else
index 469a4996d0cea55b2c59c4dd90816391dc6254ce..9d26625f0c751cac87d6cd0e6f3f17a86f3da394 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTPMV_IDX, "ocl_cblas_ztpmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTPMV_IDX, "ocl_cblas_ztpmv");
+
#ifdef __cplusplus
try
#else
index 4def0efd93f10316fc3993786f16584f4c7f2cce..1c47aa7e9bdded2604a2a9611b77d46595d8c12a 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTPSV_IDX, "ocl_cblas_ztpsv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTPSV_IDX, "ocl_cblas_ztpsv");
+
#ifdef __cplusplus
try
#else
index 803bc4bc43c0fd03d6af68b031d8cd64a74129bd..4953031e56496c0551003e67f2f225a8279dbcb7 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTRMM_IDX, "ocl_cblas_ztrmm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTRMM_IDX, "ocl_cblas_ztrmm");
+
#ifdef __cplusplus
try
#else
@@ -183,22 +184,29 @@ void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(13, msmc_size);
#else
err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(14, buf_DDR);
+ __K->setArg(15, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(14, buf_err);
+ __K->setArg(16, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -220,6 +228,7 @@ void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index a0d12f44c92e89a983365953a0bd51ad19e539b3..e3f466cdbb08267fe2c6998ffd7c440a0593cc71 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTRMV_IDX, "ocl_cblas_ztrmv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTRMV_IDX, "ocl_cblas_ztrmv");
+
#ifdef __cplusplus
try
#else
index 8a2411a5c917f88e3b78197918ffa6bebca23964..1be9ded6fa0b7171f9a4875c196cf1db10a1cb6b 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTRSM_IDX, "ocl_cblas_ztrsm");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTRSM_IDX, "ocl_cblas_ztrsm");
+
#ifdef __cplusplus
try
#else
@@ -183,22 +184,29 @@ void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
err |= clSetKernelArg(__K, 12, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
-
+
#ifdef __cplusplus
__K->setArg(13, msmc_size);
#else
err |= clSetKernelArg(__K, 13, sizeof(msmc_size), &msmc_size);
#endif
-
+
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg(14, buf_DDR);
+ __K->setArg(15, ddr_size);
+
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg(14, buf_err);
+ __K->setArg(16, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 14, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, 16, sizeof(buf_err), &buf_err);
TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
#endif
@@ -220,6 +228,7 @@ void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const
}
ti_cblas_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
ti_cblas_delete_kernel(__K);
index 02d2797cc74948d66524f55e0f86282d8a5137b9..558a8873098fa7924ce835ed274f00777f8285ae 100644 (file)
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTRSV_IDX, "ocl_cblas_ztrsv");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_ZTRSV_IDX, "ocl_cblas_ztrsv");
+
#ifdef __cplusplus
try
#else
index f2dd549b161d11bab0d5df6b8cac9fcd3c724d7e..7dcdbc968822268fb21b4380dc46d889af62f508 100644 (file)
if(ti_cblas_init_done == 0)
return 0;
- //r_val = ti_blis_finalize();
+ r_val = ti_blis_finalize();
/*Using same name as ti_cblas_init critical region. See notes in bli_init*/
#pragma omp critical (ti_cblas_init_critical)
{
index 71cf70833aaf7c6516aa88481b58e47c16abafdc..36b6a4da1e4d1a63565c69096a8ccf320ee24249 100644 (file)
/* Prototype of the facade called by the kernel below; its definition is not visible here. */
void cblas_cgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
/* OpenCL kernel entry point ocl_cblas_cgbmv: hands every argument, unchanged and
 * in declaration order, to cblas_cgbmv_facade(). All pointer parameters are
 * global-qualified; Y is the only pointer not declared const. Returns nothing. */
kernel void ocl_cblas_cgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_cgbmv_facade(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_cgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_cgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_cgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_cgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
/* Prototype of the facade called by the kernel below; its definition is not visible here. */
void cblas_cgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
/* OpenCL kernel entry point ocl_cblas_cgemv: hands every argument, unchanged and
 * in declaration order, to cblas_cgemv_facade(). All pointer parameters are
 * global-qualified; Y is the only pointer not declared const. Returns nothing. */
kernel void ocl_cblas_cgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_cgemv_facade(order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY); }
void cblas_chbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_chbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_chbmv_facade(order, Uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_chemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_chemm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_chemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_chemm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_chemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_chemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_chemv_facade(order, Uplo, N, alpha, A, lda, X, incX, beta, Y, incY); }
@@ -84,12 +84,12 @@ kernel void ocl_cblas_cher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO U
void cblas_cher2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *X, const int incX, global const void *Y, const int incY, global void *A, const int lda);
kernel void ocl_cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *X, const int incX, global const void *Y, const int incY, global void *A, const int lda)
{ cblas_cher2_facade(order, Uplo, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_cher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_cher2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_cherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const void *A, const int lda, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const void *A, const int lda, const float beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_cherk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_cher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const float beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const float beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_cher2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
+void cblas_cherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const void *A, const int lda, const float beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const void *A, const int lda, const float beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_cherk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_chpmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *Ap, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_chpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *Ap, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_chpmv_facade(order, Uplo, N, alpha, Ap, X, incX, beta, Y, incY); }
@@ -111,15 +111,15 @@ kernel void ocl_cblas_csscal(const int N, const float alpha, global void *X, con
void cblas_cswap_facade(const int N, global void *X, const int incX, global void *Y, const int incY);
kernel void ocl_cblas_cswap(const int N, global void *X, const int incX, global void *Y, const int incY)
{ cblas_cswap_facade(N, X, incX, Y, incY); }
-void cblas_csymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_csymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_csyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_csyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_csyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_csyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_csymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_csymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
+void cblas_csyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_csyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
+void cblas_csyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_csyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_ctbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ctbmv_facade(order, Uplo, TransA, Diag, N, K, A, lda, X, incX); }
@@ -132,15 +132,15 @@ kernel void ocl_cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO
void cblas_ctpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *Ap, global void *X, const int incX);
kernel void ocl_cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *Ap, global void *X, const int incX)
{ cblas_ctpsv_facade(order, Uplo, TransA, Diag, N, Ap, X, incX); }
-void cblas_ctrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_ctrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
+void cblas_ctrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_ctrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_ctrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ctrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ctrmv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_ctrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_ctrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
+void cblas_ctrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_ctrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_ctrsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ctrsv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
void cblas_dgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY);
kernel void ocl_cblas_dgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY)
{ cblas_dgbmv_facade(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_dgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_dgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_dgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_dgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_dgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY);
kernel void ocl_cblas_dgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY)
{ cblas_dgemv_facade(order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY); }
void cblas_dswap_facade(const int N, global double *X, const int incX, global double *Y, const int incY);
kernel void ocl_cblas_dswap(const int N, global double *X, const int incX, global double *Y, const int incY)
{ cblas_dswap_facade(N, X, incX, Y, incY); }
-void cblas_dsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_dsymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_dsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_dsymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_dsymv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY);
kernel void ocl_cblas_dsymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *A, const int lda, global const double *X, const int incX, const double beta, global double *Y, const int incY)
{ cblas_dsymv_facade(order, Uplo, N, alpha, A, lda, X, incX, beta, Y, incY); }
@@ -216,12 +216,12 @@ kernel void ocl_cblas_dsyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO U
void cblas_dsyr2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *X, const int incX, global const double *Y, const int incY, global double *A, const int lda);
kernel void ocl_cblas_dsyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const double alpha, global const double *X, const int incX, global const double *Y, const int incY, global double *A, const int lda)
{ cblas_dsyr2_facade(order, Uplo, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_dsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_dsyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_dsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, const double beta, global double *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_dsyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_dsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, global const double *B, const int ldb, const double beta, global double *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_dsyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
+void cblas_dsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, const double beta, global double *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const double *A, const int lda, const double beta, global double *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_dsyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_dtbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const double *A, const int lda, global double *X, const int incX);
kernel void ocl_cblas_dtbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const double *A, const int lda, global double *X, const int incX)
{ cblas_dtbmv_facade(order, Uplo, TransA, Diag, N, K, A, lda, X, incX); }
@@ -234,15 +234,15 @@ kernel void ocl_cblas_dtpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO
void cblas_dtpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *Ap, global double *X, const int incX);
kernel void ocl_cblas_dtpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *Ap, global double *X, const int incX)
{ cblas_dtpsv_facade(order, Uplo, TransA, Diag, N, Ap, X, incX); }
-void cblas_dtrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_dtrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
+void cblas_dtrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_dtrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_dtrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *A, const int lda, global double *X, const int incX);
kernel void ocl_cblas_dtrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *A, const int lda, global double *X, const int incX)
{ cblas_dtrmv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_dtrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_dtrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
+void cblas_dtrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const double alpha, global const double *A, const int lda, global double *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_dtrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_dtrsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *A, const int lda, global double *X, const int incX);
kernel void ocl_cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const double *A, const int lda, global double *X, const int incX)
{ cblas_dtrsv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
void cblas_sgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY);
kernel void ocl_cblas_sgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY)
{ cblas_sgbmv_facade(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_sgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_sgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_sgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_sgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_sgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY);
kernel void ocl_cblas_sgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY)
{ cblas_sgemv_facade(order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY); }
void cblas_sswap_facade(const int N, global float *X, const int incX, global float *Y, const int incY);
kernel void ocl_cblas_sswap(const int N, global float *X, const int incX, global float *Y, const int incY)
{ cblas_sswap_facade(N, X, incX, Y, incY); }
-void cblas_ssymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_ssymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_ssymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_ssymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_ssymv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY);
kernel void ocl_cblas_ssymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, global const float *A, const int lda, global const float *X, const int incX, const float beta, global float *Y, const int incY)
{ cblas_ssymv_facade(order, Uplo, N, alpha, A, lda, X, incX, beta, Y, incY); }
@@ -342,12 +342,12 @@ kernel void ocl_cblas_ssyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO U
void cblas_ssyr2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, global const float *X, const int incX, global const float *Y, const int incY, global float *A, const int lda);
kernel void ocl_cblas_ssyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const float alpha, global const float *X, const int incX, global const float *Y, const int incY, global float *A, const int lda)
{ cblas_ssyr2_facade(order, Uplo, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_ssyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_ssyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_ssyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, const float beta, global float *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_ssyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_ssyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, global const float *B, const int ldb, const float beta, global float *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_ssyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
+void cblas_ssyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, const float beta, global float *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const float alpha, global const float *A, const int lda, const float beta, global float *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_ssyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_stbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const float *A, const int lda, global float *X, const int incX);
kernel void ocl_cblas_stbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const float *A, const int lda, global float *X, const int incX)
{ cblas_stbmv_facade(order, Uplo, TransA, Diag, N, K, A, lda, X, incX); }
@@ -360,15 +360,15 @@ kernel void ocl_cblas_stpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO
void cblas_stpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *Ap, global float *X, const int incX);
kernel void ocl_cblas_stpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *Ap, global float *X, const int incX)
{ cblas_stpsv_facade(order, Uplo, TransA, Diag, N, Ap, X, incX); }
-void cblas_strmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_strmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
+void cblas_strmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_strmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_strmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *A, const int lda, global float *X, const int incX);
kernel void ocl_cblas_strmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *A, const int lda, global float *X, const int incX)
{ cblas_strmv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_strsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_strsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
+void cblas_strsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, const float alpha, global const float *A, const int lda, global float *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_strsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_strsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *A, const int lda, global float *X, const int incX);
kernel void ocl_cblas_strsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const float *A, const int lda, global float *X, const int incX)
{ cblas_strsv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
@@ -393,9 +393,9 @@ kernel void ocl_cblas_zdscal(const int N, const double alpha, global void *X, co
void cblas_zgbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zgbmv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, const int KL, const int KU, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_zgbmv_facade(order, TransA, M, N, KL, KU, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_zgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_zgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_zgemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_TRANSPOSE TransB, const int M, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_zgemm_facade(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_zgemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zgemv(const enum CBLAS_ORDER order, const enum CBLAS_TRANSPOSE TransA, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_zgemv_facade(order, TransA, M, N, alpha, A, lda, X, incX, beta, Y, incY); }
@@ -408,9 +408,9 @@ kernel void ocl_cblas_zgeru(const enum CBLAS_ORDER order, const int M, const int
void cblas_zhbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zhbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_zhbmv_facade(order, Uplo, N, K, alpha, A, lda, X, incX, beta, Y, incY); }
-void cblas_zhemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_zhemm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_zhemm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_zhemm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_zhemv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zhemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *A, const int lda, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_zhemv_facade(order, Uplo, N, alpha, A, lda, X, incX, beta, Y, incY); }
@@ -420,12 +420,12 @@ kernel void ocl_cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO U
void cblas_zher2_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *X, const int incX, global const void *Y, const int incY, global void *A, const int lda);
kernel void ocl_cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *X, const int incX, global const void *Y, const int incY, global void *A, const int lda)
{ cblas_zher2_facade(order, Uplo, N, alpha, X, incX, Y, incY, A, lda); }
-void cblas_zher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_zher2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_zherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const void *A, const int lda, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const void *A, const int lda, const double beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_zherk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_zher2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const double beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, const double beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_zher2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
+void cblas_zherk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const void *A, const int lda, const double beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, const double alpha, global const void *A, const int lda, const double beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_zherk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_zhpmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *Ap, global const void *X, const int incX, global const void *beta, global void *Y, const int incY);
kernel void ocl_cblas_zhpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, global const void *alpha, global const void *Ap, global const void *X, const int incX, global const void *beta, global void *Y, const int incY)
{ cblas_zhpmv_facade(order, Uplo, N, alpha, Ap, X, incX, beta, Y, incY); }
@@ -444,15 +444,15 @@ kernel void ocl_cblas_zscal(const int N, global const void *alpha, global void *
void cblas_zswap_facade(const int N, global void *X, const int incX, global void *Y, const int incY);
kernel void ocl_cblas_zswap(const int N, global void *X, const int incX, global void *Y, const int incY)
{ cblas_zswap_facade(N, X, incX, Y, incY); }
-void cblas_zsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_zsymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_zsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_zsyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
-void cblas_zsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_zsyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, err_code); }
+void cblas_zsymm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const int M, const int N, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_zsymm_facade(Order, Side, Uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
+void cblas_zsyr2k_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *B, const int ldb, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_zsyr2k_facade(Order, Uplo, Trans, N, K, alpha, A, lda, B, ldb, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
+void cblas_zsyrk_facade(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE Trans, const int N, const int K, global const void *alpha, global const void *A, const int lda, global const void *beta, global void *C, const int ldc, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_zsyrk_facade(Order, Uplo, Trans, N, K, alpha, A, lda, beta, C, ldc, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_ztbmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ztbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, const int K, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ztbmv_facade(order, Uplo, TransA, Diag, N, K, A, lda, X, incX); }
@@ -465,15 +465,15 @@ kernel void ocl_cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO
void cblas_ztpsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *Ap, global void *X, const int incX);
kernel void ocl_cblas_ztpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *Ap, global void *X, const int incX)
{ cblas_ztpsv_facade(order, Uplo, TransA, Diag, N, Ap, X, incX); }
-void cblas_ztrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_ztrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
+void cblas_ztrmm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_ztrmm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_ztrmv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ztrmv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
-void cblas_ztrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code);
-kernel void ocl_cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global double *l3_buf, size_t l3_buf_size, global int *err_code)
-{ cblas_ztrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, err_code); }
+void cblas_ztrsm_facade(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code);
+kernel void ocl_cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int M, const int N, global const void *alpha, global const void *A, const int lda, global void *B, const int ldb, global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code)
+{ cblas_ztrsm_facade(Order, Side, Uplo, TransA, Diag, M, N, alpha, A, lda, B, ldb, l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code); }
void cblas_ztrsv_facade(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX);
kernel void ocl_cblas_ztrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, const int N, global const void *A, const int lda, global void *X, const int incX)
{ cblas_ztrsv_facade(order, Uplo, TransA, Diag, N, A, lda, X, incX); }
index ce6daf1eac232ed34102073e9668dd1e63a221f2..7b9c5ab232f9f72cd4bcbf7f02ea28d067cee465 100644 (file)
#include "../../ticblas/ticblas.h"
#include <libarch.h>
-#define BLIS_L3_DDR_SIZE_ZERO (0)
-
extern void bli_init();
extern void bli_finalize();
-int bli_l3_mem_config(double *msmc_buf, size_t msmc_buf_size, size_t *l1D_SRAM_size_orig, size_t *l2_SRAM_size_orig)
+#ifdef TI_CBLAS_DEBUG
+int malloc_size;
+extern lib_memdscr_t * blas_memdscr_tab[4];
+#endif
+
+int bli_l3_mem_config(void *msmc_buf, size_t msmc_buf_size, void *ddr_buf, size_t ddr_buf_size, size_t *l1D_SRAM_size_orig, size_t *l2_SRAM_size_orig)
{
size_t smem_size_vfast, smem_size_fast, smem_size_med, smem_size_slow;
void *l1d_SRAM_ptr, *l2_SRAM_ptr;
- int l1d_cfg_err, l2_cfg_err;
-
+ int l1d_cfg_err, l2_cfg_err, blas_ret_err_code;
+
+#ifdef TI_CBLAS_DEBUG
+ malloc_size = 0;
+ printf("Memory buffers passed to bli_l3_mem_config are: MSMC base 0x%x, size %d, DDR base 0x%x, size%d.\n", (unsigned int)msmc_buf, msmc_buf_size, (unsigned int) ddr_buf, ddr_buf_size);
+ printf("Before calling BLIS, malloc_size is %d.\n", malloc_size);
+#endif
+
/* First, verify the provided/available memory meet requirements */
tiCblasGetSizes(&smem_size_vfast, &smem_size_fast, &smem_size_med, &smem_size_slow);
if( (smem_size_vfast> lib_get_L1D_total_size()) /* total available L1D */
||(smem_size_fast > lib_get_L2_total_size()) /* total available L2 */
||(smem_size_med > msmc_buf_size) /* provided MSMC memory */
- ||(smem_size_slow > BLIS_L3_DDR_SIZE_ZERO) /* DDR not used */
+ ||(smem_size_slow > ddr_buf_size) /* provided DDR memory */
) {
return(TICBLAS_INIT_ERROR);
}
l1d_cfg_err = LIB_CACHE_SUCCESS;
#ifdef TI_CBLAS_DEBUG
- printf("Original L1D SRAM size is: %d\n", *l1D_SRAM_size_orig);
- printf("Required L1D SRAM size is: %d\n", smem_size_vfast);
+ printf("Original L1D SRAM size is: %d\n", *l1D_SRAM_size_orig);
+ printf("Required L1D SRAM size is: %d\n", smem_size_vfast);
#endif
if(*l1D_SRAM_size_orig < smem_size_vfast) { /* configure L1D if needs more SRAM */
#pragma omp parallel
{
int core_id = lib_get_coreID();
- printf("New L1D SRAM size from core %d is: %d\n", core_id, lib_get_L1D_SRAM_size());
}
-
- CSL_IDMA_chan1Wait();
- printf("IDMA1 source register: 0x%x, destination register: 0x%x, count register: 0x%x.\n", (unsigned int)hIdma->IDMA1_SOURCE, (unsigned int)hIdma->IDMA1_DEST, (unsigned int)hIdma->IDMA1_COUNT);
- printf("Configure IDMA1 to transfer 128 bytes from 0x820000 to 0xf00000.\n");
- hIdma->IDMA1_SOURCE = (uint32_t)0x820000;
- hIdma->IDMA1_DEST = (uint32_t)0xf00000;
- hIdma->IDMA1_COUNT = CSL_FMK(CGEM_IDMA1_COUNT_PRI, (uint32_t)7) |
- CSL_FMK(CGEM_IDMA1_COUNT_INT, (uint32_t)0) |
- CSL_FMK(CGEM_IDMA1_COUNT_FILL, (uint32_t)0) |
- CSL_FMK(CGEM_IDMA1_COUNT_COUNT, 128);
- printf("IDMA1 source register: 0x%x, destination register: 0x%x, count register: 0x%x.\n", (unsigned int)hIdma->IDMA1_SOURCE, (unsigned int)hIdma->IDMA1_DEST, (unsigned int)hIdma->IDMA1_COUNT);
#endif
/* Configure L2 if necessary */
@@ -120,16 +117,27 @@ int bli_l3_mem_config(double *msmc_buf, size_t msmc_buf_size, size_t *l1D_SRAM_s
l2_SRAM_ptr = lib_get_L2_SRAM_base();
#ifdef TI_CBLAS_DEBUG
- printf("L1D SRAM base address is 0x%x.\n", (unsigned int)l1d_SRAM_ptr);
- printf("L2 SRAM base address is 0x%x.\n", (unsigned int) l2_SRAM_ptr);
- printf("MSMC SRAM address is 0x%x.\n", (unsigned int) msmc_buf);
+ printf("L1D SRAM base address is 0x%x.\n", (unsigned int)l1d_SRAM_ptr);
+ printf("L2 SRAM base address is 0x%x.\n", (unsigned int) l2_SRAM_ptr);
+ printf("MSMC SRAM address is 0x%x.\n", (unsigned int) msmc_buf);
#endif
/* pass allocated memories for heap initialization */
- return(tiCblasInit(l1d_SRAM_ptr, smem_size_vfast,
- l2_SRAM_ptr, smem_size_fast,
- msmc_buf, msmc_buf_size,
- NULL, BLIS_L3_DDR_SIZE_ZERO));
+ blas_ret_err_code = tiCblasInit(l1d_SRAM_ptr, smem_size_vfast,
+ l2_SRAM_ptr, smem_size_fast,
+ msmc_buf, msmc_buf_size,
+ ddr_buf, ddr_buf_size);
+
+#ifdef TI_CBLAS_DEBUG
+ if(blas_ret_err_code == TICBLAS_SUCCESS) {
+ printf("Before calling BLIS, memory descriptor base is 0x%x, used is %d.\n", blas_memdscr_tab[3]->base, blas_memdscr_tab[3]->used);
+ }
+ else {
+ printf("BLAS init error.\n");
+ }
+#endif
+
+ return(blas_ret_err_code);
} /* bli_l3_mem_config */
/*==============================================================================
@@ -138,7 +146,12 @@ int bli_l3_mem_config(double *msmc_buf, size_t msmc_buf_size, size_t *l1D_SRAM_s
int bli_l3_mem_reconfig(size_t l1D_SRAM_size_orig, size_t l2_SRAM_size_orig)
{
int l1d_cfg_err, l2_cfg_err;
-
+
+#ifdef TI_CBLAS_DEBUG
+ printf("After calling BLIS, malloc_size is %d.\n", malloc_size);
+ printf("After calling BLIS, used_size in memory descriptor is %d.\n", blas_memdscr_tab[3]->used);
+#endif
+
/* configure L1D back if necessary */
l1d_cfg_err = LIB_CACHE_SUCCESS;
if(l1D_SRAM_size_orig!=lib_get_L1D_SRAM_size()) {
{
#ifdef TI_CBLAS_DEBUG
printf("In function ti_bli_init_dsp, l3_buff is 0x%x, l2_buf is 0x%x.\n", (unsigned int)l3_buf, (unsigned int)l2_buf);
-#endif
+ malloc_size = 0;
+ printf("Before calling bli_init, malloc_size is %d.\n", malloc_size);
+#endif
+
bli_init();
+
+#ifdef TI_CBLAS_DEBUG
+ printf("After calling bli_init, malloc_size is %d.\n", malloc_size);
+#endif
}
/* This function will be removed. Function tiCblasDelete() will be used instead. */
index 8121b8d7fef96d1ffa518bfcd5df40782cea3212..74c949da7e0e657d1a474c6cc23ad98146791128 100755 (executable)
if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
# print "This is a level 3 function - " . $trampname . "\n";
-# $i_plus_1 = $i+1;
+ $i_plus_1 = $i+1;
$armcode .= "
void *msmc_ptr;
size_t msmc_size = MSMC_BUF_SIZE;
err |= clSetKernelArg(__K, $i, sizeof(buf_MSMC), &buf_MSMC);
TI_CBLAS_OCL_CHKERROR(\"clSetKernelArg\",err);
#endif
-";
- $i++;
- $i_plus_1 = $i+1;
- $armcode .= "
+
#ifdef __cplusplus
- __K->setArg($i, msmc_size);
+ __K->setArg($i_plus_1, msmc_size);
#else
- err |= clSetKernelArg(__K, $i, sizeof(msmc_size), &msmc_size);
+ err |= clSetKernelArg(__K, $i_plus_1, sizeof(msmc_size), &msmc_size);
#endif
-
+";
+ $i=$i+2;
+ $i_plus_1 = $i+1;
+ $armcode .= "
+ void *ddr_ptr;
+ size_t ddr_size = DDR_BUF_SIZE;
+ ddr_ptr = __malloc_ddr(DDR_BUF_SIZE);
+ Buffer buf_DDR(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, DDR_BUF_SIZE, ddr_ptr);
+ __K->setArg($i, buf_DDR);
+ __K->setArg($i_plus_1, ddr_size);
+";
+ $i=$i+2;
+ $armcode .= "
/* create a buffer argument to get the return error code from the DSP */
int err_code;
#ifdef __cplusplus
Buffer buf_err(*${namespace}_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
- __K->setArg($i_plus_1, buf_err);
+ __K->setArg($i, buf_err);
#else
cl_mem buf_err = clCreateBuffer(*${namespace}_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code, &err);
${NAMESPACE}_OCL_CHKERROR(\"clCreateBuffer\",err);
- err |= clSetKernelArg(__K, $i_plus_1, sizeof(buf_err), &buf_err);
+ err |= clSetKernelArg(__K, $i, sizeof(buf_err), &buf_err);
${NAMESPACE}_OCL_CHKERROR(\"clSetKernelArg\",err);
#endif
";
- $i+=2;
+ $i++;
}
if ($tramptype !~ /^void$/i) {
}
${namespace}_mem_free(msmc_ptr);
+ __free_ddr(ddr_ptr);
";
}
$armcode .= "
else {
if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
print "In generate_kernel_from_proto, this is a level 3 function - " . $trampname . "\n";
-# $oclcode .= ", global double *l3_buf, local double *l2_buf_loc";
-# $trampproto .= ", global double *l3_buf, local double *l2_buf_loc";
- $oclcode .= ", global double *l3_buf, size_t l3_buf_size, int *err_code";
- $trampproto .= ", global double *l3_buf, size_t l3_buf_size, int *err_code";
+ $oclcode .= ", global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code";
+ $trampproto .= ", global void *l3_buf, size_t l3_buf_size, global void *ddr_buf, size_t ddr_buf_size, global int *err_code";
}
}
}
}
- $oclcode .= "${comma}$tramptype *retval" unless ($tramptype =~ /^void$/i);
- $trampproto .= "${comma}$tramptype *retval" unless ($tramptype =~ /^void$/i);
+ $oclcode .= "${comma}global $tramptype *retval" unless ($tramptype =~ /^void$/i);
+ $trampproto .= "${comma}global $tramptype *retval" unless ($tramptype =~ /^void$/i);
$trampproto .= ");";
$oclcode .= ")\n{ ";
$oclcode .= "${comma}retval" unless ($tramptype =~ /^void$/i);
if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
-# $oclcode .= ", l3_buf, l2_buf_loc";
- $oclcode .= ", l3_buf, l3_buf_size, err_code";
+ $oclcode .= ", l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, err_code";
}
$oclcode .= "); }";
$oclcode = $trampproto . "\n" . $oclcode;
#include "../../cblas/include/cblas.h"
#include "../../ticblas/ticblas.h"
-extern int bli_l3_mem_config(double *msmc_buf, size_t msmc_buf_size, size_t *l1D_SRAM_size_orig, size_t *l2_SRAM_size_orig);
+#ifdef TI_CBLAS_DEBUG
+#include "stdio.h"
+
+extern char *pool_mk_mem_L1;
+extern char *pool_kn_mem_L1;
+extern char *pool_mn_mem_L1;
+extern char *pool_mk_mem_L2;
+extern char *pool_kn_mem_L2;
+extern char *pool_mn_mem_L2;
+extern char *pool_mk_mem_L3;
+extern char *pool_kn_mem_L3;
+extern char *pool_mn_mem_L3;
+#endif
+
+extern int bli_l3_mem_config(void *msmc_buf, size_t msmc_buf_size, void *ddr_buf, size_t ddr_buf_size, size_t *l1D_SRAM_size_orig, size_t *l2_SRAM_size_orig);
extern int bli_l3_mem_reconfig(size_t l1D_SRAM_size_orig, size_t l2_SRAM_size_orig);
FACADE_PROLOGUE
$kernel_name = substr($trampname, 7);
if (index($blas_L3, '.'.$kernel_name.'|') != -1) {
-# $dspcode .= ", float *l3_buf, float *l2_buf_loc";
-# $trampproto .= ", float *l3_buf, float *l2_buf_loc" ;
- $dspcode .= ", double *l3_buf, size_t l3_buf_size, int *err_code";
- $trampproto .= ", double *l3_buf, size_t l3_buf_size, int *err_code";
+ $dspcode .= ", void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code";
+ $trampproto .= ", void *l3_buf, size_t l3_buf_size, void *ddr_buf, size_t ddr_buf_size, int *err_code";
}
$trampproto .= ");";
$dspcode .= "
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
- *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
+ *err_code = bli_l3_mem_config(l3_buf, l3_buf_size, ddr_buf, ddr_buf_size, &l1D_SRAM_size_orig, &l2_SRAM_size_orig);
if(*err_code != TICBLAS_SUCCESS) {
return;
}
index 9376360a73a8282442a8c3856cb5f512405ea3ce..51935210691c863d6ae239e832948cde33b899c9 100644 (file)
{
if( thread == NULL ) return;
//Assume that the ocomm and the icomm are freed by something else and don't need to be freed.
- bli_free(thread);
+ bli_free_scratch(thread);
}
packm_thrinfo_t* bli_create_packm_thread_info( thread_comm_t* ocomm, dim_t ocomm_id, thread_comm_t* icomm, dim_t icomm_id,
index 4bd441d20db906c287f6ee10168be5ac7be83890..e88970edcc21c30264fc98bfd34d338702464ed9 100644 (file)
packm_thrinfo_t* ipackm,
gemm_thrinfo_t* sub_gemm )
{
- gemm_thrinfo_t* thread = ( gemm_thrinfo_t* ) bli_malloc( sizeof( gemm_thrinfo_t ) );
+ gemm_thrinfo_t* thread = ( gemm_thrinfo_t* ) bli_malloc_scratch( sizeof( gemm_thrinfo_t ) );
bli_setup_gemm_thrinfo_node( thread, ocomm, ocomm_id,
icomm, icomm_id,
n_way, work_id,
bli_packm_thrinfo_free( thread->opackm );
bli_packm_thrinfo_free( thread->ipackm );
bli_gemm_thrinfo_free( thread->sub_gemm );
- bli_free( thread );
+ bli_free_scratch( thread );
return;
}
bli_gemm_thrinfo_free( threads[i] );
}
- bli_free( threads );
+ bli_free_scratch( threads );
}
gemm_thrinfo_t** bli_create_gemm_thrinfo_paths( )
dim_t ir_nt = 1;
- gemm_thrinfo_t** paths = (gemm_thrinfo_t**) bli_malloc( global_num_threads * sizeof( gemm_thrinfo_t* ) );
+ gemm_thrinfo_t** paths = (gemm_thrinfo_t**) bli_malloc_scratch( global_num_threads * sizeof( gemm_thrinfo_t* ) );
thread_comm_t* global_comm = bli_create_communicator( global_num_threads );
for( int a = 0; a < jc_way; a++ )
index 8bec82440d26958c70dbb79de30e3fa77d7f5ab8..0fdfc2f40c25a6e016816ea4fd61630101db4040 100644 (file)
packm_thrinfo_t* ipackm,
herk_thrinfo_t* sub_herk )
{
- herk_thrinfo_t* thread = ( herk_thrinfo_t* ) bli_malloc( sizeof( herk_thrinfo_t ) );
+ herk_thrinfo_t* thread = ( herk_thrinfo_t* ) bli_malloc_scratch( sizeof( herk_thrinfo_t ) );
bli_setup_herk_thrinfo_node( thread, ocomm, ocomm_id,
icomm, icomm_id,
n_way, work_id,
bli_packm_thrinfo_free( thread->opackm );
bli_packm_thrinfo_free( thread->ipackm );
bli_herk_thrinfo_free( thread->sub_herk );
- bli_free( thread );
+ bli_free_scratch( thread );
return;
}
{
for( int i = 0; i < num; i++)
bli_herk_thrinfo_free( threads[i] );
- bli_free( threads );
+ bli_free_scratch( threads );
}
herk_thrinfo_t** bli_create_herk_thrinfo_paths( )
dim_t ir_nt = 1;
- herk_thrinfo_t** paths = (herk_thrinfo_t**) bli_malloc( global_num_threads * sizeof( herk_thrinfo_t* ) );
+ herk_thrinfo_t** paths = (herk_thrinfo_t**) bli_malloc_scratch( global_num_threads * sizeof( herk_thrinfo_t* ) );
thread_comm_t* global_comm = bli_create_communicator( global_num_threads );
for( int a = 0; a < jc_way; a++ )
index 3319b7700070617068ab622b9467690c88a549f7..d5b07ad152f378490c5a1862dc5f527677363a5c 100644 (file)
packm_thrinfo_t* ipackm,
trmm_thrinfo_t* sub_trmm )
{
- trmm_thrinfo_t* thread = ( trmm_thrinfo_t* ) bli_malloc( sizeof( trmm_thrinfo_t ) );
+ trmm_thrinfo_t* thread = ( trmm_thrinfo_t* ) bli_malloc_scratch( sizeof( trmm_thrinfo_t ) );
bli_setup_trmm_thrinfo_node( thread, ocomm, ocomm_id,
icomm, icomm_id,
n_way, work_id,
bli_packm_thrinfo_free( thread->opackm );
bli_packm_thrinfo_free( thread->ipackm );
bli_trmm_thrinfo_free( thread->sub_trmm );
- bli_free( thread );
+ bli_free_scratch( thread );
return;
}
{
for( int i = 0; i < num; i++)
bli_trmm_thrinfo_free( threads[i] );
- bli_free( threads );
+ bli_free_scratch( threads );
}
trmm_thrinfo_t** bli_create_trmm_thrinfo_paths( bool_t jc_dependency )
dim_t jr_nt = ir_way;
dim_t ir_nt = 1;
- trmm_thrinfo_t** paths = (trmm_thrinfo_t**) bli_malloc( global_num_threads * sizeof( trmm_thrinfo_t* ) );
+ trmm_thrinfo_t** paths = (trmm_thrinfo_t**) bli_malloc_scratch( global_num_threads * sizeof( trmm_thrinfo_t* ) );
thread_comm_t* global_comm = bli_create_communicator( global_num_threads );
for( int a = 0; a < jc_way; a++ )
index dd138b3ff9bf8319058d7b028dbf8d5a566ca202..477d98f8d5285e57fa630d9fe6e5bccca419a033 100644 (file)
packm_thrinfo_t* ipackm,
trsm_thrinfo_t* sub_trsm )
{
- trsm_thrinfo_t* thread = ( trsm_thrinfo_t* ) bli_malloc( sizeof( trsm_thrinfo_t ) );
+ trsm_thrinfo_t* thread = ( trsm_thrinfo_t* ) bli_malloc_scratch( sizeof( trsm_thrinfo_t ) );
bli_setup_trsm_thrinfo_node( thread, ocomm, ocomm_id,
icomm, icomm_id,
n_way, work_id,
bli_packm_thrinfo_free( thread->opackm );
bli_packm_thrinfo_free( thread->ipackm );
bli_trsm_thrinfo_free( thread->sub_trsm );
- bli_free( thread );
+ bli_free_scratch( thread );
return;
}
{
for( int i = 0; i < num; i++)
bli_trsm_thrinfo_free( threads[i] );
- bli_free( threads );
+ bli_free_scratch( threads );
}
trsm_thrinfo_t** bli_create_trsm_thrinfo_paths( bool_t right_sided )
dim_t ir_nt = 1;
- trsm_thrinfo_t** paths = (trsm_thrinfo_t**) bli_malloc( global_num_threads * sizeof( trsm_thrinfo_t* ) );
+ trsm_thrinfo_t** paths = (trsm_thrinfo_t**) bli_malloc_scratch( global_num_threads * sizeof( trsm_thrinfo_t* ) );
thread_comm_t* global_comm = bli_create_communicator( global_num_threads );
for( int a = 0; a < jc_way; a++ )
index 9e68045ea64b4caefbe1ada53a77f2f27938e603..c500c82c7a1ab8771a3fc7dc99854c3c7316f10a 100644 (file)
#include <CL/cl_ext.h>
#endif
+extern void * blasGetMemHandle();
+
+/* This function is used to allocate memory during BLIS initialization.
+   The allocated memory is freed when bli_finalize() is called. */
void* bli_malloc( siz_t size )
{
void* p = NULL;
return p;
}
+/* This function is used to allocate memory for kernel computation.
+   The allocated memory will be freed at the end of the computation.
+   For TI DSP implementation, LibArch scratch heap allocator will be
+   used to obtain a memory block from a scratch heap that is
+   initialized during BLIS initialization.
+
+   size    - number of bytes requested.
+   returns - pointer to the allocated block, or NULL when size is 0.
+
+   The allocator is selected at compile time, in this order:
+     BLIS_ENABLE_TI_ARM_OPENCL      - __malloc_ddr(), inside the
+                                      "bli_malloc_critical" OpenMP critical section;
+     BLIS_HEAP_ADDR_ALIGN_SIZE == 1 - malloc();
+     _WIN32                         - _aligned_malloc() with BLIS_HEAP_ADDR_ALIGN_SIZE;
+     BLIS_ENABLE_C66X_BUILD         - lib_smem_salloc() on the handle returned by
+                                      blasGetMemHandle();
+     otherwise                      - posix_memalign() with BLIS_HEAP_ADDR_ALIGN_SIZE.
+
+   A failed allocation calls bli_abort(). */
+void* bli_malloc_scratch( siz_t size )
+{
+ void* p = NULL;
+#if !defined(BLIS_ENABLE_TI_ARM_OPENCL) && !defined(_WIN32) && !defined(BLIS_ENABLE_C66X_BUILD) && (BLIS_HEAP_ADDR_ALIGN_SIZE != 1)
+ int r_val; /* posix_memalign() status; only declared when that branch is compiled */
+#endif
+
+ if ( size == 0 ) return NULL; /* empty request: no allocator is called */
+
+#if defined(BLIS_ENABLE_TI_ARM_OPENCL)
+ _Pragma( "omp critical (bli_malloc_critical)" )
+ {
+ p = __malloc_ddr( ( size_t )size );
+ }
+#elif BLIS_HEAP_ADDR_ALIGN_SIZE == 1
+ p = malloc( ( size_t )size );
+#elif defined(_WIN32)
+ p = _aligned_malloc( ( size_t )size,
+ ( size_t )BLIS_HEAP_ADDR_ALIGN_SIZE );
+#elif defined (BLIS_ENABLE_C66X_BUILD)
+ /* Use LibArch slow scratch memory allocator.
+    NOTE(review): the last argument (1) is presumably the alignment in
+    bytes, i.e. no alignment constraint - confirm against the LibArch API. */
+ p = lib_smem_salloc(blasGetMemHandle(), size, 1);
+#else
+ r_val = posix_memalign( &p,
+ ( size_t )BLIS_HEAP_ADDR_ALIGN_SIZE,
+ ( size_t )size );
+
+ if ( r_val != 0 ) bli_abort(); /* non-zero status means posix_memalign() failed */
+#endif
+
+ if ( p == NULL ) bli_abort(); /* common failure check for every allocator branch */
+
+ return p;
+}
+
#ifdef BLIS_ENABLE_C66X_BUILD
+/* This function is used to allocate memory for kernel computation
+ with required alignment. */
void* bli_memalign(siz_t alignment, siz_t size )
{
void* p = NULL;
return p;
}
+
+/* Allocate a scratch memory block with a caller-specified alignment for
+   use during kernel computation, using the LibArch scratch heap allocator.
+   The block is obtained through lib_smem_salloc() from the heap referenced
+   by blasGetMemHandle().
+
+   alignment - requested alignment of the returned block; 0 selects
+               BLIS_CACHE_LINE_SIZE.
+   size      - number of bytes requested.
+   returns   - pointer to the allocated block, or NULL when size is 0
+               (same convention as bli_malloc_scratch()).
+
+   A failed allocation calls bli_abort(). */
+void* bli_malloc_scratch_align(siz_t alignment, siz_t size )
+{
+    void* p = NULL;
+
+    if ( size == 0 ) return NULL;
+
+    /* Honor the caller's alignment rather than hard-coding the cache line
+       size; fall back to the cache line size when none is given. */
+    if ( alignment == 0 ) alignment = BLIS_CACHE_LINE_SIZE;
+
+    p = lib_smem_salloc(blasGetMemHandle(), size, alignment);
+
+    if ( p == NULL ) bli_abort();
+
+    return p;
+}
+
#endif
+/* This function is used to free the memory allocated by bli_malloc. */
void bli_free( void* p )
{
#if defined(BLIS_ENABLE_TI_ARM_OPENCL)
#endif
}
+/* This function is used to free the memory allocated by bli_malloc_scratch.
+
+   p - pointer previously returned by bli_malloc_scratch.
+
+   The release call is selected at compile time, in this order:
+     BLIS_ENABLE_TI_ARM_OPENCL - __free_ddr(), inside the
+                                 "bli_malloc_critical" OpenMP critical section;
+     BLIS_ENABLE_C66X_BUILD    - nothing is released and p is not used;
+     BLIS_HEAP_ADDR_ALIGN_SIZE == 1, or _WIN32 not defined - free();
+     otherwise                 - _aligned_free(). */
+void bli_free_scratch( void* p )
+{
+#if defined(BLIS_ENABLE_TI_ARM_OPENCL)
+ _Pragma( "omp critical (bli_malloc_critical)" )
+ {
+ __free_ddr( p );
+ }
+#elif defined (BLIS_ENABLE_C66X_BUILD)
+ /* for DSP implementation, freeing scratch heap is not needed. */
+
+#elif BLIS_HEAP_ADDR_ALIGN_SIZE == 1 || !defined(_WIN32)
+ free( p ); /* pairs with the malloc() and posix_memalign() branches of bli_malloc_scratch */
+#else
+ _aligned_free( p ); /* pairs with the _aligned_malloc() branch of bli_malloc_scratch */
+#endif
+}
index afc3e5d0bfe792f4fc8b813c6c14a76cedaedfc8..c9723d7ffbcbbd94080d9a99497c7e4d32c33d2d 100644 (file)
*/
void* bli_malloc( siz_t size );
+void* bli_malloc_scratch( siz_t size );
#ifdef BLIS_ENABLE_C66X_BUILD
void* bli_memalign(siz_t alignment, siz_t size );
+void* bli_malloc_scratch_align(siz_t alignment, siz_t size );
#endif
void bli_free( void* p );
+void bli_free_scratch( void* p );
index e3c4cb77fbae2d6cc57ffe507fa96d0a613a6328..bd753f808c3197dfc2e1e1624928023846632d22 100644 (file)
if( barrier->count == 0 )
{
bli_free_barrier_tree( barrier->dad );
- bli_free( barrier );
+ bli_free_scratch( barrier );
}
return;
}
barrier_t* bli_create_tree_barrier(int num_threads, int arity, barrier_t** leaves, int leaf_index)
{
- barrier_t* me = (barrier_t*) bli_malloc(sizeof(barrier_t));
+ barrier_t* me = (barrier_t*) bli_malloc_scratch(sizeof(barrier_t));
me->dad = NULL;
me->signal = 0;
{
bli_free_barrier_tree( communicator->barriers[i] );
}
- bli_free( communicator->barriers );
+ bli_free_scratch( communicator->barriers );
}
void bli_setup_communicator( thread_comm_t* communicator, dim_t n_threads)
{
if( communicator == NULL ) return;
communicator->sent_object = NULL;
communicator->n_threads = n_threads;
- communicator->barriers = ( barrier_t** ) bli_malloc( sizeof( barrier_t* ) * n_threads );
+ communicator->barriers = ( barrier_t** ) bli_malloc_scratch( sizeof( barrier_t* ) * n_threads );
bli_create_tree_barrier( n_threads, BLIS_TREE_BARRIER_ARITY, communicator->barriers, 0 );
}
{
if( communicator == NULL ) return;
bli_cleanup_communicator( communicator );
- bli_free( communicator );
+ bli_free_scratch( communicator );
}
thread_comm_t* bli_create_communicator( dim_t n_threads )
*/
siz_t communicator_size;
communicator_size = BLIS_CACHE_LINE_SIZE*(sizeof(thread_comm_t)/BLIS_CACHE_LINE_SIZE +1);
- comm = (thread_comm_t*) bli_memalign(BLIS_CACHE_LINE_SIZE, communicator_size );
+ comm = (thread_comm_t*) bli_malloc_scratch_align(BLIS_CACHE_LINE_SIZE, communicator_size );
#else
- thread_comm_t* comm = (thread_comm_t*) bli_malloc( sizeof(thread_comm_t) );
+ thread_comm_t* comm = (thread_comm_t*) bli_malloc_scratch( sizeof(thread_comm_t) );
#endif
bli_setup_communicator( comm, n_threads );
return comm;
@@ -258,7 +258,7 @@ thrinfo_t* bli_create_thread_info( thread_comm_t* ocomm, dim_t ocomm_id, thread_
dim_t n_way, dim_t work_id )
{
- thrinfo_t* thr = (thrinfo_t*) bli_malloc( sizeof(thrinfo_t) );
+ thrinfo_t* thr = (thrinfo_t*) bli_malloc_scratch( sizeof(thrinfo_t) );
bli_setup_thread_info( thr, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id );
return thr;
}
diff --git a/examples/matmpy/main.c b/examples/matmpy/main.c
index 3063ec8ddff6293b174674f06f20fc13c90b6d26..94f65587cd698d50b946519ac47181b7dcb62c1c 100644 (file)
--- a/examples/matmpy/main.c
+++ b/examples/matmpy/main.c
int t;
double checksum;
char *ti_cblas_offload_env;
- int numtests = 10;
+ int numtests = 1;
/* configuration */
m = k = n = 1000;
diff --git a/setup_env_devkit.sh b/setup_env_devkit.sh
index bce350dd7ffdb42b36b73587b331e9da5421defc..b4545831fb5869695c4865f90b7e52fabb5b9664 100644 (file)
--- a/setup_env_devkit.sh
+++ b/setup_env_devkit.sh
#!/bin/bash
-export TI_OCL_INSTALL_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.06/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/opencl"
-export CGTROOT="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.06/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x"
-export TI_OCL_CGT_INSTALL="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.06/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x"
-export PDK_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.06/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-pdk-tree"
-export FC_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.06/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-framework-components-tree"
-export XDAIS_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.06/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-xdais-tree"
-export BIOS_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.06/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-sysbios-tree"
-export OMP_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.06/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-omp-tree"
-export LIBARCH_DIR="/home/a0869574local/proclibs/libarch_intgit/libarch"
-export TARGET_ROOTDIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.06/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi"
+export TI_OCL_INSTALL_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/opencl"
+export CGTROOT="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x"
+export TI_OCL_CGT_INSTALL="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x"
export XDC_DIR=/home/a0869574local/ti-rtos-sdk-12-08/xdctools_3_31_02_38_core
+export BIOS_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-sysbios-tree"
+export XDAIS_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-xdais-tree"
+export FC_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-framework-components-tree"
+export PDK_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-pdk-tree"
+export OMP_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-omp-tree"
+export LIBARCH_DIR="/home/a0869574local/proclibs/libarch_intgit/libarch"
+export TARGET_ROOTDIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi"
-export PATH=/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.06/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x/bin:/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.06/linux-devkit/sysroots/x86_64-arago-linux/usr/bin:$PATH
+export PATH=/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x/bin:/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/x86_64-arago-linux/usr/bin:$PATH
diff --git a/ticblas/src/ticblas.c b/ticblas/src/ticblas.c
index 545b54ef5f07aae0d0e7125fc58f66944274bde3..554355768de32600de193f129d111a02ef048911 100644 (file)
--- a/ticblas/src/ticblas.c
+++ b/ticblas/src/ticblas.c
#define BLAS_MEM_SIZE_VFAST BLAS_LEVEL3_L1DSRAM_SIZE \r
#define BLAS_MEM_SIZE_FAST BLAS_LEVEL3_L2SRAM_SIZE\r
#define BLAS_MEM_SIZE_MEDIUM BLAS_LEVEL3_MSMC_SIZE\r
-#define BLAS_MEM_SIZE_SLOW (0)\r
+#define BLAS_MEM_SIZE_SLOW (4804)\r
\r
\r
/* Define memory descriptors for memory management */\r
if( ((mem_vfast_base == NULL) || (mem_vfast_size < BLAS_MEM_SIZE_VFAST) )\r
||((mem_fast_base == NULL) || (mem_fast_size < BLAS_MEM_SIZE_FAST) )\r
||((mem_medium_base == NULL) || (mem_medium_size < BLAS_MEM_SIZE_MEDIUM) )\r
-/* ||((mem_slow_base == NULL) || (mem_slow_size < BLAS_MEM_SIZE_SLOW) )*/\r
+ ||((mem_slow_base == NULL) || (mem_slow_size < BLAS_MEM_SIZE_SLOW) )\r
) {\r
return(TICBLAS_INIT_ERROR);\r
}\r
lib_smem_vinit(blas_mem_handle, mem_vfast_base, mem_vfast_size);\r
lib_smem_finit(blas_mem_handle, mem_fast_base, mem_fast_size);\r
lib_smem_minit(blas_mem_handle, mem_medium_base, mem_medium_size);\r
-/* lib_smem_sinit(blas_mem_handle, mem_slow_base, mem_slow_size); */\r
+ lib_smem_sinit(blas_mem_handle, mem_slow_base, mem_slow_size); \r
\r
pool_mk_mem_L1 = lib_smem_valloc(blas_mem_handle, BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);\r
pool_kn_mem_L1 = lib_smem_valloc(blas_mem_handle, BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE);\r