index f67e00a27037f6359161277bb5492e2b00a122b3..2f7865eecf9215983fac2111e695c37074930a53 100644 (file)
* THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
-#include "ti_cblas.h"
+#include "ti_cblas_acc.h"
+#include "../../ticblas/ticblas.h"
#ifdef __cplusplus
extern "C" {
@@ -49,27 +50,26 @@ void cblas_sswap(const int N, float *X, const int incX, float *Y, const int incY
* the offload of this routine to the DSP is disabled.
*/
#ifndef TI_CBLAS_SWAP_ENABLE_OFFLOAD
- TI_CBLAS_PROFILE_START();
+ TI_CBLAS_PROFILE_START();
TI_CBLAS_DEBUG_PRINT("Executing ARM %s\n", "cblas_sswap");
- __real_cblas_sswap(N,X,incX,Y,incY);
- TI_CBLAS_PROFILE_REPORT(" Entire %s call (ARM) took %8.2f us\n","cblas_sswap", (float) clock_diff);
+ __real_cblas_sswap(N,X,incX,Y,incY);
+ TI_CBLAS_PROFILE_REPORT(" Entire %s call (ARM) took %8.2f us\n","cblas_sswap", (float) clock_diff);
return ;
#else
- TI_CBLAS_PROFILE_START();
+ TI_CBLAS_PROFILE_START();
/* Dynamic conditional offload to ARM */
- if ((TI_CBLAS_L1_OFFLOAD == TI_CBLAS_OFFLOAD_NONE)) {
- TI_CBLAS_DEBUG_PRINT("Executing ARM %s\n", "cblas_sswap");
- __real_cblas_sswap(N,X,incX,Y,incY);
- TI_CBLAS_PROFILE_REPORT(" Entire %s call (ARM) took %8.2f us\n","cblas_sswap", (float) clock_diff);
- return ;
- }
- /* End ARM offload */
-
+ if ((TI_CBLAS_L1_OFFLOAD == TI_CBLAS_OFFLOAD_NONE)) {
+ TI_CBLAS_DEBUG_PRINT("Executing ARM %s\n", "cblas_sswap");
+ __real_cblas_sswap(N,X,incX,Y,incY);
+ TI_CBLAS_PROFILE_REPORT(" Entire %s call (ARM) took %8.2f us\n","cblas_sswap", (float) clock_diff);
+ return ;
+ }
+ /* End ARM offload */
/******************************************************************/
/* DSP offload WILL be done if control reaches here */
- TI_CBLAS_DEBUG_PRINT("Offloading to DSP %s\n", "cblas_sswap");
+ TI_CBLAS_DEBUG_PRINT("Offloading to DSP %s\n", "cblas_sswap");
/* Lookup kernel pointer from global table */
#ifdef __cplusplus
#else
cl_kernel __K;
#endif
- __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSWAP_IDX, "ocl_cblas_sswap");
+ __K = ti_cblas_get_kernel(TI_CBLAS_CBLAS_SSWAP_IDX, "ocl_cblas_sswap");
+
#ifdef __cplusplus
try
#else
#endif
{
-
#ifdef __cplusplus
__K->setArg(0, N);
#else
@@ -144,9 +144,10 @@ void cblas_sswap(const int N, float *X, const int incX, float *Y, const int incY
TI_CBLAS_OCL_CHKERROR("clWaitForEvents",err);
err |= clReleaseEvent(e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
-
#endif
+
ti_cblas_delete_kernel(__K);
+
TI_CBLAS_DEBUG_PRINT("Finished executing %s\n", "cblas_sswap");
TI_CBLAS_PROFILE_REPORT(" Entire %s call (DSP) took %8.2f us\n","cblas_sswap", (float) clock_diff);
return ;