summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: c20ff90)
raw | patch | inline | side by side (parent: c20ff90)
author | Jianzhong Xu <xuj@ti.com> | |
Fri, 4 Mar 2016 15:30:03 +0000 (10:30 -0500) | ||
committer | Jianzhong Xu <xuj@ti.com> | |
Fri, 4 Mar 2016 15:30:03 +0000 (10:30 -0500) |
index e37943d880890816c9951cd823242474000e2a8a..290eb9f9d97babbf26fa4fd76ba5fe2fc54cb803 100755 (executable)
#define BLIS_GEMM_DMAB_CNTL gemm_dmab_cntl
#elif defined (MEM_MODEL_SMALL)
-#define BLIS_GEMM_DMAA_CNTL NULL
+#define BLIS_GEMM_DMAA_CNTL NULL // disabling EDMA
#define BLIS_GEMM_DMAB_CNTL NULL
#endif
// contiguous memory pools.
#define BLIS_NUM_MC_X_KC_BLOCKS_L3 0
+#if defined (MEM_MODEL_SMALL)
+#define BLIS_NUM_MC_X_KC_BLOCKS_L2 1 // Matrix A (DDR->L2): no ping-pong buffer is needed when EDMA is not used
+#else
#define BLIS_NUM_MC_X_KC_BLOCKS_L2 2 // Each L2 RAM is local to the DSP. Just need one buffer per thread that is packed
+#endif
#define BLIS_NUM_MC_X_KC_BLOCKS_L1 0
#define BLIS_NUM_MR_X_KC_BLOCKS_L1 2 // To transfer A to L1 in a ping-pong manner
#define BLIS_NUM_MC_X_KC_BLOCKS 2*BLIS_MAX_NUM_THREADS + 1 //To test w/o DMA and L2, L3 memory, all memory must be in DDR3 now
+#if defined (MEM_MODEL_SMALL)
+#define BLIS_NUM_KC_X_NC_BLOCKS_L3 1 // Matrix B (DDR->L3): no ping-pong buffer is needed when EDMA is not used
+#else
#define BLIS_NUM_KC_X_NC_BLOCKS_L3 2 // Each thread shares a B block, so do not need 8 buffers *BLIS_MAX_NUM_THREADS // One for the partitioned B1, and one for the packed B1
+#endif
#define BLIS_NUM_KC_X_NC_BLOCKS_L2 0
#define BLIS_NUM_KC_X_NC_BLOCKS_L1 0
#define BLIS_NUM_KC_X_NR_BLOCKS_L1 1
#define BLIS_NUM_MC_X_NC_BLOCKS_L3 0
#define BLIS_NUM_MC_X_NC_BLOCKS_L2 0
+// still using EDMA in bli_gemm_ker_var2 (DDR->L2 for C output buffer, L2->L1 for matrix A, MSMC->L1 for B)
#define BLIS_NUM_MC_X_NR_BLOCKS_L2 3 //Bringing C into the L2 memory. We need 3 buffers, one to read, one to compute and one to write.
#define BLIS_NUM_MC_X_NC_BLOCKS_L1 0
#define BLIS_NUM_MR_X_NR_BLOCKS_L1 0
index 65dcd61634258fdfbf4aec094877152737629ebe..1a9c9100ed9eece66534d9245ccc3096e09f4992 100755 (executable)
#elif defined(MEM_MODEL_SMALL)
+// use this when EDMA is disabled for A and B
+#define BLIS_DEFAULT_MC_S 112
+#define BLIS_DEFAULT_KC_S 428
+#define BLIS_DEFAULT_NC_S 1224
+
+#define BLIS_DEFAULT_MC_D 96
+#define BLIS_DEFAULT_KC_D 220
+#define BLIS_DEFAULT_NC_D 1184
+
+#define BLIS_DEFAULT_MC_C 88
+#define BLIS_DEFAULT_KC_C 260
+#define BLIS_DEFAULT_NC_C 1008
+
+#define BLIS_DEFAULT_MC_Z 64
+#define BLIS_DEFAULT_KC_Z 178
+#define BLIS_DEFAULT_NC_Z 736
+
+#define BLIS_DEFAULT_4M_MC_C 108
+#define BLIS_DEFAULT_4M_KC_C 220
+#define BLIS_DEFAULT_4M_NC_C 1184
+
+#define BLIS_DEFAULT_4M_MC_Z 64
+#define BLIS_DEFAULT_4M_KC_Z 178
+#define BLIS_DEFAULT_4M_NC_Z 736
+
+#define BLIS_DEFAULT_3M_MC_C 64
+#define BLIS_DEFAULT_3M_KC_C 220
+#define BLIS_DEFAULT_3M_NC_C 792
+
+#define BLIS_DEFAULT_3M_MC_Z 48
+#define BLIS_DEFAULT_3M_KC_Z 178
+#define BLIS_DEFAULT_3M_NC_Z 488
+
+/*
+#define BLIS_DEFAULT_MC_S 144
+#define BLIS_DEFAULT_KC_S 428
+#define BLIS_DEFAULT_NC_S 1224
+
+#define BLIS_DEFAULT_MC_D 140
+#define BLIS_DEFAULT_KC_D 220
+#define BLIS_DEFAULT_NC_D 1184
+
+#define BLIS_DEFAULT_MC_C 116
+#define BLIS_DEFAULT_KC_C 260
+#define BLIS_DEFAULT_NC_C 1008
+
+#define BLIS_DEFAULT_MC_Z 86
+#define BLIS_DEFAULT_KC_Z 178
+#define BLIS_DEFAULT_NC_Z 736
+
+#define BLIS_DEFAULT_4M_MC_C 140
+#define BLIS_DEFAULT_4M_KC_C 220
+#define BLIS_DEFAULT_4M_NC_C 1184
+#define BLIS_DEFAULT_4M_MC_Z 86
+#define BLIS_DEFAULT_4M_KC_Z 178
+#define BLIS_DEFAULT_4M_NC_Z 736
+
+#define BLIS_DEFAULT_3M_MC_C 88
+#define BLIS_DEFAULT_3M_KC_C 220
+#define BLIS_DEFAULT_3M_NC_C 792
+
+#define BLIS_DEFAULT_3M_MC_Z 56
+#define BLIS_DEFAULT_3M_KC_Z 178
+#define BLIS_DEFAULT_3M_NC_Z 488
+*/
+// use this when EDMA is enabled
+/*
#define BLIS_DEFAULT_MC_S 104
#define BLIS_DEFAULT_KC_S 196
#define BLIS_DEFAULT_NC_S 824
#define BLIS_DEFAULT_3M_MC_Z 36
#define BLIS_DEFAULT_3M_KC_Z 108
#define BLIS_DEFAULT_3M_NC_Z 196
-
+*/
#endif
// -- Register blocksizes --
-
+// These are the same for all memory models on the C66x architecture; they need to be redefined for C7x
#define BLIS_DEFAULT_MR_S 4
#define BLIS_DEFAULT_NR_S 8 //4 //
index 1a0a9d9d827e56ec501134d8df725ab0692d8a92..1f7fe1df02e71ef1c5c74574e2c22e59f0dff650 100644 (file)
// Initialize libblis.
//bli_init();
/* Configure memory and initialize TI CBLAS */
+#ifdef BLIS_TEST_DSP
prepare_for_ticblas();
-
+#endif
+
// Initialize some strings.
libblis_test_init_strings();
// Finalize libblis.
//bli_finalize();
/* Finalize TI CBLAS and reconfigure memory */
+#ifdef BLIS_TEST_DSP
cleanup_after_ticblas();
-
+#endif
+
// Return peacefully.
return 0;
}
index b18c9dabfb1d25a8f8419939e8cb28d91b30f4a8..2fdce29258924dc15533c5f88a272dc38ee5165b 100644 (file)
.SILENT:
-#
-# Check if required environment variables are defined
-#
-ifneq ($(MAKECMDGOALS),clean)
-
-# Platform file
-ifeq ($(TARGET),SOC_AM572x)
-export PDK_DIR=$(AM572_PDK_DIR)
-XDCPLATFORM = ti.runtime.openmp.platforms.am57x
-else ifeq ($(TARGET),SOC_C6678)
-export PDK_DIR=$(C6678_PDK_DIR)
-XDCPLATFORM = ti.runtime.openmp.platforms.evm6678
-else ifeq ($(TARGET),SOC_K2H)
-export PDK_DIR=$(C6636_PDK_DIR)
-XDCPLATFORM = ti.runtime.openmp.platforms.evmTCI6636K2H
-endif
-
-
-ifeq ($(TARGET),SOC_AM572x)
-PDK_DIR = AM572_PDK_DIR
-else ifeq ($(TARGET),SOC_K2H)
-PDK_DIR = C6636_PDK_DIR
-else ifeq ($(TARGET),SOC_C6678)
-PDK_DIR = C6678_PDK_DIR
-else
-$(call error, ERROR - TARGET NOT DEFINED. Must specify one of: TARGET=SOC_K2H,TARGET=SOC_C6678,TARGET=SOC_AM572X)
-endif
-
-# Path to C6000 compiler tools
-ifeq ($(CGTROOT),)
-$(call error,ERROR - CGTROOT NOT DEFINED, PLEASE REFER TO README.txt)
-endif
-
-ifeq ($(XDC_DIR),)
-$(call error,ERROR - XDC_DIR NOT DEFINED, PLEASE REFER TO README.txt)
-endif
-
-ifeq ($(XDAIS_DIR),)
-$(call error,ERROR - XDAIS_DIR NOT DEFINED, PLEASE REFER TO README.txt)
-endif
-
-ifeq ($(BIOS_DIR),)
-$(call error,ERROR - BIOS_DIR NOT DEFINED, PLEASE REFER TO README.txt)
-endif
-
-ifeq ($(IPC_DIR),)
-$(call error,ERROR - IPC_DIR NOT DEFINED, PLEASE REFER TO README.txt)
-endif
-
-ifeq ($(OMP_DIR),)
-$(call error,ERROR - OMP_DIR NOT DEFINED, PLEASE REFER TO README.txt)
-endif
-
-ifeq ($($(PDK_DIR)),)
-$(call error,ERROR - $(PDK_DIR) NOT DEFINED, PLEASE REFER TO README.txt)
-endif
-
-ifeq ($(FC_DIR),)
-$(call error,ERROR - FC_DIR NOT DEFINED, PLEASE REFER TO README.txt)
-endif
-
-ifeq ($(EDMA3_DIR),)
-$(call error,ERROR - EDMA3_DIR NOT DEFINED, PLEASE REFER TO README.txt)
-endif
-
-ifeq ($(LIBARCH_DIR),)
-$(call error,ERROR - LIBARCH_DIR NOT DEFINED, PLEASE REFER TO README.txt)
-endif
-
-ifeq ($(LINALG_DIR),)
-$(call error,ERROR - LINALG_DIR NOT DEFINED, PLEASE REFER TO README.txt)
-endif
-
-endif
-
MFS = $(wildcard */Makefile)
DIRS = $(patsubst %/Makefile,%,$(MFS))
index 49afda1448b23d233f333b388f24f5dc366736ee..e37a6e417c13983febc121db0edd642a5069afed 100644 (file)
# Makefile to build OpenMP applications
+#
+# Check if required environment variables are defined
+#
+ifneq ($(MAKECMDGOALS),clean)
+
+# Path to C6000 compiler tools
+ifeq ($(CGTROOT),)
+$(call error,ERROR - CGTROOT NOT DEFINED, PLEASE REFER TO README.txt)
+endif
+
+ifeq ($(XDC_DIR),)
+$(call error,ERROR - XDC_DIR NOT DEFINED, PLEASE REFER TO README.txt)
+endif
+
+ifeq ($(XDAIS_DIR),)
+$(call error,ERROR - XDAIS_DIR NOT DEFINED, PLEASE REFER TO README.txt)
+endif
+
+ifeq ($(BIOS_DIR),)
+$(call error,ERROR - BIOS_DIR NOT DEFINED, PLEASE REFER TO README.txt)
+endif
+
+ifeq ($(IPC_DIR),)
+$(call error,ERROR - IPC_DIR NOT DEFINED, PLEASE REFER TO README.txt)
+endif
+
+ifeq ($(OMP_DIR),)
+$(call error,ERROR - OMP_DIR NOT DEFINED, PLEASE REFER TO README.txt)
+endif
+
+ifeq ($(PDK_DIR),)
+$(call error,ERROR - PDK_DIR NOT DEFINED, PLEASE REFER TO README.txt)
+endif
+
+ifeq ($(FC_DIR),)
+$(call error,ERROR - FC_DIR NOT DEFINED, PLEASE REFER TO README.txt)
+endif
+
+ifeq ($(EDMA3_DIR),)
+$(call error,ERROR - EDMA3_DIR NOT DEFINED, PLEASE REFER TO README.txt)
+endif
+
+ifeq ($(LIBARCH_DIR),)
+$(call error,ERROR - LIBARCH_DIR NOT DEFINED, PLEASE REFER TO README.txt)
+endif
+
+ifeq ($(LINALG_DIR),)
+$(call error,ERROR - LINALG_DIR NOT DEFINED, PLEASE REFER TO README.txt)
+endif
+
+endif
+
+
default: all
# Configuration file used (without the .cfg extension)
ifeq ($(TARGET),SOC_AM572x)
XDCPLATFORM = ti.runtime.openmp.platforms.am57x
+CL_OPTS += -DSOC_AM572x
else ifeq ($(TARGET),SOC_C6678)
XDCPLATFORM = ti.runtime.openmp.platforms.evm6678
+CL_OPTS += -DSOC_C6678
else ifeq ($(TARGET),SOC_K2H)
XDCPLATFORM = ti.runtime.openmp.platforms.evmTCI6636K2H
+CL_OPTS += -DSOC_K2H
else
$(call error, ERROR - TARGET NOT DEFINED. Must specify one of: TARGET=SOC_K2H,TARGET=SOC_C6678,TARGET=SOC_AM572X)
endif
-ifeq ($(MEM_MODEL),Large)
- CL_OPTS += -DMEM_MODEL_Large
-else ifeq ($(MEM_MODEL),Small)
- CL_OPTS += -DMEM_MODEL_Small
-else ifeq ($(MEM_MODEL),Medium)
- CL_OPTS += -DMEM_MODEL_Medium
-else
-$(call error, ERROR - MEM_MODEL NOT DEFINED. Must specify one of: MEM_MODEL=Large, MEM_MODEL=Medium, MEM_MODEL=Small)
-endif
-
ifeq ($(BUILD_TYPE),debug)
CL_OPTS += -g --optimize_with_debug=on
else
endif
+testfiles_obj += ticblas_config.obj fc_config_c6678.obj
+
CL_OPTS += -@ $(OPT_CMD) -mv6600 --omp -I $(OMP_DIR)/packages/ti/runtime/openmp
CL_OPTS += -I$(FC_DIR)/packages -I$(XDAIS_DIR)/packages -I$(EDMA3_DIR)/packages -I$(LIBARCH_DIR)/include -I$(LINALG_DIR)/include
CL_OPTS += -D$(TARGET) -DLIB_RTOS
index 65972053151ea337a669163538d42b9e1e2c29a1..e97b97807cf38b2ef87c230f10c48948eefd49e7 100644 (file)
#include <cblas.h>
/* use small memory model of BLAS */
-#ifdef MEM_MODEL_Small
-#define BLAS_L2_BUF_SIZE (183*1024UL)
-#define BLAS_MSMC_BUF_SIZE (2*1024*1024UL)
+#ifdef SOC_C6678
+#define BLAS_L2_BUF_SIZE (220*1024UL) /* 220KB SRAM is available in L2 for C6678 EVM */
+#define BLAS_MSMC_BUF_SIZE (2*1024*1024UL) /* reserve 2MB for BLAS */
#define BLAS_L3_DDR_SIZE (5120)
#else
-# if MEM_MODEL_Large
+# if SOC_K2H
# define BLAS_L2_BUF_SIZE (768*1024UL)
# define BLAS_MSMC_BUF_SIZE (4608*1024UL) /* 4.5MB */
# define BLAS_L3_DDR_SIZE (5120)
# else
-# error "MEM_MODEL undefined! Must be one of MEM_MODEL_Small or MEM_MODEL_Large"
+# error "Target undefined! Must be one of SOC_C6678 or SOC_K2H"
# endif
#endif
-//#define BLAS_L2_BUF_SIZE (384*1024UL) /* 384KB */
-//#define BLAS_MSMC_BUF_SIZE (4718592UL) /* 4.5MB */
-//#define BLAS_L3_DDR_SIZE (5120) /* 5KB */
-
size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+/* define MSMC memory for BLAS - can be shared with other libraries */
#pragma DATA_SECTION(blas_msmc_buf, ".blas_msmc")
#pragma DATA_ALIGN(blas_msmc_buf,32)
char blas_msmc_buf[BLAS_MSMC_BUF_SIZE];
+/* define L2 memory for BLAS - can be shared with other libraries */
#pragma DATA_SECTION(blas_l2_buf, ".blas_l2")
#pragma DATA_ALIGN(blas_l2_buf,32)
char blas_l2_buf[BLAS_L2_BUF_SIZE];
index 23ea2c0588aca4a7732dc9a50a5d766c13713eca..ae5cad4e08254480ee62fd897bc8942a202b29f3 100644 (file)
-# Default to RTSC mode
COMMON_FOLDER = ../common
-#testfiles = dgemm_test.c $(COMMON_FOLDER)/ticblas_config.c $(COMMON_FOLDER)/fc_config_c6678.c
-testfiles_obj = dgemm_test.obj ticblas_config.obj fc_config_c6678.obj
+testfiles_obj = dgemm_test.obj
outfile = dgemm_test.out
CL_OPTS =
diff --git a/setup_env_devkit.sh b/setup_env_k2h_ocl.sh
similarity index 62%
rename from setup_env_devkit.sh
rename to setup_env_k2h_ocl.sh
index b4545831fb5869695c4865f90b7e52fabb5b9664..ad14d06da4a167f4f4360ef3da275c5570dc3317 100644 (file)
rename from setup_env_devkit.sh
rename to setup_env_k2h_ocl.sh
index b4545831fb5869695c4865f90b7e52fabb5b9664..ad14d06da4a167f4f4360ef3da275c5570dc3317 100644 (file)
--- a/setup_env_devkit.sh
+++ b/setup_env_k2h_ocl.sh
#!/bin/bash
-export TI_OCL_INSTALL_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/opencl"
-export CGTROOT="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x"
-export TI_OCL_CGT_INSTALL="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x"
+export TI_OCL_INSTALL_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.03/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/opencl"
+export CGTROOT="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.03/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x"
+export TI_OCL_CGT_INSTALL="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.03/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x"
export XDC_DIR=/home/a0869574local/ti-rtos-sdk-12-08/xdctools_3_31_02_38_core
-export BIOS_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-sysbios-tree"
-export XDAIS_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-xdais-tree"
-export FC_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-framework-components-tree"
-export PDK_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-pdk-tree"
-export OMP_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-omp-tree"
+export BIOS_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.03/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-sysbios-tree"
+export XDAIS_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.03/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-xdais-tree"
+export FC_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.03/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-framework-components-tree"
+export PDK_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.03/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-pdk-tree"
+export OMP_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.03/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-omp-tree"
export LIBARCH_DIR="/home/a0869574local/proclibs/libarch_intgit/libarch"
-export TARGET_ROOTDIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi"
+export TARGET_ROOTDIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.03/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi"
-export PATH=/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x/bin:/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/x86_64-arago-linux/usr/bin:$PATH
+export PATH=/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.03/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x/bin:/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.03/linux-devkit/sysroots/x86_64-arago-linux/usr/bin:$PATH