summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: b990a50)
raw | patch | inline | side by side (parent: b990a50)
author | Jianzhong Xu <xuj@ti.com> | |
Thu, 7 Apr 2016 19:08:06 +0000 (15:08 -0400) | ||
committer | Jianzhong Xu <xuj@ti.com> | |
Thu, 7 Apr 2016 19:08:06 +0000 (15:08 -0400) |
13 files changed:
diff --git a/src/ti/linalg/Makefile b/src/ti/linalg/Makefile
index edf6007a4df28b27d55e2742dabe31b27c9e2f7c..b3af5bdee2a682408c565ad8a17232a54d21b33a 100644 (file)
--- a/src/ti/linalg/Makefile
+++ b/src/ti/linalg/Makefile
CBLAS_HEADERS +=$(LINALG_TICBLAS_DIR)/ticblas.h
endif
+# Select the per-SoC core counts from TARGET. NUM_DSP_CORES and NUM_ARM_CORES
+# are passed to the BLIS sub-makes by the DSPlibs and ARMlibs recipes below.
+ifeq ($(TARGET),SOC_K2H)
+NUM_ARM_CORES=4
+NUM_DSP_CORES=8
+else ifeq ($(TARGET),SOC_SHANNON)
+# Only the DSP core count is set for this target.
+NUM_DSP_CORES=8
+else ifeq ($(TARGET),SOC_AM572x)
+NUM_ARM_CORES=2
+NUM_DSP_CORES=2
+else
+# Reached when TARGET is missing or not one of the values above, so the
+# diagnostic must name TARGET rather than MEM_MODEL.
+$(error ERROR - TARGET NOT DEFINED. Must specify one of: TARGET=SOC_K2H, TARGET=SOC_SHANNON, TARGET=SOC_AM572x)
+endif
+
+endif
+
+# MEM_MODEL needs to be defined for both build and clean
ifeq ($(MEM_MODEL),Large)
BLIS_CFG = c66xLarge
else ifeq ($(MEM_MODEL),Medium)
$(call error, ERROR - MEM_MODEL NOT DEFINED. Must specify one of: MEM_MODEL=Large, MEM_MODEL=Medium, MEM_MODEL=Small)
endif
-endif
-
+# Build DSP code for DSP-only platforms and for ARM+DSP platforms.
+# All object binaries are added to a single archive: lib/libcblas.ae66, which includes:
+# - CBLAS
+# - BLIS for specified TARGET, MEM_MODEL, LIBOS (BIOS or OPENCL), NUM_DSP_CORES
+# - TICBLAS for specified TARGET, MEM_MODEL, LIBOS (BIOS or OPENCL)
DSPlibs:
cd $(LINALG_CBLAS_DIR); make arch=C66 alllib; \
- cd ../$(LINALG_BLIS_DIR); ./configure -p install/$(BLIS_CFG) c66x; make -j8 MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS); make install; \
+ cd ../$(LINALG_BLIS_DIR); ./configure -p install/$(BLIS_CFG) c66x; \
+ make -j8 MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS) NUM_DSP_CORES=$(NUM_DSP_CORES); make install; \
cd ../$(LINALG_TICBLAS_DIR)/src; make MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS); cd ../lib; \
echo "combining BLIS, CBLAS, and TICBLAS libraries into one: libcblas.ae66"; \
mkdir -p objs; cd objs; rm -f *; ar x ../../../blis/install/$(BLIS_CFG)/lib/libblis.ae66; mmv 'cblas*.o' 'blis_cblas#1.o'; \
mkdir -p lib; cd lib; rm -f *; ar -cr libcblas.ae66 ../ticblas/lib/objs/*; cd ..; \
mkdir -p include; rm -f include/*; cp $(CBLAS_HEADERS) include
+# Build ARM code for ARM+DSP platforms
ARMlibs:
cd $(LINALG_CBLAS_DIR); make arch=ARM alllib; \
- cd ../$(LINALG_BLIS_DIR); ./configure -p install/arm cortex-a15; make -j8; make install; \
+ cd ../$(LINALG_BLIS_DIR); ./configure -p install/arm cortex-a15; make -j8 NUM_ARM_CORES=$(NUM_ARM_CORES); make install; \
cd ../$(LINALG_CLAPACK_DIR); make f2clib; make cblaswrap; cd SRC; make -j8; cd ..
+# Build host-callable libraries for ARM+DSP platforms.
+# BLAS has a single archive: libcblas_armplusdsp.a, which includes:
+# - CBLAS ARM code
+# - BLIS ARM code for specified NUM_ARM_CORES
+# - BLAS wrapper (accelerator) code
+# - DSP code built by DSPlibs
+# LAPACK has 3 archives:
+# - liblapack.a: CLAPACK ARM library
+# - libcblaswr.a: CBLAS wrapper library used by CLAPACK
+# - libf2c.a: F2C library used by CLAPACK
ARMplusDSP: DSPlibs ARMlibs
cd $(LINALG_BLASACC_DIR)/src; make MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET); cd ../..; \
cd lib; rm -f *; \
index 0566c4f99121ac8bd9d999e94e3bcb6db0f81e6a..5885abb81d0a1ec8c1b2d480dc6aaa008d42b7dd 100644 (file)
endif
CPP_DEBUG = -g
-CPP_FLAGS = -D_LITTLE_ENDIAN -D__ARMv7 -D$(TARGET) -I../../cblas/include -I../../blis/install/arm/include/blis/ -I$(TI_OCL_INSTALL_DIR)/include -fopenmp
+CPP_FLAGS = -D_LITTLE_ENDIAN -D__ARMv7 -DMEM_MODEL_${MEM_MODEL} -D$(TARGET) -I../../cblas/include -I../../blis/install/arm/include/blis/ -I$(TI_OCL_INSTALL_DIR)/include -fopenmp
CL6X_FLAGS = $(INCS) --openmp --use_g2 -D$(TARGET) -DLIB_OPENCL
CLOCL_FLAGS =
OBJCOPY_ARGS=
diff --git a/src/ti/linalg/blasblisacc/src/ti_cblas_acc.h b/src/ti/linalg/blasblisacc/src/ti_cblas_acc.h
index 70a040c7758529e14f487b8e89a092300dd61e3c..d2ba1ac48cc8d3243673b43f92d942eebe517115 100644 (file)
extern int TI_CBLAS_L2_OFFLOAD;
extern int TI_CBLAS_L3_OFFLOAD;
-// allocated MSMC and L2 buffer sizes;
-// be careful to allocate enough so memory overrun
-// does not happen in BLIS/BLAS calls
-//#define L2_BUF_SIZE 0x84000
-//L2 Cache
-//MK: Pool Size 550976, Num Blocks 2, Block size 275424
-//KN: Pool Size 128, Num Blocks 0, Block size 3071640
-//MN: Pool Size 128, Num Blocks 0, Block size 1400832
-//0x86940=551232 = 550976+128+128
-//#define L2_BUF_SIZE 0x86940
-//#define L2_BUF_SIZE 0x862A0
-//Added 3 MNR buffers for C. and increasing Kc for Z
-#define L2_BUF_SIZE 0xBFE00
-//#define L2_BUF_SIZE 0xBF980 // MR=NR=4 for S
-
-
-//L3 Cache
-//MK: Pool Size 128, Num Blocks 0, Block size 275424
-//KN: Pool Size 6143536, Num Blocks 2, Block size 3071640
-//MN: Pool Size 128, Num Blocks 0, Block size 1400832
-//0x5DBF30 = 6143792=6143536+128+128
-//ccs map file says 5dbf40
-//Changed KC values to fit in the 4.5M of MSMC
-//4647B0
-// 0x4664B4
-// Within 4.5M
-//#define MSMC_BUF_SIZE 0x4647C0
-#define MSMC_BUF_SIZE 0x47FDC0
-//#define MSMC_BUF_SIZE 0x47F100 // MR=NR=4 for S
+/* Size of the MSMC/L3 buffer, chosen by whichever MEM_MODEL_* macro is defined.
+ * MSMC_BUF_SIZE is left undefined when none of the three macros is set. */
+#if defined(MEM_MODEL_Large)
+# define MSMC_BUF_SIZE 0x47FDC0 /* 4.5MB */
+#elif defined(MEM_MODEL_Medium)
+# define MSMC_BUF_SIZE (5*512*1024UL) /* 2.5MB */
+#elif defined(MEM_MODEL_Small)
+# define MSMC_BUF_SIZE (1024UL*1024UL) /* 1MB */
+#endif
#define DDR_BUF_SIZE (16384)
-//DSPBLIS
-//#define MSMC_BUF_SIZE 0x400000
-
extern char ofld_tbl_sgemm[GEMM_OFFLOAD_TBL_SIZE];
extern char ofld_tbl_dgemm[GEMM_OFFLOAD_TBL_SIZE];
extern char ofld_tbl_cgemm[GEMM_OFFLOAD_TBL_SIZE];
diff --git a/src/ti/linalg/blasblisacc/src/ti_cblas_initfini.c b/src/ti/linalg/blasblisacc/src/ti_cblas_initfini.c
index 7dcdbc968822268fb21b4380dc46d889af62f508..48da1ea5abeb94f410f75bd19e66c4a1a86bf1ca 100644 (file)
cl_int err = CL_SUCCESS;
#endif
{
- void *msmc_ptr;
TI_CBLAS_DEBUG_PRINT("Initializing BLIS DSP\n");
- msmc_ptr = ti_cblas_mem_alloc(MSMC_BUF_SIZE);
-#ifdef __cplusplus
- Buffer buf_MSMC(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr);
- __K->setArg(0, buf_MSMC);
-
-#else
- cl_mem buf_MSMC = clCreateBuffer(ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, MSMC_BUF_SIZE, (void *)msmc_ptr, &err);
- TI_CBLAS_OCL_CHKERROR("clCreateBuffer",err);
- err |= clSetKernelArg(__K, 0, sizeof(buf_MSMC), &buf_MSMC);
- TI_CBLAS_OCL_CHKERROR("clSetKernelArg",err);
-#endif
-
-#ifdef __cplusplus
- __K->setArg(1, __local(L2_BUF_SIZE));
-#else
- err |= clSetKernelArg(__K, 1, L2_BUF_SIZE, NULL);
-#endif
#ifdef __cplusplus
ti_cblas_ocl_Q->enqueueTask(*__K, 0, &e);
TI_CBLAS_OCL_CHKERROR("clReleaseEvent",err);
#endif
- ti_cblas_mem_free(msmc_ptr);
ti_cblas_delete_kernel(__K);
TI_CBLAS_DEBUG_PRINT("BLIS DSP initialized\n");
diff --git a/src/ti/linalg/blasblisacc/src/ti_cblas_kernel.cl b/src/ti/linalg/blasblisacc/src/ti_cblas_kernel.cl
index 36b6a4da1e4d1a63565c69096a8ccf320ee24249..0a3736214862c210befc6c228a76f23c6a2cb283 100644 (file)
enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
-void ti_bli_init_dsp(global char *l3_buf, local char *l2_buf);
-kernel void ocl_bli_init(global char *l3_buf, local char *l2_buf)
-{ ti_bli_init_dsp(l3_buf, l2_buf); }
+void ti_bli_init_dsp(void);
+kernel void ocl_bli_init(void)
+{ ti_bli_init_dsp(); }
void ti_bli_finalize_dsp(void);
kernel void ocl_bli_finalize(void)
{ ti_bli_finalize_dsp(); }
diff --git a/src/ti/linalg/blasblisacc/src/ti_cblas_mem_config.c b/src/ti/linalg/blasblisacc/src/ti_cblas_mem_config.c
index c0cb75b020770118f1d22cd6c966bc6c6f90fa15..ee89893d58d10f04f08abc49a38178c724430ddc 100644 (file)
/*==============================================================================
* This function initializes BLIS before first CBLAS call is made.
*============================================================================*/
-void ti_bli_init_dsp(char *l3_buf, char *l2_buf)
+void ti_bli_init_dsp(void)
{
-#ifdef TI_CBLAS_DEBUG
- printf("In function ti_bli_init_dsp, l3_buff is 0x%x, l2_buf is 0x%x.\n", (unsigned int)l3_buf, (unsigned int)l2_buf);
-
- malloc_size = 0;
- printf("Before calling bli_init, malloc_size is %d.\n", malloc_size);
-#endif
-
tiCblasNew();
-
-#ifdef TI_CBLAS_DEBUG
- printf("After calling bli_init, malloc_size is %d.\n", malloc_size);
-#endif
}
/*==============================================================================
diff --git a/src/ti/linalg/blis/config/c66x/bli_config.h b/src/ti/linalg/blis/config/c66x/bli_config.h
index 97c1c8e057f09592567af0d4c708320ddc7d3d2e..aea8021bf5b0f92183157a22dbb2c60c65ff4e33 100755 (executable)
*/
#define BLIS_ENABLE_MULTITHREADING
#define BLIS_ENABLE_OPENMP
-#define BLIS_MAX_NUM_THREADS 8
+/* BLIS_MAX_NUM_THREADS is defined in make_defs.mk
#if defined(MEM_MODEL_LARGE) || defined (MEM_MODEL_MEDIUM)
#define BLIS_MAX_NUM_THREADS 8
-#define BLIS_C66X_IC_NT BLIS_MAX_NUM_THREADS
#elif defined (MEM_MODEL_SMALL)
#define BLIS_MAX_NUM_THREADS 2
-#define BLIS_C66X_IC_NT 2
#endif
+*/
+#define BLIS_C66X_IC_NT BLIS_MAX_NUM_THREADS
#define BLIS_C66X_JC_NT 1
#define BLIS_C66X_JR_NT 1
diff --git a/src/ti/linalg/blis/config/c66x/bli_kernel.h b/src/ti/linalg/blis/config/c66x/bli_kernel.h
index 96cdbf6364b128c8eb94471334726f91da77c407..1a637a87e7f3c7d23baaeb4cf758a2c120c038d0 100755 (executable)
#define BLIS_DEFAULT_4M_MC_C 64
#define BLIS_DEFAULT_4M_KC_C 180
-#define BLIS_DEFAULT_4M_NC_C 5404
+#define BLIS_DEFAULT_4M_NC_C 540
#define BLIS_DEFAULT_4M_MC_Z 32
#define BLIS_DEFAULT_4M_KC_Z 145
diff --git a/src/ti/linalg/blis/config/c66x/make_defs.mk b/src/ti/linalg/blis/config/c66x/make_defs.mk
index d7f2f285c2c0058dbad826e31ce3266518e9e61b..73d9f94a5f87ac59bce01b317c0dbeddac261295 100755 (executable)
BLIS_MEM_MODEL = MEM_MODEL_SMALL
endif
-CMISCFLAGS += -D$(BLIS_MEM_MODEL) -D$(TARGET) -D$(LIBOS)
+CMISCFLAGS += -D$(BLIS_MEM_MODEL) -D$(TARGET) -D$(LIBOS) -DBLIS_MAX_NUM_THREADS=${NUM_DSP_CORES}
CDBGFLAGS := -s -k -mw
CWARNFLAGS :=
diff --git a/src/ti/linalg/blis/config/cortex-a15/bli_config.h b/src/ti/linalg/blis/config/cortex-a15/bli_config.h
index fe9e500fba624dba4ce34f64d3e3eb1c7a2912f4..a22f87ac9f6ee844e65b0fc6b77ecc7a6d502094 100644 (file)
#define BLIS_ENABLE_MULTITHREADING
#define BLIS_ENABLE_OPENMP
-// The maximum number of BLIS threads that will run concurrently.
-#define BLIS_MAX_NUM_THREADS 4
-
+// The maximum number of BLIS threads that will run concurrently - defined in make_defs.mk
+//#define BLIS_MAX_NUM_THREADS 4
// -- MEMORY ALLOCATION --------------------------------------------------------
diff --git a/src/ti/linalg/blis/config/cortex-a15/make_defs.mk b/src/ti/linalg/blis/config/cortex-a15/make_defs.mk
index cc70709f27202c0923fc76ba1e85e29e890d0aa4..58130a01d24fc28a4e8af43e83c37321c2763331 100644 (file)
#CC := gcc
# Enable IEEE Standard 1003.1-2004 (POSIX.1d).
# NOTE: This is needed to enable posix_memalign().
-CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L
+CPPROCFLAGS := -D_POSIX_C_SOURCE=200112L -DBLIS_MAX_NUM_THREADS=${NUM_ARM_CORES}
CMISCFLAGS := -std=c99 -mfloat-abi=hard -mfpu=neon -fopenmp -I$(TARGET_ROOTDIR)/usr/include #-I$(TI_OCL_INSTALL_DIR)/include
CPICFLAGS := -fPIC
CDBGFLAGS := #-save-temps #-g
index 43f68c27d7d52cc0e5618c08434490378ff8236b..61dcf9de53db21d1b4a46c9cabda74690b1a608d 120000 (symlink)
-Makefile.C66
\ No newline at end of file
+Makefile.ARM
\ No newline at end of file
index 5136d752d7908a3a32d60c5ed877515dd86e68d5..6beabc2fb41c0c3389be4f72a2d7f4d9eac584c1 100644 (file)
\r
#define getNextMultiple(x, y) ( ( ((x)+(y)-1)/(y) )* (y) )\r
\r
-#if 0\r
-#ifdef MEM_MODEL_LARGE\r
-#define BLAS_LEVEL3_L1DSRAM_SIZE (28*1024UL)\r
-#define BLAS_LEVEL3_L2SRAM_SIZE (767*1024UL) /* 767KB */\r
-#define BLAS_LEVEL3_MSMC_SIZE (0x47FDC0) /* 4.5MB */\r
-#else\r
-# ifdef MEM_MODEL_MEDIUM\r
-# define BLAS_LEVEL3_L1DSRAM_SIZE (28*1024UL)\r
-# define BLAS_LEVEL3_L2SRAM_SIZE (384*1024UL) /* 384KB */\r
-# define BLAS_LEVEL3_MSMC_SIZE (0x380000) /* 3.5MB */\r
-# else\r
-# ifdef MEM_MODEL_SMALL\r
-# define BLAS_LEVEL3_L1DSRAM_SIZE (18*1024UL)\r
-# define BLAS_LEVEL3_L2SRAM_SIZE (183*1024UL) /* 187KB */\r
-# define BLAS_LEVEL3_MSMC_SIZE (1520*1024UL)/* 1.5MB */\r
-# else\r
-# error "Unsupported memory model"\r
-# endif\r
-# endif\r
-#endif\r
-#endif\r
-/*\r
-#define BLAS_MEM_SIZE_VFAST BLAS_LEVEL3_L1DSRAM_SIZE \r
-#define BLAS_MEM_SIZE_FAST BLAS_LEVEL3_L2SRAM_SIZE\r
-#define BLAS_MEM_SIZE_MEDIUM BLAS_LEVEL3_MSMC_SIZE\r
-*/\r
#define BLAS_MEM_SIZE_VFAST ( getNextMultiple(BLIS_MK_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE) \\r
+ getNextMultiple(BLIS_KN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE) \\r
+ getNextMultiple(BLIS_MN_POOL_SIZE_L1, BLIS_CACHE_LINE_SIZE) )\r