summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: c18ad8e)
raw | patch | inline | side by side (parent: c18ad8e)
author | Jianzhong Xu <xuj@ti.com> | |
Wed, 23 Mar 2016 13:23:05 +0000 (09:23 -0400) | ||
committer | Jianzhong Xu <xuj@ti.com> | |
Wed, 23 Mar 2016 13:23:05 +0000 (09:23 -0400) |
diff --git a/readme.txt b/readme.txt
index 0ad8f0fd2b5bdafc79ee96b576dee9afab8b9d25..06f557770cac624d3982726e498f6455e207aa96 100644 (file)
--- a/readme.txt
+++ b/readme.txt
3) <os_name> must be one of LIB_OPENCL or LIB_RTOS
Examples:
-1) to build LINALG large memory model for K2H in OpenCL environment, type:
+1) to build LINALG Large memory model for K2H in OpenCL environment, type:
make -f build/Makefile MEM_MODEL=Large TARGET=SOC_K2H LIBOS=LIB_OPENCL
-2) to build LINALG small memory model for C6678 in RTOS environment, type:
-make -f build/Makefile MEM_MODEL=Small TARGET=SOC_C6678 LIBOS=LIB_RTOS
+2) to build LINALG Medium memory model for C6678 in RTOS environment, type:
+make -f build/Makefile MEM_MODEL=Medium TARGET=SOC_C6678 LIBOS=LIB_RTOS
3.--------- Build LINALG examples ---------
# in RTSC packaging style: $(LINALG_DIR)/packages/ti/linalg. Therefore, to build BLIS test suite, LINALG needs to be installed first.
# Use same environment variables as building the examples.
cd src/ti/linalg
-make cleanDSPlibs MEM_MODEL=Small TARGET=SOC_C6678 LIBOS=LIB_RTOS
+make cleanDSPlibs MEM_MODEL=Medium TARGET=SOC_C6678 LIBOS=LIB_RTOS
cd -
rm -r exports
-make -f build/Makefile MEM_MODEL=Small TARGET=SOC_C6678 LIBOS=LIB_RTOS
+make -f build/Makefile MEM_MODEL=Medium TARGET=SOC_C6678 LIBOS=LIB_RTOS
rm -r ~/ti/linalg_1_2_0_0_rtos/*
cp -r exports/linalg_1_2_0_0/* ~/ti/linalg_1_2_0_0_rtos
cd src/ti/linalg/blis/testsuite/dsponly
make TARGET=SOC_C6678
-
diff --git a/src/ti/linalg/Makefile b/src/ti/linalg/Makefile
index 33f7b32cf8700e2c8e569b606208e87fde92eff0..edf6007a4df28b27d55e2742dabe31b27c9e2f7c 100644 (file)
--- a/src/ti/linalg/Makefile
+++ b/src/ti/linalg/Makefile
BLIS_VERSION = $(shell cat $(LINALG_BLIS_DIR)/version)
CBLAS_HEADERS =$(LINALG_CBLAS_DIR)/include/cblas.h
-ifeq ($(LIBOS),LIB_RTOS)
-CBLAS_HEADERS +=$(LINALG_TICBLAS_DIR)/ticblas.h
-endif
CLAPACK_HEADERS =$(LINALG_CLAPACK_DIR)/INCLUDE/blaswrap.h
CLAPACK_HEADERS+=$(LINALG_CLAPACK_DIR)/INCLUDE/clapack.h
CLAPACK_HEADERS+=$(LINALG_CLAPACK_DIR)/INCLUDE/f2c.h
+ifneq (,$(findstring DSP,$(MAKECMDGOALS)))
+
+ifeq ($(LIBOS),LIB_RTOS)
+CBLAS_HEADERS +=$(LINALG_TICBLAS_DIR)/ticblas.h
+endif
+
ifeq ($(MEM_MODEL),Large)
BLIS_CFG = c66xLarge
else ifeq ($(MEM_MODEL),Medium)
$(call error, ERROR - MEM_MODEL NOT DEFINED. Must specify one of: MEM_MODEL=Large, MEM_MODEL=Medium, MEM_MODEL=Small)
endif
+endif
+
DSPlibs:
cd $(LINALG_CBLAS_DIR); make arch=C66 alllib; \
cd ../$(LINALG_BLIS_DIR); ./configure -p install/$(BLIS_CFG) c66x; make -j8 MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS); make install; \
cleanDSPlibs:
rm -f lib/*
cd $(LINALG_CBLAS_DIR); make arch=C66 clean; \
- cd ../$(LINALG_BLIS_DIR); ./configure -p install/$(BLIS_CFG) c66x; make -j8 clean; \
+ cd ../$(LINALG_BLIS_DIR); ./configure -p install/$(BLIS_CFG) c66x; make -j8 clean; rm -r install/$(BLIS_CFG); \
cd ../$(LINALG_TICBLAS_DIR)/src; make clean;
cleanARMlibs:
cd $(LINALG_CBLAS_DIR); make arch=ARM clean; \
- cd ../$(LINALG_BLIS_DIR); ./configure -p install/arm cortex-a15; make clean; \
+ cd ../$(LINALG_BLIS_DIR); ./configure -p install/arm cortex-a15; make clean; rm -r install/arm; \
cd ../$(LINALG_BLASACC_DIR); make clean; \
cd ../$(LINALG_BLIS_DIR)/testsuite; make clean; \
cd ../../$(LINALG_CLAPACK_DIR); make clean
diff --git a/src/ti/linalg/blis/config/c66x/bli_config.h b/src/ti/linalg/blis/config/c66x/bli_config.h
index 79340ee8474f29a8efa2217bbf6478042114fa0c..97c1c8e057f09592567af0d4c708320ddc7d3d2e 100755 (executable)
#ifdef BLIS_ENABLE_C66X_EDMA
-#if defined(MEM_MODEL_LARGE) || defined (MEM_MODEL_MEDIUM)
+#if defined(MEM_MODEL_LARGE)
#define BLIS_GEMM_DMAA_CNTL gemm_dmaa_cntl
#define BLIS_GEMM_DMAB_CNTL gemm_dmab_cntl
-#elif defined (MEM_MODEL_SMALL)
+#elif defined (MEM_MODEL_SMALL) || defined (MEM_MODEL_MEDIUM)
#define BLIS_GEMM_DMAA_CNTL NULL // disabling EDMA
#define BLIS_GEMM_DMAB_CNTL NULL
#endif
#if defined(MEM_MODEL_LARGE) || defined (MEM_MODEL_MEDIUM)
#define BLIS_MAX_NUM_THREADS 8
-#define BLIS_C66X_IC_NT BLIS_MAX_NUM_THREADS
+#define BLIS_C66X_IC_NT BLIS_MAX_NUM_THREADS
#elif defined (MEM_MODEL_SMALL)
-#define BLIS_MAX_NUM_THREADS 8
-#define BLIS_C66X_IC_NT 8
+#define BLIS_MAX_NUM_THREADS 2
+#define BLIS_C66X_IC_NT 2
#endif
-#define BLIS_C66X_JC_NT 1
-#define BLIS_C66X_JR_NT 1
-#define BLIS_C66X_IR_NT 1
-
-
-
+#define BLIS_C66X_JC_NT 1
+#define BLIS_C66X_JR_NT 1
+#define BLIS_C66X_IR_NT 1
// -- MEMORY ALLOCATION --------------------------------------------------------
// contiguous memory pools.
#define BLIS_NUM_MC_X_KC_BLOCKS_L3 0
-#if defined (MEM_MODEL_SMALL)
+#if defined (MEM_MODEL_MEDIUM) || defined (MEM_MODEL_SMALL)
#define BLIS_NUM_MC_X_KC_BLOCKS_L2 1 // no need of ping-pong buffer if EDMA is not used. for matrix A, DDR->L2
#else
#define BLIS_NUM_MC_X_KC_BLOCKS_L2 2 //Each L2 ram is local to the DSP Just need one buffer per thread that is packed
#endif
#define BLIS_NUM_MC_X_KC_BLOCKS_L1 0
-#define BLIS_NUM_MR_X_KC_BLOCKS_L1 2 // To transfer A to L1 in a ping-pong manner
+#define BLIS_NUM_MR_X_KC_BLOCKS_L1 2 // To transfer A to L1 in a ping-pong manner
#define BLIS_NUM_MC_X_KC_BLOCKS 2*BLIS_MAX_NUM_THREADS + 1 //To test w/o DMA and L2, L3 memory, all memory must be in DDR3 now
-#if defined (MEM_MODEL_SMALL)
+#if defined (MEM_MODEL_MEDIUM) || defined (MEM_MODEL_SMALL)
#define BLIS_NUM_KC_X_NC_BLOCKS_L3 1 // no need of ping-pong buffer if EDMA is not used. for matrix B, DDR->L3
#else
#define BLIS_NUM_KC_X_NC_BLOCKS_L3 2 // Each thread shares a B block, so do not need 8 buffers *BLIS_MAX_NUM_THREADS // One for the partitioned B1, and one for the packed B1
#endif
#define BLIS_NUM_KC_X_NC_BLOCKS_L2 0
#define BLIS_NUM_KC_X_NC_BLOCKS_L1 0
-#define BLIS_NUM_KC_X_NR_BLOCKS_L1 1
+#define BLIS_NUM_KC_X_NR_BLOCKS_L1 1
#define BLIS_NUM_KC_X_NC_BLOCKS 2*BLIS_MAX_NUM_THREADS //To test w/o DMA and L2, L3 memory, all memory must be in DDR3 now
#define BLIS_NUM_MC_X_NC_BLOCKS_L3 0
* The remaining available space in L1 is divided such that
* 2*BLIS_MRK_BLOCK_BUFFER_L1+1*BLIS_KNR_PANEL_BUFFER_L1+0BLIS_MRNR_BLOCK_BUFFER_L1_S = remaining available space.
*/
-#define BLIS_MRK_BLOCK_BUFFER_L1_S 128 //
-#define BLIS_MRK_BLOCK_BUFFER_L1_D 64 //
-#define BLIS_MRK_BLOCK_BUFFER_L1_C 64 //
-#define BLIS_MRK_BLOCK_BUFFER_L1_Z 32 //
-#define BLIS_KNR_PANEL_BUFFER_L1_S 256 //
-#define BLIS_KNR_PANEL_BUFFER_L1_D 128
-#define BLIS_KNR_PANEL_BUFFER_L1_C 128
-#define BLIS_KNR_PANEL_BUFFER_L1_Z 64
-#define BLIS_MRNR_BLOCK_BUFFER_L1_S 128 //
-#define BLIS_MRNR_BLOCK_BUFFER_L1_D 64 //
-#define BLIS_MRNR_BLOCK_BUFFER_L1_C 64 //
-#define BLIS_MRNR_BLOCK_BUFFER_L1_Z 32 //
+#define BLIS_MRK_BLOCK_BUFFER_L1_S 128 //
+#define BLIS_MRK_BLOCK_BUFFER_L1_D 64 //
+#define BLIS_MRK_BLOCK_BUFFER_L1_C 64 //
+#define BLIS_MRK_BLOCK_BUFFER_L1_Z 32 //
+#define BLIS_KNR_PANEL_BUFFER_L1_S 256 //
+#define BLIS_KNR_PANEL_BUFFER_L1_D 128
+#define BLIS_KNR_PANEL_BUFFER_L1_C 128
+#define BLIS_KNR_PANEL_BUFFER_L1_Z 64
+#define BLIS_MRNR_BLOCK_BUFFER_L1_S 128 //
+#define BLIS_MRNR_BLOCK_BUFFER_L1_D 64 //
+#define BLIS_MRNR_BLOCK_BUFFER_L1_C 64 //
+#define BLIS_MRNR_BLOCK_BUFFER_L1_Z 32 //
//
diff --git a/src/ti/linalg/blis/config/c66x/bli_kernel.h b/src/ti/linalg/blis/config/c66x/bli_kernel.h
index fcc5ac36ab236d31e0460fd2662745a7d3db0ad2..59993769e15d6108ab17721dce9ea482dd410ea6 100755 (executable)
#elif defined (MEM_MODEL_MEDIUM)
-#define BLIS_DEFAULT_MC_S 128
-#define BLIS_DEFAULT_KC_S 240
-#define BLIS_DEFAULT_NC_S 1288
-
-#define BLIS_DEFAULT_MC_D 68
-#define BLIS_DEFAULT_KC_D 240
-#define BLIS_DEFAULT_NC_D 844
-
-#define BLIS_DEFAULT_MC_C 68
-#define BLIS_DEFAULT_KC_C 240
-#define BLIS_DEFAULT_NC_C 844
-
-#define BLIS_DEFAULT_MC_Z 60
-#define BLIS_DEFAULT_KC_Z 136
-#define BLIS_DEFAULT_NC_Z 631
-
-#define BLIS_DEFAULT_4M_MC_C 68
-#define BLIS_DEFAULT_4M_KC_C 240
-#define BLIS_DEFAULT_4M_NC_C 844
-
-#define BLIS_DEFAULT_4M_MC_Z 60
-#define BLIS_DEFAULT_4M_KC_Z 136
-#define BLIS_DEFAULT_4M_NC_Z 628
-
-#define BLIS_DEFAULT_3M_MC_C 68
-#define BLIS_DEFAULT_3M_KC_C 160
-#define BLIS_DEFAULT_3M_NC_C 720
-
-#define BLIS_DEFAULT_3M_MC_Z 52
-#define BLIS_DEFAULT_3M_KC_Z 100
-#define BLIS_DEFAULT_3M_NC_Z 524
-
-
-#elif defined(MEM_MODEL_SMALL)
-// use this when EDMA is disabled for A and B
-/*
-#define BLIS_DEFAULT_MC_S 112
-#define BLIS_DEFAULT_KC_S 428
-#define BLIS_DEFAULT_NC_S 1224
-
-#define BLIS_DEFAULT_MC_D 96
-#define BLIS_DEFAULT_KC_D 220
-#define BLIS_DEFAULT_NC_D 1184
-
-#define BLIS_DEFAULT_MC_C 88
-#define BLIS_DEFAULT_KC_C 260
-#define BLIS_DEFAULT_NC_C 1008
-
-#define BLIS_DEFAULT_MC_Z 64
-#define BLIS_DEFAULT_KC_Z 178
-#define BLIS_DEFAULT_NC_Z 736
-
-#define BLIS_DEFAULT_4M_MC_C 108
-#define BLIS_DEFAULT_4M_KC_C 220
-#define BLIS_DEFAULT_4M_NC_C 1184
-
-#define BLIS_DEFAULT_4M_MC_Z 64
-#define BLIS_DEFAULT_4M_KC_Z 178
-#define BLIS_DEFAULT_4M_NC_Z 736
-
-#define BLIS_DEFAULT_3M_MC_C 64
-#define BLIS_DEFAULT_3M_KC_C 220
-#define BLIS_DEFAULT_3M_NC_C 792
-
-#define BLIS_DEFAULT_3M_MC_Z 48
-#define BLIS_DEFAULT_3M_KC_Z 178
-#define BLIS_DEFAULT_3M_NC_Z 488
-*/
-
#define BLIS_DEFAULT_MC_S 144
#define BLIS_DEFAULT_KC_S 400 //320 good // 240 good // 428 error
#define BLIS_DEFAULT_NC_S 1224
#define BLIS_DEFAULT_3M_KC_Z 178
#define BLIS_DEFAULT_3M_NC_Z 488
-// use this when EDMA is enabled
-/*
-#define BLIS_DEFAULT_MC_S 104
-#define BLIS_DEFAULT_KC_S 196
-#define BLIS_DEFAULT_NC_S 824
+#elif defined(MEM_MODEL_SMALL)
+
+#define BLIS_DEFAULT_MC_S 128
+#define BLIS_DEFAULT_KC_S 224
+#define BLIS_DEFAULT_NC_S 1024
-#define BLIS_DEFAULT_MC_D 64
+#define BLIS_DEFAULT_MC_D 80
#define BLIS_DEFAULT_KC_D 180
-#define BLIS_DEFAULT_NC_D 540
+#define BLIS_DEFAULT_NC_D 720
#define BLIS_DEFAULT_MC_C 64
#define BLIS_DEFAULT_KC_C 180
#define BLIS_DEFAULT_4M_MC_C 64
#define BLIS_DEFAULT_4M_KC_C 180
-#define BLIS_DEFAULT_4M_NC_C 540
+#define BLIS_DEFAULT_4M_NC_C 5404
#define BLIS_DEFAULT_4M_MC_Z 32
#define BLIS_DEFAULT_4M_KC_Z 145
#define BLIS_DEFAULT_3M_MC_Z 36
#define BLIS_DEFAULT_3M_KC_Z 108
#define BLIS_DEFAULT_3M_NC_Z 196
-*/
+
#endif
// -- Register blocksizes --
diff --git a/src/ti/linalg/blis/frame/include/bli_mem_pool_macro_defs.h b/src/ti/linalg/blis/frame/include/bli_mem_pool_macro_defs.h
index 4423b5c26a436c7e6d12da5af31b18632095f2db..0f8936a47b1c8cf74befe1c8ea4d59e318b13c33 100644 (file)
#ifdef BLIS_ENABLE_C66X_MEM_POOLS
-#if defined(MEM_MODEL_LARGE) || defined (MEM_MODEL_MEDIUM)
+#if defined(MEM_MODEL_LARGE)
#define BLIS_MK_BLOCK_SIZE_S ( bli_max( BLIS_POOL_MC_S*(BLIS_POOL_MC_S + BLIS_POOL_KC_S), \
(BLIS_POOL_MC_S + BLIS_POOL_KC_S)*(BLIS_POOL_MC_S + BLIS_POOL_KC_S)/4 \
BLIS_SIZEOF_S \
)
-#elif defined (MEM_MODEL_SMALL)
+#elif defined (MEM_MODEL_SMALL) || defined (MEM_MODEL_MEDIUM)
//DMA is not used, and so we do not need to calculate the extra memory that needs to DMA'ed to rebuild symmetric matrices
#ifdef BLIS_ENABLE_C66X_MEM_POOLS
-#if defined(MEM_MODEL_LARGE) || defined (MEM_MODEL_MEDIUM)
+#if defined(MEM_MODEL_LARGE)
#define BLIS_MK_BLOCK_SIZE_D ( bli_max( BLIS_POOL_MC_D*(BLIS_POOL_MC_D + BLIS_POOL_KC_D), \
(BLIS_POOL_MC_D + BLIS_POOL_KC_D)*(BLIS_POOL_MC_D + BLIS_POOL_KC_D)/4 \
BLIS_SIZEOF_D \
)
-#elif defined (MEM_MODEL_SMALL)
+#elif defined (MEM_MODEL_SMALL) || defined (MEM_MODEL_MEDIUM)
#define BLIS_MK_BLOCK_SIZE_D ( BLIS_POOL_MC_D * \
( BLIS_POOL_KC_D \
) * \
#ifdef BLIS_ENABLE_C66X_MEM_POOLS
-#if defined(MEM_MODEL_LARGE) || defined (MEM_MODEL_MEDIUM)
+#if defined(MEM_MODEL_LARGE)
#define BLIS_MK_BLOCK_SIZE_C ( bli_max( BLIS_POOL_MC_C*(BLIS_POOL_MC_C + BLIS_POOL_KC_C), \
(BLIS_POOL_MC_C + BLIS_POOL_KC_C)*(BLIS_POOL_MC_C + BLIS_POOL_KC_C)/4 \
BLIS_SIZEOF_D \
)
-#elif defined (MEM_MODEL_SMALL)
+#elif defined (MEM_MODEL_SMALL) || defined (MEM_MODEL_MEDIUM)
#define BLIS_MK_BLOCK_SIZE_C ( BLIS_POOL_MC_C * \
( BLIS_POOL_KC_C \
#ifdef BLIS_ENABLE_C66X_MEM_POOLS
-#if defined(MEM_MODEL_LARGE) || defined (MEM_MODEL_MEDIUM)
+#if defined(MEM_MODEL_LARGE)
#define BLIS_MK_BLOCK_SIZE_Z ( bli_max( BLIS_POOL_MC_Z * (BLIS_POOL_MC_Z + BLIS_POOL_KC_Z), \
(BLIS_POOL_MC_Z + BLIS_POOL_KC_Z)*(BLIS_POOL_MC_Z + BLIS_POOL_KC_Z)/4 \
BLIS_SIZEOF_Z \
)
-#elif defined (MEM_MODEL_SMALL)
+#elif defined (MEM_MODEL_SMALL) || defined (MEM_MODEL_MEDIUM)
#define BLIS_MK_BLOCK_SIZE_Z ( BLIS_POOL_MC_Z * \
( BLIS_POOL_KC_Z \
//
#ifdef BLIS_ENABLE_C66X_MEM_POOLS
-#if defined(MEM_MODEL_LARGE) || defined (MEM_MODEL_MEDIUM)
+#if defined(MEM_MODEL_LARGE)
#define BLIS_MK_BLOCK_SIZE_4M_C ( bli_max( BLIS_POOL_4M_MC_C*(BLIS_POOL_4M_MC_C + BLIS_POOL_4M_KC_C), \
(BLIS_POOL_4M_MC_C + BLIS_POOL_4M_KC_C)*(BLIS_POOL_4M_MC_C + BLIS_POOL_4M_KC_C)/4 \
) * \
BLIS_SIZEOF_C \
)
-#elif defined (MEM_MODEL_SMALL)
+#elif defined (MEM_MODEL_SMALL) || defined (MEM_MODEL_MEDIUM)
#define BLIS_MK_BLOCK_SIZE_4M_C ( BLIS_POOL_4M_MC_C * \
( BLIS_POOL_4M_KC_C \
) * \
//
#ifdef BLIS_ENABLE_C66X_MEM_POOLS
-#if defined(MEM_MODEL_LARGE) || defined (MEM_MODEL_MEDIUM)
+#if defined(MEM_MODEL_LARGE)
#define BLIS_MK_BLOCK_SIZE_4M_Z ( bli_max( BLIS_POOL_4M_MC_Z*(BLIS_POOL_4M_MC_Z + BLIS_POOL_4M_KC_Z), \
(BLIS_POOL_4M_MC_Z + BLIS_POOL_4M_KC_Z)*(BLIS_POOL_4M_MC_Z + BLIS_POOL_4M_KC_Z)/4 \
BLIS_SIZEOF_Z \
)
-#elif defined (MEM_MODEL_SMALL)
+#elif defined (MEM_MODEL_SMALL) || defined (MEM_MODEL_MEDIUM)
#define BLIS_MK_BLOCK_SIZE_4M_Z ( BLIS_POOL_4M_MC_Z * \
( BLIS_POOL_4M_KC_Z \
// NOTE: We scale by 3/2 because 3m requires 50% more space than 4m.
#ifdef BLIS_ENABLE_C66X_MEM_POOLS
-#if defined(MEM_MODEL_LARGE) || defined (MEM_MODEL_MEDIUM)
+#if defined(MEM_MODEL_LARGE)
#define BLIS_MK_BLOCK_SIZE_3M_C ( bli_max( BLIS_POOL_3M_MC_C*(BLIS_POOL_3M_MC_C + BLIS_POOL_3M_KC_C), \
(BLIS_POOL_3M_MC_C + BLIS_POOL_3M_KC_C)*(BLIS_POOL_3M_MC_C + BLIS_POOL_3M_KC_C)/4 \
3 / 2 \
)
-#elif defined (MEM_MODEL_SMALL)
+#elif defined (MEM_MODEL_SMALL) || defined (MEM_MODEL_MEDIUM)
#define BLIS_MK_BLOCK_SIZE_3M_C ( BLIS_POOL_3M_MC_C * \
( BLIS_POOL_3M_KC_C \
#ifdef BLIS_ENABLE_C66X_MEM_POOLS
-#if defined(MEM_MODEL_LARGE) || defined (MEM_MODEL_MEDIUM)
+#if defined(MEM_MODEL_LARGE)
#define BLIS_MK_BLOCK_SIZE_3M_Z ( bli_max( BLIS_POOL_3M_MC_Z*(BLIS_POOL_3M_MC_Z + BLIS_POOL_3M_KC_Z), \
(BLIS_POOL_3M_MC_Z + BLIS_POOL_3M_KC_Z)*(BLIS_POOL_3M_MC_Z + BLIS_POOL_3M_KC_Z) / 4 \
3 / 2 \
)
-#elif defined (MEM_MODEL_SMALL)
+#elif defined (MEM_MODEL_SMALL) || defined (MEM_MODEL_MEDIUM)
#define BLIS_MK_BLOCK_SIZE_3M_Z ( BLIS_POOL_3M_MC_Z * \
( BLIS_POOL_3M_KC_Z \
) * \