summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: dfcbfcd)
raw | patch | inline | side by side (parent: dfcbfcd)
author | Jianzhong Xu <xuj@ti.com> | |
Mon, 21 Mar 2016 19:43:43 +0000 (15:43 -0400) | ||
committer | Jianzhong Xu <xuj@ti.com> | |
Mon, 21 Mar 2016 19:43:43 +0000 (15:43 -0400) |
15 files changed:
diff --git a/build/Makefile b/build/Makefile
index 225df3c269874cfe5d8e5bff086f4333009eef92..7358e2c69a085b9e6a26f65d94cad03de3825c3a 100644 (file)
--- a/build/Makefile
+++ b/build/Makefile
XDC=$(XDC_DIR)/xdc
XS=$(XDC_DIR)/xs
XDCPKG=$(XDC_DIR)/bin/xdcpkg
-
+
PACKAGE_NAME = linalg_1_2_0_0
all: package
@cp -R $(LINALG_BASE_DIR)/src/ti/linalg/lib exports/$(PACKAGE_NAME)/packages/ti/linalg
@cp $(LINALG_BASE_DIR)/src/ti/linalg/include/* exports/$(PACKAGE_NAME)/packages/ti/linalg
@cp -R $(LINALG_BASE_DIR)/src/ti/linalg/docs exports/$(PACKAGE_NAME)
- @ln -s doxygen/html/index.html exports/$(PACKAGE_NAME)/docs/linalg_user_guide.html
@cp -R $(LINALG_BASE_DIR)/$(EXAMPLES)/* exports/$(PACKAGE_NAME)/examples/
@echo "archive files to tar ball ..."
@cd exports; zip -q -r $(PACKAGE_NAME).zip $(PACKAGE_NAME)
+
+clean:
+ cd $(LINALG_BASE_DIR)/src/ti/linalg; \
+ make $(CLEAN_TARGET) MEM_MODEL=$(MEM_MODEL) TARGET=$(TARGET) LIBOS=$(LIBOS); cd -;\
+ rm -r exports/$(PACKAGE_NAME); rm exports/$(PACKAGE_NAME).zip
index 328d09ae36e9b9320fe2f3c10ae8aa1f1f7cae9e..7b2cfab5725afe8005e312a78520450c0265edeb 100644 (file)
XDCPLATFORM = ti.runtime.openmp.platforms.am57x
CL_OPTS += -DSOC_AM572x
else ifeq ($(TARGET),SOC_C6678)
-XDCPLATFORM = ti.runtime.openmp.platforms.evm6678
+#XDCPLATFORM = ti.runtime.openmp.platforms.evm6678
+XDCPLATFORM = ti.linalg.platforms.evm6678
CL_OPTS += -DSOC_C6678
else ifeq ($(TARGET),SOC_K2H)
XDCPLATFORM = ti.runtime.openmp.platforms.evmTCI6636K2H
CL_OPTS += -D$(TARGET) -DLIB_RTOS
# Include . to find config.bld
-XDCPATH = $(PDK_DIR)/packages;$(OMP_DIR)/packages;$(BIOS_DIR)/packages;$(IPC_DIR)/packages;$(FC_DIR)/packages;$(XDAIS_DIR)/packages;$(EDMA3_DIR)/packages;.
+XDCPATH = $(PDK_DIR)/packages;$(OMP_DIR)/packages;$(BIOS_DIR)/packages;$(IPC_DIR)/packages;$(FC_DIR)/packages;$(XDAIS_DIR)/packages;$(EDMA3_DIR)/packages;$(LINALG_DIR)/packages;.
XS = $(XDC_DIR)/xs
XDC = $(XDC_DIR)/xdc
index 9e8838ede1d707bc2c84c770e5f5d1df2197de99..c8f78c5b7a262f75bfc9a826ada71ede599d5852 100644 (file)
SECTIONS
{
+
.fclocalfar :
{
"edmamgr.ae66" (.fardata)
"rman.ae66" (.fardata)
"nullres.ae66" (.fardata)
"fcsettings.ae66" (.fardata)
- "edma3_lld_rm.ae66" (.fardata)
+/* "edma3_lld_rm.ae66" (.fardata)*/
"edmamgr.ae66" (.far)
"edma3Chan.ae66" (.far)
"rman.ae66" (.far)
"nullres.ae66" (.far)
"fcsettings.ae66" (.far)
- "edma3_lld_rm.ae66" (.far)
+/* "edma3_lld_rm.ae66" (.far)*/
} > L2SRAM
}
index e82f0aad77fd923d2782a2d7050719ea6d6d7cf6..4d3985b780db95fa44071781ff23a98d040c9a5d 100644 (file)
var sharedRegionId = 0;
// Size of the core local heap
- var localHeapSize = 0x8000;
+ var localHeapSize = 0x800;
// Size of the heap shared by all the cores
- var sharedHeapSize = 0x08000000;
+ var sharedHeapSize = 0x1D000000;
// Initialize a Shared Region & create a heap in the DDR3 memory region
var SharedRegion = xdc.useModule('ti.sdo.ipc.SharedRegion');
index 413b687eb2ad9f1a3be91d976b1bfcbd9116d67c..02c0ebd13d7ac1356427f937bbe51cec0de966a1 100644 (file)
/* use small memory model of BLAS */
#ifdef SOC_C6678
-#define BLAS_L2_BUF_SIZE (220*1024UL) /* 220KB SRAM is available in L2 for C6678 EVM */
+//#define BLAS_L2_BUF_SIZE (220*1024UL) /* 220KB SRAM is available in L2 for C6678 EVM */
+#define BLAS_L2_BUF_SIZE (256*1024UL) /* 256KB SRAM is available in L2 for C6678 EVM */
#define BLAS_MSMC_BUF_SIZE (2*1024*1024UL) /* reserve 2MB for BLAS */
#define BLAS_L3_DDR_SIZE (5120)
#else
index 32ada315d714de80a711244413a43cdc8ad4db6c..a7d5f057e384e09851d97e5004c52febfb459639 100644 (file)
A = (double *)malloc( m*k*sizeof( double ) );
B = (double *)malloc( k*n*sizeof( double ) );
C = (double *)malloc( m*n*sizeof( double ) );
+
+// printf("Matrix A, B, C address: 0x%x, 0x%x, 0x%x\n", (unsigned int)A, (unsigned int)B, (unsigned int)C);
C_copy = (double *)malloc( m*n*sizeof( double ) );
if (A == NULL || B == NULL || C == NULL || C_copy == NULL) {
printf( "\nERROR: Can't allocate memory for matrices. Aborting... \n\n");
diff --git a/readme.txt b/readme.txt
index 881ecc0280ddddab1e09e580b4a6fc894e18e7c0..0ad8f0fd2b5bdafc79ee96b576dee9afab8b9d25 100644 (file)
--- a/readme.txt
+++ b/readme.txt
1.--------- Set environment variables ---------
-Following environment vaialbes must be set in order to build LINALG (version numbers are used as examples):
+The following environment variables must be set in order to build DSP-only LINALG (version numbers are used as examples):
export CGTROOT="<TI_CGT_INSTALLATION_ROOT>/cgt-c6x"
export PDK_DIR="<COMPONENTS_INSTALLATION_ROOT>/pdk_c667x_2_0_1"
export LIBARCH_DIR="<COMPONENTS_INSTALLATION_ROOT>/libarch_1_0_0_0"
export PATH=<TI_CGT_INSTALLATION_ROOT>/cgt-c6x/bin:$PATH
+Additional environment variables must be set to build ARM+DSP LINALG:
+export TI_OCL_INSTALL_DIR="<PSDK_INSTALLATION_ROOT>/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/opencl"
+export TI_OCL_CGT_INSTALL="<PSDK_INSTALLATION_ROOT>/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x"
+export TARGET_ROOTDIR="<PSDK_INSTALLATION_ROOT>/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi"
+
2.--------- Build LINALG source code and make it a RTSC package ---------
From the root folder of LINALG, the build command is:
1) For DSP-only: cd dsponly; make TARGET=<target_name> LIBOS=LIB_RTOS
where: <target_name> must be one of SOC_K2H, SOC_C6678, or SOC_AM572x
+4.--------- Build BLIS test suite for ARM+DSP ---------
+# BLIS ARM+DSP test suite Makefile links ARM+DSP lib and ARM BLIS lib from inside src folder
+# BLIS_ARM_LIB_DIR = ../install/arm/lib
+# BLIS_BLAS_ACC_LIB_DIR = ../../blasblisacc/lib
+# BLIS_LIB := $(BLIS_BLAS_ACC_LIB_DIR)/libcblas_armplusdsp.a $(BLIS_ARM_LIB_DIR)/libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lpthread
+# Use the same environment variables as for building the examples.
+cd src/ti/linalg
+make BLIStest
+
+5.--------- Build BLIS test suite for DSP-only ---------
+# BLIS DSP test suite shares same make process defined in examples/dsponly/common/Makefile.common. It searches include path and lib path
+# in RTSC packaging style: $(LINALG_DIR)/packages/ti/linalg. Therefore, to build BLIS test suite, LINALG needs to be installed first.
+# Use the same environment variables as for building the examples.
+cd src/ti/linalg
+make cleanDSPlibs MEM_MODEL=Small TARGET=SOC_C6678 LIBOS=LIB_RTOS
+cd -
+rm -r exports
+make -f build/Makefile MEM_MODEL=Small TARGET=SOC_C6678 LIBOS=LIB_RTOS
+rm -r ~/ti/linalg_1_2_0_0_rtos/*
+cp -r exports/linalg_1_2_0_0/* ~/ti/linalg_1_2_0_0_rtos
+cd src/ti/linalg/blis/testsuite/dsponly
+make TARGET=SOC_C6678
+
diff --git a/src/ti/linalg/blasblisacc/src/ti_cblas_mem_config.c b/src/ti/linalg/blasblisacc/src/ti_cblas_mem_config.c
index 31ae22fe11375fa7ab83435589d3d0ed408b0489..c0cb75b020770118f1d22cd6c966bc6c6f90fa15 100644 (file)
@@ -55,12 +55,21 @@ int bli_l3_mem_config(void *msmc_buf, size_t msmc_buf_size, void *ddr_buf, size_
/* First, verify the provided/available memory meet requirements */
tiCblasGetSizes(&smem_size_vfast, &smem_size_fast, &smem_size_med, &smem_size_slow);
+
+#ifdef TI_CBLAS_DEBUG
+ printf("Very fast mem size is %d, fast mem size is %d, medium mem size is %d, slow mem size is %d.\n", smem_size_vfast, smem_size_fast, smem_size_med, smem_size_slow);
+ printf("Total L1D size is: %d\n", lib_get_L1D_total_size());
+ printf("Total L2 size is: %d\n", lib_get_L2_total_size());
+#endif
if( (smem_size_vfast> lib_get_L1D_total_size()) /* total available L1D */
||(smem_size_fast > lib_get_L2_total_size()) /* total available L2 */
||(smem_size_med > msmc_buf_size) /* provided MSMC memory */
||(smem_size_slow > ddr_buf_size) /* provided DDR memory */
) {
+#ifdef TI_CBLAS_DEBUG
+ printf("No enough memory!\n");
+#endif
return(TICBLAS_ERROR);
}
#pragma omp parallel
{
l1d_cfg_err = lib_L1D_config_SRAM(smem_size_vfast);
+#ifdef TI_CBLAS_DEBUG
+ if(l1d_cfg_err != LIB_CACHE_SUCCESS) {
+ printf("Error in configuring L1D on core %d!\n", lib_get_coreID());
+ }
+ else {
+ printf("On core %d, new L1D SRAM size is %d.\n", lib_get_coreID(), lib_get_L1D_SRAM_size());
+ }
+#endif
}
}
@@ -102,6 +119,14 @@ int bli_l3_mem_config(void *msmc_buf, size_t msmc_buf_size, void *ddr_buf, size_
{
l2_cfg_err = lib_L2_config_SRAM(smem_size_fast);
}
+#ifdef TI_CBLAS_DEBUG
+ if(l2_cfg_err != LIB_CACHE_SUCCESS) {
+ printf("Error in configuring L2 on core %d!\n", lib_get_coreID());
+ }
+ else {
+ printf("On core %d, new L2 SRAM size is %d.\n", lib_get_coreID(), lib_get_L2_SRAM_size());
+ }
+#endif
}
if(l1d_cfg_err || l2_cfg_err) {
@@ -109,6 +134,7 @@ int bli_l3_mem_config(void *msmc_buf, size_t msmc_buf_size, void *ddr_buf, size_
}
#ifdef TI_CBLAS_DEBUG
+ printf("New L1D SRAM size is: %d\n", lib_get_L1D_SRAM_size());
printf("New L2 SRAM size is: %d\n", lib_get_L2_SRAM_size());
#endif
#ifdef TI_CBLAS_DEBUG
printf("After calling BLIS, malloc_size is %d.\n", malloc_size);
printf("After calling BLIS, used_size in memory descriptor is %d.\n", blas_memdscr_tab[3]->used);
+ printf("Configuring L1D SRAM and L2 SRAM back to %d and %d.\n", l1D_SRAM_size_orig, l2_SRAM_size_orig);
#endif
/* configure L1D back if necessary */
if(l2_cfg_err != LIB_CACHE_SUCCESS) {
return(-4);
}
+
+#ifdef TI_CBLAS_DEBUG
+ printf("Reconfigured L1D SRAM size is: %d\n", lib_get_L1D_SRAM_size());
+ printf("Reconfigured L2 SRAM size is: %d\n", lib_get_L2_SRAM_size());
+#endif
return(TICBLAS_SUCCESS);
} /* bli_l3_mem_reconfig */
diff --git a/src/ti/linalg/blis/config/c66x/bli_kernel.h b/src/ti/linalg/blis/config/c66x/bli_kernel.h
index 1a9c9100ed9eece66534d9245ccc3096e09f4992..fcc5ac36ab236d31e0460fd2662745a7d3db0ad2 100755 (executable)
#elif defined(MEM_MODEL_SMALL)
// use this when EDMA is disabled for A and B
+/*
#define BLIS_DEFAULT_MC_S 112
#define BLIS_DEFAULT_KC_S 428
#define BLIS_DEFAULT_NC_S 1224
#define BLIS_DEFAULT_3M_MC_Z 48
#define BLIS_DEFAULT_3M_KC_Z 178
#define BLIS_DEFAULT_3M_NC_Z 488
+*/
-/*
#define BLIS_DEFAULT_MC_S 144
-#define BLIS_DEFAULT_KC_S 428
+#define BLIS_DEFAULT_KC_S 400 //320 good // 240 good // 428 error
#define BLIS_DEFAULT_NC_S 1224
#define BLIS_DEFAULT_MC_D 140
#define BLIS_DEFAULT_3M_MC_Z 56
#define BLIS_DEFAULT_3M_KC_Z 178
#define BLIS_DEFAULT_3M_NC_Z 488
-*/
+
// use this when EDMA is enabled
/*
#define BLIS_DEFAULT_MC_S 104
diff --git a/src/ti/linalg/blis/testsuite/dsponly/Makefile b/src/ti/linalg/blis/testsuite/dsponly/Makefile
index 56f7d29b3ae2f9a7669bba82105d8fc95a9e2785..00f79e6f7a2b778be7226692538272ad3253fced 100644 (file)
CL_OPTS = -I$(BLIS_INC_PATH) -DCBLAS -DBLIS_TEST_DSP
-COMMON_FOLDER = ../../../examples/dsponly/common
+COMMON_FOLDER = ../../../../../../examples/dsponly/common
testfiles_obj = ticblas_config.obj fc_config_c6678.obj \
test_addm.obj test_dotxaxpyf.obj test_her2.obj test_scal2v.obj test_syr2k.obj \
diff --git a/src/ti/linalg/blis/testsuite/input.general b/src/ti/linalg/blis/testsuite/input.general
index 6a7608261008bd718b687acf364ffa12a720109e..27e3bc1a6c803352a540e274dd7077f46bd33c02 100644 (file)
sdcz # Datatype(s) to test:
# 's' = single real; 'c' = single complex;
# 'd' = double real; 'z' = double complex
-1000 # Problem size: first to test
-4000 # Problem size: maximum to test
-500 # Problem size: increment between experiments
+500 # Problem size: first to test
+2500 # Problem size: maximum to test
+500 # Problem size: increment between experiments
# Complex level-3 implementations
0 # 3mh ('1' = enable; '0' = disable)
0 # 3m ('1' = enable; '0' = disable)
index 65d991c2e6c0f09a6467eec5f536afdb22972b54..3c2bc5847cf2b3131703ecec6f59bcc79e5ad5ea 100644 (file)
# for a project that appears at the top of each page and should give viewer a
# quick idea about the purpose of the project. Keep the description short.
-PROJECT_BRIEF = "Linear Algebra Library"
+PROJECT_BRIEF = "Texas Instruments Dense Linear Algebra Library"
# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
# in the documentation. The maximum height of the logo should not exceed 55
index 638242e9717993b7148162c0e5a64871f189a53a..d68e3878c61e170be43ba63f7cc3eb4bfe106496 100644 (file)
/*! \mainpage LINALG User's Guide
*
* @section introduction Introduction
- * LINALG is TI's Linear Algebra Library, supporting two types of TI platforms:
+ * LINALG is TI's Dense Linear Algebra Library, supporting two types of TI platforms:
* - ARM+DSP platforms such as AM57xx and Keystone II devices
* - DSP-only platforms such as Keystone I devices
*
* - BLAS is based on BLIS (https://github.com/flame/blis) and available on both ARM+DSP and DSP-only platforms.
* - LAPACK is based on CLAPACK 3.2.1 (http://www.netlib.org/clapack/) and available only on ARM+DSP platforms.
*
- * @section linalgapi LINALG API
+ * @section linalg_api LINALG API
* For ARM+DSP library, LINALG API is the standard CBLAS and CLAPACK API, accessible through ARM (host).
* For DSP-only library, LINALG API includes the standard CBLAS API as well as TI CBLAS API extension.
*
* - CLAPACK API: http://www.netlib.org/clapack/
* - @ref ti_cblas_api
*
+ * @section linalg_integration Integration
*/
diff --git a/src/ti/linalg/docs/linalg_user_guide.html b/src/ti/linalg/docs/linalg_user_guide.html
--- /dev/null
@@ -0,0 +1 @@
+doxygen/html/index.html
\ No newline at end of file
diff --git a/src/ti/linalg/platforms/evm6678/Platform.xdc b/src/ti/linalg/platforms/evm6678/Platform.xdc
index 3f1c643c5dd1804b58675e6b8e4a14ff111415cf..3d4d1369299f2be04a38fe787e9133d6671a9679 100644 (file)
len: 0x003E0000, access: "RWX"}],
["DDR3",
{name: "DDR3", base: 0x80000000,
- len: 0x20000000, access: "RWX"}],
+ len: 0x20000000, access: "RWX"}], /* C6678 EVM has only 512 MB DDR3 */
],
l2Mode:"128k",
l1PMode:"32k",