summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 3d2538f)
raw | patch | inline | side by side (parent: 3d2538f)
author | Jianzhong Xu <xuj@ti.com> | |
Fri, 19 Feb 2016 19:45:08 +0000 (19:45 +0000) | ||
committer | Jianzhong Xu <xuj@ti.com> | |
Fri, 19 Feb 2016 19:45:08 +0000 (19:45 +0000) |
44 files changed:
diff --git a/examples/Makefile b/examples/arm+dsp/Makefile
similarity index 100%
rename from examples/dgemm_test/Makefile
rename to examples/arm+dsp/dgemm_test/Makefile
rename from examples/dgemm_test/Makefile
rename to examples/arm+dsp/dgemm_test/Makefile
similarity index 100%
rename from examples/dgemm_test/dgemm_test.c
rename to examples/arm+dsp/dgemm_test/dgemm_test.c
rename from examples/dgemm_test/dgemm_test.c
rename to examples/arm+dsp/dgemm_test/dgemm_test.c
similarity index 100%
rename from examples/dsyrk_test/Makefile
rename to examples/arm+dsp/dsyrk_test/Makefile
rename from examples/dsyrk_test/Makefile
rename to examples/arm+dsp/dsyrk_test/Makefile
similarity index 100%
rename from examples/dsyrk_test/dsyrk_test.c
rename to examples/arm+dsp/dsyrk_test/dsyrk_test.c
rename from examples/dsyrk_test/dsyrk_test.c
rename to examples/arm+dsp/dsyrk_test/dsyrk_test.c
similarity index 100%
rename from examples/gemm_bench/Makefile
rename to examples/arm+dsp/gemm_bench/Makefile
rename from examples/gemm_bench/Makefile
rename to examples/arm+dsp/gemm_bench/Makefile
similarity index 87%
rename from examples/gemm_bench/main.c
rename to examples/arm+dsp/gemm_bench/main.c
index 88358b1b6486c3085cfdf51bbae69d40c1d43433..a0afa97c4c7e925430eb20f550195c85b5fe001f 100644 (file)
rename from examples/gemm_bench/main.c
rename to examples/arm+dsp/gemm_bench/main.c
index 88358b1b6486c3085cfdf51bbae69d40c1d43433..a0afa97c4c7e925430eb20f550195c85b5fe001f 100644 (file)
int num_size, gemm_err;
int M, N, K, m, n, k, test_idx;
float time_secs, gflops, gflops_ref, cpu_freq_GHz;
- cl_platform_id platform;
- cl_uint num_platforms;
- cl_device_id devices;
- cl_uint num_devices;
+ cl_platform_id platform;
+ cl_uint num_platforms;
+ cl_device_id devices;
+ cl_uint num_devices;
cl_uint cpu_freq;
- size_t cpu_freq_size;
+ size_t cpu_freq_size;
FILE *fp_time, *fp_gflops;
- if(clGetPlatformIDs(1, &platform, &num_platforms) != CL_SUCCESS) {
- printf("Error in clGetPlatformIDs\n.");
- exit(0);
- }
+ if(clGetPlatformIDs(1, &platform, &num_platforms) != CL_SUCCESS) {
+ printf("Error in clGetPlatformIDs\n.");
+ exit(0);
+ }
- if(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &devices, &num_devices) != CL_SUCCESS) {
- printf("Error in clGetDeviceIDs\n.");
- exit(0);
- }
- if(clGetDeviceInfo(devices, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), (void *)&cpu_freq, &cpu_freq_size) != CL_SUCCESS) {
- printf("Error in clGetDeviceInfo\n.");
- exit(0);
- }
- cpu_freq_GHz = (float)cpu_freq/1e3; /* convert from MHz to GHz */
- printf("CPU frequency is %f GHz.\n", cpu_freq_GHz);
+ if(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &devices, &num_devices) != CL_SUCCESS) {
+ printf("Error in clGetDeviceIDs\n.");
+ exit(0);
+ }
+ if(clGetDeviceInfo(devices, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), (void *)&cpu_freq, &cpu_freq_size) != CL_SUCCESS) {
+ printf("Error in clGetDeviceInfo\n.");
+ exit(0);
+ }
+ cpu_freq_GHz = (float)cpu_freq/1e3; /* convert from MHz to GHz */
+ printf("CPU frequency is %f GHz.\n", cpu_freq_GHz);
srand(12345);
- /* setting up TI CBLAS during first call */
- run_dgemm(1000, 1000, 1000, &time_secs, &gflops);
+ /* setting up TI CBLAS during first call */
+ run_dgemm(1000, 1000, 1000, &time_secs, &gflops);
/*------- benchmarking DGEMM ------- */
fp_time = fopen("dgemm_time.dat","w");
fp_gflops = fopen("dgemm_gflops.dat","w");
- test_idx = 0;
+ test_idx = 0;
for (M=GEMM_MATRIX_SIZE_START,m=0; m<NUM_MATRIX_SIZE_TO_BENCHMARK; m++,M*=2)
{
for (N=GEMM_MATRIX_SIZE_START,n=0; n<NUM_MATRIX_SIZE_TO_BENCHMARK; n++,N*=2)
if(gemm_err == -1) { /* out of memory for DSP offloading */
printf("DGEMM out of memory for (M,N,K) = (%d,%d,%d).\n", M,N,K);
- exit(0);
+ exit(0);
}
else {
fprintf(fp_time, "%6d\t%6d\t%6d\t%10.8e\n", M, N, K, time_secs);
fprintf(fp_gflops, "%6d\t%6d\t%6d\t%10.8e\n", M, N, K, gflops);
}
- gflops_ref = dgemm_gflops_ref[test_idx++]; /* read reference GFLOPS */
- gflops_ref = gflops_ref * cpu_freq_GHz; /* scale ref GFLOPS by CPU freq */
+ gflops_ref = dgemm_gflops_ref[test_idx++]; /* read reference GFLOPS */
+ gflops_ref = gflops_ref * cpu_freq_GHz; /* scale ref GFLOPS by CPU freq */
printf("Measured %f GFLOPS, reference %f GFLOPS.\n", gflops, gflops_ref);
- if((gflops > gflops_ref*GFLOPS_MARGIN) || (gflops < gflops_ref/GFLOPS_MARGIN)) {
- printf("DGEMM test FAILED! GFLOPS deviates from reference unacceptably.\n");
- exit(0);
- }
+ if((gflops > gflops_ref*GFLOPS_MARGIN) || (gflops < gflops_ref/GFLOPS_MARGIN)) {
+ printf("DGEMM test FAILED! GFLOPS deviates from reference unacceptably.\n");
+ exit(0);
+ }
if(gemm_err == -1) { /* out of memory for DSP offloading */
printf("Out of memory for (M,N,K) = (%d,%d,%d).\n", M,N,K);
- exit(0);
+ exit(0);
}
else {
fprintf(fp_time, "%6d\t%6d\t%6d\t%10.8e\n", M, N, K, time_secs);
fp_time = fopen("sgemm_time.dat","w");
fp_gflops = fopen("sgemm_gflops.dat","w");
- test_idx = 0;
+ test_idx = 0;
for (M=GEMM_MATRIX_SIZE_START,m=0; m<NUM_MATRIX_SIZE_TO_BENCHMARK; m++,M*=2)
{
for (N=GEMM_MATRIX_SIZE_START,n=0; n<NUM_MATRIX_SIZE_TO_BENCHMARK; n++,N*=2)
if(gemm_err == -1) { /* out of memory for DSP offloading */
printf("SGEMM out of memory for (M,N,K) = (%d,%d,%d).\n", M,N,K);
- exit(0);
+ exit(0);
}
else {
fprintf(fp_time, "%6d\t%6d\t%6d\t%10.8e\n", M, N, K, time_secs);
fprintf(fp_gflops, "%6d\t%6d\t%6d\t%10.8e\n", M, N, K, gflops);
}
- gflops_ref = sgemm_gflops_ref[test_idx++]; /* read reference GFLOPS */
- gflops_ref = gflops_ref * cpu_freq_GHz; /* scale ref GFLOPS by CPU freq */
+ gflops_ref = sgemm_gflops_ref[test_idx++]; /* read reference GFLOPS */
+ gflops_ref = gflops_ref * cpu_freq_GHz; /* scale ref GFLOPS by CPU freq */
printf("Measured %f GFLOPS, reference %f GFLOPS.\n", gflops, gflops_ref);
- if((gflops > gflops_ref*GFLOPS_MARGIN) || (gflops < gflops_ref/GFLOPS_MARGIN)) {
- printf("SGEMM test FAILED! GFLOPS deviates from reference unacceptably.\n");
- exit(0);
- }
+ if((gflops > gflops_ref*GFLOPS_MARGIN) || (gflops < gflops_ref/GFLOPS_MARGIN)) {
+ printf("SGEMM test FAILED! GFLOPS deviates from reference unacceptably.\n");
+ exit(0);
+ }
if(gemm_err == -1) { /* out of memory for DSP offloading */
printf("Out of memory for (M,N,K) = (%d,%d,%d).\n", M,N,K);
- exit(0);
+ exit(0);
}
else {
fprintf(fp_time, "%6d\t%6d\t%6d\t%10.8e\n", M, N, K, time_secs);
fp_time = fopen("cgemm_time.dat","w");
fp_gflops = fopen("cgemm_gflops.dat","w");
- test_idx = 0;
+ test_idx = 0;
for (M=GEMM_MATRIX_SIZE_START,m=0; m<NUM_MATRIX_SIZE_TO_BENCHMARK; m++,M*=2)
{
for (N=GEMM_MATRIX_SIZE_START,n=0; n<NUM_MATRIX_SIZE_TO_BENCHMARK; n++,N*=2)
if(gemm_err == -1) { /* out of memory for DSP offloading */
printf("CGEMM out of memory for (M,N,K) = (%d,%d,%d).\n", M,N,K);
- exit(0);
+ exit(0);
}
else {
fprintf(fp_time, "%6d\t%6d\t%6d\t%10.8e\n", M, N, K, time_secs);
fprintf(fp_gflops, "%6d\t%6d\t%6d\t%10.8e\n", M, N, K, gflops);
}
- gflops_ref = cgemm_gflops_ref[test_idx++]; /* read reference GFLOPS */
- gflops_ref = gflops_ref * cpu_freq_GHz; /* scale ref GFLOPS by CPU freq */
+ gflops_ref = cgemm_gflops_ref[test_idx++]; /* read reference GFLOPS */
+ gflops_ref = gflops_ref * cpu_freq_GHz; /* scale ref GFLOPS by CPU freq */
printf("Measured %f GFLOPS, reference %f GFLOPS.\n", gflops, gflops_ref);
- if((gflops > gflops_ref*GFLOPS_MARGIN) || (gflops < gflops_ref/GFLOPS_MARGIN)) {
- printf("CGEMM test FAILED! GFLOPS deviates from reference unacceptably.\n");
- exit(0);
- }
+ if((gflops > gflops_ref*GFLOPS_MARGIN) || (gflops < gflops_ref/GFLOPS_MARGIN)) {
+ printf("CGEMM test FAILED! GFLOPS deviates from reference unacceptably.\n");
+ exit(0);
+ }
}
}
}
fp_time = fopen("zgemm_time.dat","w");
fp_gflops = fopen("zgemm_gflops.dat","w");
- test_idx = 0;
+ test_idx = 0;
for (M=GEMM_MATRIX_SIZE_START,m=0; m<NUM_MATRIX_SIZE_TO_BENCHMARK; m++,M*=2)
{
for (N=GEMM_MATRIX_SIZE_START,n=0; n<NUM_MATRIX_SIZE_TO_BENCHMARK; n++,N*=2)
gemm_err = run_zgemm(M, N, K, &time_secs, &gflops);
if(gemm_err == -1) { /* out of memory for DSP offloading */
printf("ZGEMM out of memory for (M,N,K) = (%d,%d,%d).\n", M,N,K);
- exit(0);
+ exit(0);
}
else {
fprintf(fp_time, "%6d\t%6d\t%6d\t%10.8e\n", M, N, K, time_secs);
fprintf(fp_gflops, "%6d\t%6d\t%6d\t%10.8e\n", M, N, K, gflops);
}
- gflops_ref = zgemm_gflops_ref[test_idx++]; /* read reference GFLOPS */
- gflops_ref = gflops_ref * cpu_freq_GHz; /* scale ref GFLOPS by CPU freq */
+ gflops_ref = zgemm_gflops_ref[test_idx++]; /* read reference GFLOPS */
+ gflops_ref = gflops_ref * cpu_freq_GHz; /* scale ref GFLOPS by CPU freq */
printf("Measured %f GFLOPS, reference %f GFLOPS.\n", gflops, gflops_ref);
- if((gflops > gflops_ref*GFLOPS_MARGIN) || (gflops < gflops_ref/GFLOPS_MARGIN)) {
- printf("ZGEMM test FAILED! GFLOPS deviates from reference unacceptably.\n");
- exit(0);
- }
+ if((gflops > gflops_ref*GFLOPS_MARGIN) || (gflops < gflops_ref/GFLOPS_MARGIN)) {
+ printf("ZGEMM test FAILED! GFLOPS deviates from reference unacceptably.\n");
+ exit(0);
+ }
}
}
}
fclose(fp_time);
fclose(fp_gflops);
- printf("Passed.\n");
+ printf("Passed.\n");
return 0;
}
return (-1);
}
- total_time = 0.0;
+ total_time = 0.0;
for (iter = 0; iter < NUM_TEST_RUN; iter++)
{
/*----------------------------------------------------------------------
return (-1);
}
- total_time = 0.0;
+ total_time = 0.0;
for (iter = 0; iter < NUM_TEST_RUN; iter++)
{
/*----------------------------------------------------------------------
similarity index 100%
rename from examples/ludinv/Makefile
rename to examples/arm+dsp/ludinv/Makefile
rename from examples/ludinv/Makefile
rename to examples/arm+dsp/ludinv/Makefile
similarity index 100%
rename from examples/ludinv/dlaran.c
rename to examples/arm+dsp/ludinv/dlaran.c
rename from examples/ludinv/dlaran.c
rename to examples/arm+dsp/ludinv/dlaran.c
similarity index 100%
rename from examples/ludinv/dlarnd.c
rename to examples/arm+dsp/ludinv/dlarnd.c
rename from examples/ludinv/dlarnd.c
rename to examples/arm+dsp/ludinv/dlarnd.c
similarity index 100%
rename from examples/ludinv/dlatm1.c
rename to examples/arm+dsp/ludinv/dlatm1.c
rename from examples/ludinv/dlatm1.c
rename to examples/arm+dsp/ludinv/dlatm1.c
similarity index 100%
rename from examples/ludinv/dlatm2.c
rename to examples/arm+dsp/ludinv/dlatm2.c
rename from examples/ludinv/dlatm2.c
rename to examples/arm+dsp/ludinv/dlatm2.c
similarity index 100%
rename from examples/ludinv/dlatm3.c
rename to examples/arm+dsp/ludinv/dlatm3.c
rename from examples/ludinv/dlatm3.c
rename to examples/arm+dsp/ludinv/dlatm3.c
similarity index 100%
rename from examples/ludinv/dlatmr.c
rename to examples/arm+dsp/ludinv/dlatmr.c
rename from examples/ludinv/dlatmr.c
rename to examples/arm+dsp/ludinv/dlatmr.c
diff --git a/examples/make.inc b/examples/arm+dsp/make.inc
similarity index 91%
rename from examples/make.inc
rename to examples/arm+dsp/make.inc
index e4d7634569b4684b21f9a16fea082026813fdffd..de310ba794731e515d90a2810b6322321eafb2a9 100644 (file)
rename from examples/make.inc
rename to examples/arm+dsp/make.inc
index e4d7634569b4684b21f9a16fea082026813fdffd..de310ba794731e515d90a2810b6322321eafb2a9 100644 (file)
--- a/examples/make.inc
LD_FLAGS=-L$(TARGET_ROOTDIR)/lib -L$(TARGET_ROOTDIR)/usr/lib -Wl,-rpath-link,$(TARGET_ROOTDIR)/lib -Wl,-rpath-link,$(TARGET_ROOTDIR)/usr/lib
#BLASLIB = $(LIB_DIR)libcblas_armplusdsp.a $(LIB_DIR)libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread
BLASLIB = $(LIB_DIR)libcblas_armplusdsp.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread
-LAPACKLIB = $(LIB_DIR)libcblaswr.a $(LIB_DIR)liblapack.a $(LIB_DIR)libf2c.a $(LIB_DIR)libcblas_armplusdsp.a $(LIB_DIR)libblis.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread
+LAPACKLIB = $(LIB_DIR)libcblaswr.a $(LIB_DIR)liblapack.a $(LIB_DIR)libf2c.a $(LIB_DIR)libcblas_armplusdsp.a -lOpenCL -locl_util -lstdc++ -lrt -lm -lgomp -lpthread
%.o: %.c
$(CC) -c $(CFLAGS) $<
similarity index 100%
rename from examples/matmpy/Makefile
rename to examples/arm+dsp/matmpy/Makefile
rename from examples/matmpy/Makefile
rename to examples/arm+dsp/matmpy/Makefile
diff --git a/examples/arm+dsp/run_tests_evm.sh b/examples/arm+dsp/run_tests_evm.sh
--- /dev/null
@@ -0,0 +1,10 @@
+./matmpy/matmpy > testlog.txt
+./dsyrk_test/dsyrk_test >> testlog.txt
+./ztrsm_test/ztrsm_test >> testlog.txt
+./dgemm_test/dgemm_test >> testlog.txt
+./eig/eig >> testlog.txt
+./ludinv/ludinv >> testlog.txt
+./ztrmm_test/ztrmm_test >> testlog.txt
+
+grep FAIL testlog.txt && echo "tests failed" || echo "All tests have passed."
+
similarity index 100%
rename from examples/ztrmm_test/Makefile
rename to examples/arm+dsp/ztrmm_test/Makefile
rename from examples/ztrmm_test/Makefile
rename to examples/arm+dsp/ztrmm_test/Makefile
similarity index 100%
rename from examples/ztrmm_test/ztrmm_test.c
rename to examples/arm+dsp/ztrmm_test/ztrmm_test.c
rename from examples/ztrmm_test/ztrmm_test.c
rename to examples/arm+dsp/ztrmm_test/ztrmm_test.c
similarity index 100%
rename from examples/ztrsm_test/Makefile
rename to examples/arm+dsp/ztrsm_test/Makefile
rename from examples/ztrsm_test/Makefile
rename to examples/arm+dsp/ztrsm_test/Makefile
similarity index 100%
rename from examples/ztrsm_test/ztrsm_test.c
rename to examples/arm+dsp/ztrsm_test/ztrsm_test.c
rename from examples/ztrsm_test/ztrsm_test.c
rename to examples/arm+dsp/ztrsm_test/ztrsm_test.c
similarity index 53%
rename from examples/dsponly/dgemm_test/Makefile.libomp
rename to examples/dsponly/common/Makefile.common
index 683d3b8a3df340d781459bf40de2c946cc0343d7..0e76de80b2a731f07ff7145a2586d8a626300f92 100644 (file)
rename from examples/dsponly/dgemm_test/Makefile.libomp
rename to examples/dsponly/common/Makefile.common
index 683d3b8a3df340d781459bf40de2c946cc0343d7..0e76de80b2a731f07ff7145a2586d8a626300f92 100644 (file)
-#
-# Makefile.libomp
-#
+# Makefile to build OpenMP applications
+
+default: all
# Configuration file used (without the .cfg extension)
CFGDIR = omp_config
@@ -26,15 +26,48 @@ XDCPATH = $(PDK_DIR)/packages;$(OMP_DIR)/packages;$(BIOS_DIR)/packages;$(IPC_DIR
XS = $(XDC_DIR)/xs
XDC = $(XDC_DIR)/xdc
-XDCTARGET = ti.targets.elf.C66
+XDCTARGET = ti.targets.elf.C66
+
+#
+# Compiler option configuration
+#
+LNK_CMD = $(CFGDIR)/linker.cmd
+LNK_CMD_FC= $(COMMON_FOLDER)/linker_fc.cmd
+OPT_CMD = $(CFGDIR)/compiler.opt
+LNK_OPTS = -x -c --priority -w
+CL = $(CGTROOT)/bin/cl6x
+RTS_LIB = $(CGTROOT)/lib/libc.a
+LIBARCH_LIB = $(LIBARCH_DIR)/lib/libArch.ae66
+LINALG_LIB = $(LINALG_DIR)/lib/libcblas.ae66
+
+ifeq ($(BUILD_TYPE),debug)
+ CL_OPTS += -g --optimize_with_debug=on
+else
+ CL_OPTS += -o3
+endif
+
+
+all: $(outfile)
# Generate and build libomp config packages
libomp_config:
@echo making $(CFGDIR) files
+ cp $(COMMON_FOLDER)/$(CFGDIR).cfg .
$(XS) --xdcpath "$(XDCPATH)" xdc.tools.configuro -c $(CGTROOT) --cb -t $(XDCTARGET) -p $(XDCPLATFORM) -r $(BUILD_TYPE) $(CFGDIR).cfg
libomp_clean:
@echo Removing $(CFGDIR)
@rm -rf $(CFGDIR)/
+%.out: $(testfiles) libomp_config
+ echo compiling $<
+ $(CL) $(CL_OPTS) $(testfiles) -z $(LNK_OPTS) -o $@ -m $*.map $(LNK_CMD) $(LNK_CMD_FC) $(RTS_LIB) $(LIBARCH_LIB) $(LINALG_LIB)
+clean: libomp_clean
+ @rm -rf *.map *.out *.obj *.mak $(CFGDIR).cfg
+
+#
+# Cleans libomp artifacts
+#
+realclean: libomp_clean
+ @rm -rf *.map *.out *.obj *.mak
similarity index 100%
rename from examples/dsponly/dgemm_test/config_c6678.c
rename to examples/dsponly/common/fc_config_c6678.c
rename from examples/dsponly/dgemm_test/config_c6678.c
rename to examples/dsponly/common/fc_config_c6678.c
similarity index 100%
rename from examples/dsponly/dgemm_test/linker_fc.cmd
rename to examples/dsponly/common/linker_fc.cmd
rename from examples/dsponly/dgemm_test/linker_fc.cmd
rename to examples/dsponly/common/linker_fc.cmd
similarity index 100%
rename from examples/dsponly/dgemm_test/omp_config.cfg
rename to examples/dsponly/common/omp_config.cfg
rename from examples/dsponly/dgemm_test/omp_config.cfg
rename to examples/dsponly/common/omp_config.cfg
diff --git a/examples/dsponly/common/ticblas_config.c b/examples/dsponly/common/ticblas_config.c
--- /dev/null
@@ -0,0 +1,168 @@
+/******************************************************************************
+ * Copyright (c) 2015, Texas Instruments Incorporated - http://www.ti.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+#include <omp.h>
+#include <string.h>
+#include <stdio.h>
+#include <libarch.h>
+#include <ticblas.h>
+#include <cblas.h>
+
+/* use small memory model of BLAS */
+#define BLAS_L2_BUF_SIZE (183*1024UL) /* 183KB */
+#define BLAS_MSMC_BUF_SIZE (2*1024*1024UL) /* 2MB */
+#define BLAS_L3_DDR_SIZE (5120) /* 5KB */
+
+//#define BLAS_L2_BUF_SIZE (384*1024UL) /* 384KB */
+//#define BLAS_MSMC_BUF_SIZE (4718592UL) /* 4.5MB */
+//#define BLAS_L3_DDR_SIZE (5120) /* 5KB */
+
+size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
+
+#pragma DATA_SECTION(blas_msmc_buf, ".blas_msmc")
+#pragma DATA_ALIGN(blas_msmc_buf,32)
+char blas_msmc_buf[BLAS_MSMC_BUF_SIZE];
+
+#pragma DATA_SECTION(blas_l2_buf, ".blas_l2")
+#pragma DATA_ALIGN(blas_l2_buf,32)
+char blas_l2_buf[BLAS_L2_BUF_SIZE];
+
+char blas_ddr_buf[BLAS_L3_DDR_SIZE];
+
+
+/*==============================================================================
+ * Configure and initialize the memory handed to TI CBLAS.
+ *
+ * Steps:
+ *   1. Query the per-tier memory requirements with tiCblasGetSizes().
+ *   2. Check them against the total L1D size and the caller-supplied buffers;
+ *      terminate the program if any tier is too small.
+ *   3. If the current L1D SRAM size is smaller than required, reconfigure it
+ *      with lib_L1D_config_SRAM() inside an OpenMP parallel region.
+ *   4. Pass the L1D SRAM region and the three buffers to tiCblasInit().
+ *
+ * Parameters:
+ *   l2_buf,   l2_buf_size   - buffer checked against the "fast" requirement
+ *   msmc_buf, msmc_buf_size - buffer checked against the "medium" requirement
+ *   ddr_buf,  ddr_buf_size  - buffer checked against the "slow" requirement
+ *
+ * Returns the value returned by tiCblasInit().
+ *
+ * Side effect: stores the L1D SRAM size found on entry in the file-scope
+ * variable l1D_SRAM_size_orig.
+ *============================================================================*/
+int config_mem_for_ticblas(double *l2_buf, size_t l2_buf_size,
+                           double *msmc_buf, size_t msmc_buf_size,
+                           double *ddr_buf, size_t ddr_buf_size)
+{
+    size_t smem_size_vfast, smem_size_fast, smem_size_med, smem_size_slow;
+    void *l1d_SRAM_ptr;
+
+    /* First, verify the provided/available memory meets the requirements */
+    tiCblasGetSizes(&smem_size_vfast, &smem_size_fast, &smem_size_med, &smem_size_slow);
+
+    /* Sizes are printed as unsigned long so that the conversion specifier
+     * always matches the argument type (%d expects an int, not a size_t). */
+    printf("BLAS memory requirements - vfast size: %lu, fast size: %lu, medium size: %lu, slow size: %lu.\n",
+           (unsigned long)smem_size_vfast, (unsigned long)smem_size_fast,
+           (unsigned long)smem_size_med, (unsigned long)smem_size_slow);
+
+    if(  (smem_size_vfast > lib_get_L1D_total_size())  /* total available L1D */
+       ||(smem_size_fast  > l2_buf_size)               /* provided L2 size */
+       ||(smem_size_med   > msmc_buf_size)             /* provided MSMC memory */
+       ||(smem_size_slow  > ddr_buf_size)              /* provided DDR memory */
+      ) {
+        printf("Provided memory is not enough for BLAS!\n");
+        /* NOTE(review): exit status 0 is kept from the original code even
+         * though this is an error path - confirm nothing relies on it
+         * before changing it. */
+        exit(0);
+    }
+
+    /* Configure L1D if necessary */
+    l1D_SRAM_size_orig = lib_get_L1D_SRAM_size();  /* get current L1D SRAM size */
+    printf("Original L1D SRAM size is: %lu\n", (unsigned long)l1D_SRAM_size_orig);
+    printf("Required L1D SRAM size is: %lu\n", (unsigned long)smem_size_vfast);
+    if(l1D_SRAM_size_orig < smem_size_vfast) {     /* configure L1D if needs more SRAM */
+        #pragma omp parallel
+        {
+            /* Declared inside the parallel region so that every thread owns
+             * its own status; a variable declared outside would be shared
+             * and written concurrently by all threads. */
+            int l1d_cfg_err = lib_L1D_config_SRAM(smem_size_vfast);
+
+            if(l1d_cfg_err) {
+                printf("L1D configuration fails on core %d!\n", lib_get_coreID());
+                exit(1);
+            }
+        }
+    }
+
+    #pragma omp parallel
+    {
+        /* The return type of lib_get_L1D_SRAM_size() is not visible here;
+         * the cast keeps the call well-formed for any integer type. */
+        printf("New L1D SRAM size on core %d is: %lu\n", lib_get_coreID(),
+               (unsigned long)lib_get_L1D_SRAM_size());
+    }
+
+    /* get L1D SRAM base address */
+    l1d_SRAM_ptr = lib_get_L1D_SRAM_base();
+    printf("L1D SRAM base address is 0x%x.\n", (unsigned int)l1d_SRAM_ptr);
+
+    /* pass allocated memories for heap initialization */
+    return(tiCblasInit(l1d_SRAM_ptr, lib_get_L1D_SRAM_size(),
+                       l2_buf, l2_buf_size,
+                       msmc_buf, msmc_buf_size,
+                       ddr_buf, ddr_buf_size));
+} /* config_mem_for_ticblas */
+
+/*==============================================================================
+ * Restore the L1D SRAM size that was recorded in l1D_SRAM_size_orig.
+ *
+ * If the current L1D SRAM size differs from the recorded one,
+ * lib_L1D_config_SRAM() is called inside an OpenMP parallel region; the
+ * program is terminated with status 2 if any of those calls reports an error.
+ *
+ * Returns TICBLAS_SUCCESS.
+ *============================================================================*/
+int reconfig_mem_after_ticblas()
+{
+    /* configure L1D back */
+    if(l1D_SRAM_size_orig != lib_get_L1D_SRAM_size()) {
+        #pragma omp parallel
+        {
+            /* Declared inside the parallel region so that every thread owns
+             * its own status; a variable declared outside would be shared
+             * and written concurrently by all threads. */
+            int l1d_cfg_err = lib_L1D_config_SRAM(l1D_SRAM_size_orig);
+
+            if(l1d_cfg_err) {
+                printf("L1D reconfiguration fails on core %d!\n", lib_get_coreID());
+                exit(2);
+            }
+        }
+    }
+
+    /* The return type of lib_get_L1D_SRAM_size() is not visible here; the
+     * cast keeps the call well-formed for any integer type. */
+    printf("L1D SRAM size reconfigured to: %lu\n",
+           (unsigned long)lib_get_L1D_SRAM_size());
+
+    return(TICBLAS_SUCCESS);
+} /* reconfig_mem_after_ticblas */
+
+
+/*
+ * Set up TI CBLAS before the first BLAS call: report the L2 sizes, create
+ * the CBLAS instance and hand the statically allocated buffers
+ * (blas_l2_buf, blas_msmc_buf, blas_ddr_buf) to config_mem_for_ticblas().
+ * The program is terminated if that configuration reports a non-zero code.
+ */
+void prepare_for_ticblas()
+{
+    int status;
+
+    printf("L2 SRAM size is %d, total L2 size is %d.\n",
+           lib_get_L2_SRAM_size(), lib_get_L2_total_size());
+
+    /* Create a new TI CBLAS instance */
+    tiCblasNew();
+
+    /* Give the library its working memory */
+    status = config_mem_for_ticblas(
+                 (double *)blas_l2_buf,   (size_t)BLAS_L2_BUF_SIZE,
+                 (double *)blas_msmc_buf, (size_t)BLAS_MSMC_BUF_SIZE,
+                 (double *)blas_ddr_buf,  (size_t)BLAS_L3_DDR_SIZE);
+
+    if(status == 0) {
+        return;   /* configuration succeeded, nothing more to do */
+    }
+
+    printf("Memory configuration for CBLAS failed with error code %d.\n", status);
+    exit (0);
+}
+
+/*
+ * Tear down TI CBLAS after the last BLAS call: restore the memory
+ * configuration through reconfig_mem_after_ticblas(), report when that
+ * returns TICBLAS_SUCCESS, then delete the CBLAS instance.
+ */
+void cleanup_after_ticblas()
+{
+    int reconfig_status = reconfig_mem_after_ticblas();
+
+    if(reconfig_status == TICBLAS_SUCCESS) {
+        printf("Memory reconfiguration after BLAS call finished.\n");
+    }
+
+    /* The instance is deleted regardless of the reconfiguration result */
+    tiCblasDelete();
+}
index 5af6a1f72b27f5b1de13e7a0eac91b1356f809ee..ecaf8b7a6a661368985835677485996512cbab25 100644 (file)
# Default to RTSC mode
-testfiles = dgemm_test.c config_c6678.c
+COMMON_FOLDER = ../common
+testfiles = dgemm_test.c $(COMMON_FOLDER)/ticblas_config.c $(COMMON_FOLDER)/fc_config_c6678.c
outfile = dgemm_test.out
-include Makefile.common
+CL_OPTS = -@ $(OPT_CMD) -mv6600 --omp -I $(OMP_DIR)/packages/ti/runtime/openmp
+CL_OPTS += -I$(FC_DIR)/packages -I$(XDAIS_DIR)/packages -I$(EDMA3_DIR)/packages -I$(LIBARCH_DIR)/include -I$(LINALG_DIR)/include
+CL_OPTS += -D$(TARGET) -DLIB_RTOS
+
+include $(COMMON_FOLDER)/Makefile.common
diff --git a/examples/dsponly/dgemm_test/Makefile.common b/examples/dsponly/dgemm_test/Makefile.common
+++ /dev/null
@@ -1,43 +0,0 @@
-# Makefile to build OpenMP applications
-
-
-default: all
-
-include Makefile.libomp
-
-
-#
-# Compiler option configuration
-#
-LNK_CMD = $(CFGDIR)/linker.cmd
-OPT_CMD = $(CFGDIR)/compiler.opt
-CL_OPTS = -@ $(OPT_CMD) -mv6600 --omp -I $(OMP_DIR)/packages/ti/runtime/openmp
-CL_OPTS += -I$(FC_DIR)/packages -I$(XDAIS_DIR)/packages -I$(EDMA3_DIR)/packages -I$(LIBARCH_DIR)/include -I$(LINALG_DIR)/include
-CL_OPTS += -D$(TARGET) -DLIB_RTOS
-LNK_OPTS = -x -c --priority -w
-CL = $(CGTROOT)/bin/cl6x
-RTS_LIB = $(CGTROOT)/lib/libc.a
-LIBARCH_LIB = $(LIBARCH_DIR)/lib/libArch.ae66
-LINALG_LIB = $(LINALG_DIR)/lib/libcblas.ae66
-
-ifeq ($(BUILD_TYPE),debug)
- CL_OPTS += -g --optimize_with_debug=on
-else
- CL_OPTS += -o3
-endif
-
-
-all: $(outfile)
-
-%.out: $(testfiles) libomp_config
- echo compiling $<
- $(CL) $(CL_OPTS) $< $(testfiles) -z $(LNK_OPTS) -o $@ -m $*.map $(LNK_CMD) ./linker_fc.cmd $(RTS_LIB) $(LIBARCH_LIB) $(LINALG_LIB)
-
-clean: libomp_clean
- @rm -rf *.map *.out *.obj *.mak
-
-#
-# Cleans libomp artifacts
-#
-realclean: libomp_clean
- @rm -rf *.map *.out *.obj *.mak
diff --git a/examples/dsponly/dgemm_test/config.bld b/examples/dsponly/dgemm_test/config.bld
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * ======== config.bld ========
- * Sample Build configuration script
- */
-
-/* Get the Tools Base directory from the Environment Variable. */
-var tiCgtDir = java.lang.System.getenv("CGTROOT");
-if (!tiCgtDir)
-{
- throw new Error("CGTROOT environment variable not set");
-}
-
-//Setup for c66 target
-var C66 = xdc.useModule('ti.targets.elf.C66');
-C66.rootDir = tiCgtDir;
-C66.ccOpts.suffix += " -mi10 -mo ";
-
-//list interested targets in Build.targets array
-Build.targets = [
- C66,
- ];
index 6d7b0397883ae1bfe3530a55d633024c8dfbbbb5..dddd57f3dafa548e3bbeefc8314cc407c28a4323 100644 (file)
+/******************************************************************************
+ * Copyright (c) 2015, Texas Instruments Incorporated - http://www.ti.com
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Texas Instruments Incorporated nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
/******************************************************************************
* FILE: dgemm_test.c
******************************************************************************/
#include <ticblas.h>
#include <cblas.h>
-/* use small memory model of BLAS */
-#define BLAS_L2_BUF_SIZE (183*1024UL) /* 183KB */
-#define BLAS_MSMC_BUF_SIZE (2*1024*1024UL) /* 2MB */
-#define BLAS_L3_DDR_SIZE (5120) /* 5KB */
-
-//#define BLAS_L2_BUF_SIZE (384*1024UL) /* 384KB */
-//#define BLAS_MSMC_BUF_SIZE (4718592UL) /* 4.5MB */
-//#define BLAS_L3_DDR_SIZE (5120) /* 5KB */
-
-size_t l1D_SRAM_size_orig, l2_SRAM_size_orig;
-
-#pragma DATA_SECTION(blas_msmc_buf, ".blas_msmc")
-#pragma DATA_ALIGN(blas_msmc_buf,32)
-char blas_msmc_buf[BLAS_MSMC_BUF_SIZE];
-
-#pragma DATA_SECTION(blas_l2_buf, ".blas_l2")
-#pragma DATA_ALIGN(blas_l2_buf,32)
-char blas_l2_buf[BLAS_L2_BUF_SIZE];
+#define FLOPS_PER_UNIT_PERF 1e9
-char blas_ddr_buf[BLAS_L3_DDR_SIZE];
+extern void cleanup_after_ticblas();
+extern void prepare_for_ticblas();
+extern double omp_get_wtime(void);
-int config_mem_for_ticblas(double *l2_buf, size_t l2_buf_size,
- double *msmc_buf, size_t msmc_buf_size,
- double *ddr_buf, size_t ddr_buf_size);
-int reconfig_mem_after_ticblas();
void matrix_gen(double *A, double *B, double *C, int m, int k, int n);
void mat_mpy(const double * A, const double * B, double * C, int mat_N,
int mat_K, int mat_M, double alpha, double beta);
int main (int argc, char *argv[])
{
double *A, *B, *C, *C_copy;
- int m, n, k, err;
- double alpha, beta, precision_diff;
+ int m, n, k;
+ double alpha, beta, precision_diff, time, time_diff, gflops;
int nthreads, tid;
/* Initialize random number generator */
srand(123456789);
+ /* Configure memory and initialize TI CBLAS */
+ prepare_for_ticblas();
+
/* Generate matrices */
matrix_gen(A, B, C, m, k, n);
memcpy(C_copy, C, m*n*sizeof(double));
- printf("L2 SRAM size is %d, total L2 size is %d.\n", lib_get_L2_SRAM_size(), lib_get_L2_total_size());
-
- /* Call TI CBLAS API to creat new CBLAS instance */
- tiCblasNew();
-
- /* Configure memory for TI CBLAS if necessary */
- err = config_mem_for_ticblas((double *)blas_l2_buf, (size_t)BLAS_L2_BUF_SIZE,
- (double *)blas_msmc_buf, (size_t)BLAS_MSMC_BUF_SIZE,
- (double *)blas_ddr_buf, (size_t)BLAS_L3_DDR_SIZE);
-
- if(err) {
- printf("Memory configuration for CBLAS failed with error code %d.\n", err);
- exit (0);
- }
-
/* Call standard CBLAS API for dgemm */
+ time = omp_get_wtime();
cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, m, n, k, alpha, A, k, B, n, beta, C, n);
-
- /* Reconfigure memory if necessary */
- if(reconfig_mem_after_ticblas() == TICBLAS_SUCCESS) {
- printf("Memory reconfiguration after BLAS call finished.\n");
- }
+ time_diff = omp_get_wtime() - time;
+ gflops = ( 2.0 * m * n * k ) / time_diff / FLOPS_PER_UNIT_PERF;
+ printf("DGEMM time for (m,n,k) = (%d,%d,%d) is %e, GFLOPS is %e.\n", m,n,k, time_diff, gflops);
/* Straightforward matrix multiplication as reference */
mat_mpy(A, B, C_copy, m, n, k, alpha, beta);
precision_diff = diff_matrix(C, C_copy, m, k);
printf("Precision error is %e.\n", precision_diff);
+ /* Finalize TI CBLAS and reconfigure memory */
+ cleanup_after_ticblas();
+
return 0;
}
}
-/*==============================================================================
- * This function configures and initializes memory for BLAS calls
- *============================================================================*/
-int config_mem_for_ticblas(double *l2_buf, size_t l2_buf_size,
- double *msmc_buf, size_t msmc_buf_size,
- double *ddr_buf, size_t ddr_buf_size)
-{
- size_t smem_size_vfast, smem_size_fast, smem_size_med, smem_size_slow;
- void *l1d_SRAM_ptr;
- int l1d_cfg_err;
-
- /* First, verify the provided/available memory meet requirements */
- tiCblasGetSizes(&smem_size_vfast, &smem_size_fast, &smem_size_med, &smem_size_slow);
-
- printf("BLAS memory requirements - vfast size: %d, fast size: %d, medium size: %d, slow size: %d.\n", smem_size_vfast, smem_size_fast, smem_size_med, smem_size_slow);
-
- if( (smem_size_vfast> lib_get_L1D_total_size()) /* total available L1D */
- ||(smem_size_fast > l2_buf_size) /* provided L2 size */
- ||(smem_size_med > msmc_buf_size) /* provided MSMC memory */
- ||(smem_size_slow > ddr_buf_size)
- ) {
- printf("Provided memory is not enough for BLAS!\n");
- exit(0);
- }
-
- /* Configure L1D if necessary */
- l1D_SRAM_size_orig = lib_get_L1D_SRAM_size(); /* get current L1D SRAM size */
- l1d_cfg_err = LIB_CACHE_SUCCESS;
- printf("Original L1D SRAM size is: %d\n", l1D_SRAM_size_orig);
- printf("Required L1D SRAM size is: %d\n", smem_size_vfast);
- if(l1D_SRAM_size_orig < smem_size_vfast) { /* configure L1D if needs more SRAM */
- #pragma omp parallel
- {
- l1d_cfg_err = lib_L1D_config_SRAM(smem_size_vfast);
- if(l1d_cfg_err) {
- printf("L1D configuration fails on core %d!\n", lib_get_coreID());
- exit(1);
- }
- }
- }
-
- #pragma omp parallel
- {
- printf("New L1D SRAM size on core %d is: %d\n", lib_get_coreID(), lib_get_L1D_SRAM_size());
- }
-
- /* get L1D SRAM base address */
- l1d_SRAM_ptr = lib_get_L1D_SRAM_base();
- printf("L1D SRAM base address is 0x%x.\n", (unsigned int)l1d_SRAM_ptr);
-
- /* pass allocated memories for heap initialization */
- return(tiCblasInit(l1d_SRAM_ptr, lib_get_L1D_SRAM_size(),
- l2_buf, l2_buf_size,
- msmc_buf, msmc_buf_size,
- ddr_buf, ddr_buf_size));
-} /* config_mem_for_ticblas */
-
-/*==============================================================================
- * This function reconfigures L1D after processing is finished
- *============================================================================*/
-int reconfig_mem_after_ticblas()
-{
- int l1d_cfg_err;
-
- /* configure L1D back */
- l1d_cfg_err = LIB_CACHE_SUCCESS;
- if(l1D_SRAM_size_orig!=lib_get_L1D_SRAM_size()) {
- #pragma omp parallel
- {
- l1d_cfg_err = lib_L1D_config_SRAM(l1D_SRAM_size_orig);
- if(l1d_cfg_err) {
- printf("L1D reconfiguration fails on core %d!\n", lib_get_coreID());
- exit(2);
- }
- }
- }
-
- printf("L1D SRAM size reconfigured to: %d\n", lib_get_L1D_SRAM_size());
-
- return(TICBLAS_SUCCESS);
-} /* reconfig_mem_after_ticblas */
-
/******************************************************************************
* Straightforward implementation of matrix multiplication with row-major
diff --git a/examples/dsponly/dgemm_test/omp_config_bm.cfg b/examples/dsponly/dgemm_test/omp_config_bm.cfg
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2012-2015, Texas Instruments Incorporated
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name of Texas Instruments Incorporated nor the names of
- * its contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-
-/* Include OMP runtime in the build */
-var omp = xdc.useModule("ti.runtime.openmp.Settings");
-
-/* Set up section mappings: create an empty SectionSpec for each section
- * first; the load segment of each one is assigned in the groups below.
- * NOTE(review): `Program` (capitalised) is used here and for cpu.deviceName
- * but is not defined in this file -- presumably a global supplied by the
- * XDC configuration environment; confirm. */
-var program = xdc.useModule('xdc.cfg.Program');
-program.sectMap[".args"] = new Program.SectionSpec();
-program.sectMap[".bss"] = new Program.SectionSpec();
-program.sectMap[".cinit"] = new Program.SectionSpec();
-program.sectMap[".cio"] = new Program.SectionSpec();
-program.sectMap[".const"] = new Program.SectionSpec();
-program.sectMap[".data"] = new Program.SectionSpec();
-program.sectMap[".far"] = new Program.SectionSpec();
-program.sectMap[".fardata"] = new Program.SectionSpec();
-program.sectMap[".neardata"] = new Program.SectionSpec();
-program.sectMap[".rodata"] = new Program.SectionSpec();
-program.sectMap[".stack"] = new Program.SectionSpec();
-program.sectMap[".switch"] = new Program.SectionSpec();
-program.sectMap[".sysmem"] = new Program.SectionSpec();
-program.sectMap[".text"] = new Program.SectionSpec();
-
-/* Must place these sections in core local memory (the "L2SRAM" segment) */
-program.sectMap[".args"].loadSegment = "L2SRAM";
-program.sectMap[".cio"].loadSegment = "L2SRAM";
-program.sectMap[".stack"].loadSegment = "L2SRAM";
-
-/* Must place these sections in shared memory - DDR3/MSMC.
- * .switch and .text are not given a literal segment name: they are loaded
- * into whatever program.platform.codeMemory names. */
-program.sectMap[".bss"].loadSegment = "DDR3";
-program.sectMap[".cinit"].loadSegment = "DDR3";
-program.sectMap[".const"].loadSegment = "DDR3";
-program.sectMap[".data"].loadSegment = "DDR3";
-program.sectMap[".far"].loadSegment = "DDR3";
-program.sectMap[".fardata"].loadSegment = "DDR3";
-program.sectMap[".neardata"].loadSegment = "DDR3";
-program.sectMap[".rodata"].loadSegment = "DDR3";
-program.sectMap[".sysmem"].loadSegment = "DDR3";
-program.sectMap[".switch"].loadSegment = program.platform.codeMemory;
-program.sectMap[".text"].loadSegment = program.platform.codeMemory;
-
-/* Size the default stack: 0x08000 when the device name contains "DRA7XX",
- * 0x20000 for every other device. */
-var deviceName = String(Program.cpu.deviceName);
-if (deviceName.search("DRA7XX") == -1) { program.stack = 0x20000; }
-else { program.stack = 0x08000; }
-
-if (deviceName.search("DRA7XX") == -1) { program.heap = 0x08000000; } /* heap size, chosen by the same device-name test as the stack */
-else { program.heap = 0x00800000; } /* smaller heap when the device name contains "DRA7XX" */
-
diff --git a/examples/run_tests_evm.sh b/examples/run_tests_evm.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-./matmpy/matmpy # each line runs one program by a path relative to the current directory, in this order
-./dsyrk_test/dsyrk_test
-./ztrsm_test/ztrsm_test
-./dgemm_test/dgemm_test
-./eig/eig
-./ludinv/ludinv
-./ztrmm_test/ztrmm_test # NOTE(review): no exit status is checked anywhere, so a failing program does not stop the run
-
diff --git a/setup_env_C6678_rtos.sh b/setup_env_C6678_rtos.sh
--- /dev/null
+++ b/setup_env_C6678_rtos.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+export CGTROOT="/home/a0869574local/yocoto/tisdk-rtos/build/arago-tmp-external-linaro-toolchain/sysroots/x86_64-linux/usr/share/ti/cgt-c6x"
+export C6678_PDK_DIR="/home/a0869574local/ti/processor-sdk-rtos-c667x-evm-02.00.01.07/pdk_c667x_2_0_0"
+export PDK_DIR="/home/a0869574local/ti/processor-sdk-rtos-c667x-evm-02.00.01.07/pdk_c667x_2_0_0"
+export FC_DIR="/home/a0869574local/ti/processor-sdk-rtos-c667x-evm-02.00.01.07/framework_components_3_40_01_04"
+export XDAIS_DIR="/home/a0869574local/ti/processor-sdk-rtos-c667x-evm-02.00.01.07/xdais_7_24_00_04"
+export BIOS_DIR="/home/a0869574local/ti/processor-sdk-rtos-c667x-evm-02.00.01.07/bios_6_45_00_19"
+export OMP_DIR="/home/a0869574local/ti/processor-sdk-rtos-c667x-evm-02.00.01.07/openmp_dsp_c667x_2_02_00_02"
+export LIBARCH_DIR="/home/a0869574local/proclibs/libarch_intgit/libarch"
+export XDC_DIR="/home/a0869574local/ti-rtos-sdk-12-08/xdctools_3_31_02_38_core"
+export IPC_DIR="/home/a0869574local/ti/processor-sdk-rtos-c667x-evm-02.00.01.07/ipc_3_41_00_08"
+export EDMA3_DIR="/home/a0869574local/ti/processor-sdk-rtos-c667x-evm-02.00.01.07/edma3_lld_02_12_01_22"
+export PATH="/home/a0869574local/yocoto/tisdk-rtos/sources/oe-core/scripts:/home/a0869574local/yocoto/tisdk-rtos/build/arago-tmp-external-linaro-toolchain/sysroots/x86_64-linux/usr/bin/arm-linux-gnueabi:/home/a0869574local/yocoto/tisdk-rtos/build/arago-tmp-external-linaro-toolchain/sysroots/c667x-evm/usr/bin/crossscripts:/home/a0869574local/yocoto/tisdk-rtos/build/arago-tmp-external-linaro-toolchain/sysroots/x86_64-linux/usr/sbin:/home/a0869574local/yocoto/tisdk-rtos/build/arago-tmp-external-linaro-toolchain/sysroots/x86_64-linux/usr/bin:/home/a0869574local/yocoto/tisdk-rtos/build/arago-tmp-external-linaro-toolchain/sysroots/x86_64-linux/sbin:/home/a0869574local/yocoto/tisdk-rtos/build/arago-tmp-external-linaro-toolchain/sysroots/x86_64-linux/bin:/home/a0869574local/gcc-linaro-4.9-2015.05-x86_64_arm-linux-gnueabihf/bin:/home/a0869574local/gcc-linaro-4.9-2015.05-x86_64_arm-linux-gnueabihf/bin:/home/a0869574local/yocoto/tisdk-rtos/sources/oe-core/scripts:/home/a0869574local/yocoto/tisdk-rtos/sources/bitbake/bin:/home/a0869574local/gcc-linaro-4.9-2015.05-x86_64_arm-linux-gnueabihf/bin:/home/a0869574local/yocoto/tisdk-rtos/sources/oe-core/scripts:/home/a0869574local/yocoto/tisdk-rtos/sources/bitbake/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games"
diff --git a/setup_env_k2h_rtos.sh b/setup_env_k2h_rtos.sh
--- /dev/null
+++ b/setup_env_k2h_rtos.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+export CGTROOT="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.02/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x"
+export XDC_DIR="/home/a0869574local/ti-rtos-sdk-12-08/xdctools_3_31_02_38_core"
+export BIOS_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.02/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-sysbios-tree"
+export XDAIS_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.02/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-xdais-tree"
+export IPC_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.02/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-ipc-tree"
+export EDMA3_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.02/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-edma3lld-tree"
+export FC_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.02/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-framework-components-tree"
+export PDK_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.02/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-pdk-tree"
+export C6636_PDK_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.02/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-pdk-tree"
+export OMP_DIR="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.02.02/linux-devkit/sysroots/cortexa15hf-vfp-neon-linux-gnueabi/usr/share/ti/ti-omp-tree"
+export LIBARCH_DIR="/home/a0869574local/proclibs/libarch_intgit/libarch"
+
+export PATH="/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/x86_64-arago-linux/usr/share/ti/cgt-c6x/bin:/home/a0869574local/ti/processor-sdk-linux-k2hk-evm-02.00.01.07/linux-devkit/sysroots/x86_64-arago-linux/usr/bin:$PATH"