summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 571caf5)
raw | patch | inline | side by side (parent: 571caf5)
author | Jianzhong Xu <a0869574@ti.com> | Thu, 30 Apr 2015 21:24:40 +0000 (17:24 -0400)
committer | Jianzhong Xu <a0869574@ti.com> | Thu, 30 Apr 2015 21:24:40 +0000 (17:24 -0400)
index 5ea020a8b258a2d097e3090c587a86fe9ccc9f5d..bf52b835e6c5abf1535cef1e028ae9a425572f68 100644 (file)
#include <stdio.h>
#include <math.h>
#include <time.h>
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
#include "cblas.h"
-#ifdef __cplusplus
-extern "C" {
-#endif
-#include "cblas.h"
-#ifdef __cplusplus
-}
-#endif
-#define TUNING_START_SIZE_RECTAN_MATRIX 128
-#define NUM_MATRIX_SIZE_TO_BENCHMARK 4
+#define GEMM_MATRIX_SIZE_START 1024
+#define NUM_MATRIX_SIZE_TO_BENCHMARK 2
+#define NUM_TESTS (NUM_MATRIX_SIZE_TO_BENCHMARK*NUM_MATRIX_SIZE_TO_BENCHMARK*NUM_MATRIX_SIZE_TO_BENCHMARK)
#define HAS_MEMORY 1
#define NO_MEMORY 0
#define OFFLOAD 1
#define NO_OFFLOAD 0
-#define NUM_TEST_RUN 1
+#define NUM_TEST_RUN 5
/*-----------------------------------------------------------------------------
*----------------------------------------------------------------------------*/
int run_dgemm(int M, int N, int K, float *time, float *gflops);
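+/* Reference GFLOPS based on a 1 GHz K2H device: one entry per (M,N,K) test, in sweep order (K varies fastest, then N, then M); main() scales each entry by the device clock in GHz before comparing. */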
+float dgemm_gflops_ref[NUM_TESTS] =
+{21.6774,21.9383,22.3325,22.7754,22.6200,23.0515,23.3946,23.6324};
+
/*-----------------------------------------------------------------------------
* MAIN
*----------------------------------------------------------------------------*/
int main()
{
int num_size, dgemm_err;
- int M, N, K, m, n, k;
- float time_secs, gflops;
+ int M, N, K, m, n, k, test_idx;
+ float time_secs, gflops, gflops_ref, cpu_freq_GHz;
+ cl_platform_id platform;
+ cl_uint num_platforms;
+ cl_device_id devices;
+ cl_uint num_devices;
+ cl_uint cpu_freq;
+ size_t cpu_freq_size;
FILE *fp_time, *fp_gflops;
fp_time = fopen("dgemm_time.dat","w");
fp_gflops = fopen("dgemm_gflops.dat","w");
+ if(clGetPlatformIDs(1, &platform, &num_platforms) != CL_SUCCESS) {
+ printf("Error in clGetPlatformIDs\n.");
+ exit(0);
+ }
+
+ if(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &devices, &num_devices) != CL_SUCCESS) {
+ printf("Error in clGetDeviceIDs\n.");
+ exit(0);
+ }
+ if(clGetDeviceInfo(devices, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(cl_uint), (void *)&cpu_freq, &cpu_freq_size) != CL_SUCCESS) {
+ printf("Error in clGetDeviceInfo\n.");
+ exit(0);
+ }
+ cpu_freq_GHz = (float)cpu_freq/1e3; /* convert from MHz to GHz */
+ printf("Found %d devices.\n", num_devices);
+ printf("CPU frequency is %f GHz.\n", cpu_freq_GHz);
+
srand(12345);
/* setting up TI CBLAS during first call */
run_dgemm(1000, 1000, 1000, &time_secs, &gflops);
/* sweep M, K, and N */
- for (M=TUNING_START_SIZE_RECTAN_MATRIX,m=0; m<NUM_MATRIX_SIZE_TO_BENCHMARK; m++,M*=2)
+ test_idx = 0;
+ for (M=GEMM_MATRIX_SIZE_START,m=0; m<NUM_MATRIX_SIZE_TO_BENCHMARK; m++,M*=2)
{
- for (N=TUNING_START_SIZE_RECTAN_MATRIX,n=0; n<NUM_MATRIX_SIZE_TO_BENCHMARK; n++,N*=2)
+ for (N=GEMM_MATRIX_SIZE_START,n=0; n<NUM_MATRIX_SIZE_TO_BENCHMARK; n++,N*=2)
{
- for (K=TUNING_START_SIZE_RECTAN_MATRIX,k=0; k<NUM_MATRIX_SIZE_TO_BENCHMARK; k++,K*=2)
+ for (K=GEMM_MATRIX_SIZE_START,k=0; k<NUM_MATRIX_SIZE_TO_BENCHMARK; k++,K*=2)
{
- printf("Running DGEMM for (M,N,K) = (%d,%d,%d).\n", M,N,K);
+ printf("Running DGEMM for (M,N,K) = (%d,%d,%d).\t", M,N,K);
dgemm_err = run_dgemm(M, N, K, &time_secs, &gflops);
+
+ gflops_ref = dgemm_gflops_ref[test_idx++]; /* read reference GFLOPS */
+ gflops_ref = gflops_ref * cpu_freq_GHz; /* scale ref GFLOPS by CPU freq */
+ printf("Measured %f GFLOPS, reference %f GFLOPS.\n", gflops, gflops_ref);
+ if((gflops > gflops_ref*1.1) || (gflops < gflops_ref/1.1)) {
+ printf("DGEMM test FAILED! GFLOPS deviates from reference unacceptably.");
+ exit(0);
+ }
if(dgemm_err == -1) { /* out of memory for DSP offloading */
printf("Out of memory for (M,N,K) = (%d,%d,%d).\n", M,N,K);
fclose(fp_time);
fclose(fp_gflops);
+ printf("PASSED.\n");
return 0;
}
double total_GFLOPS = 0.0f;
int err_code = 0;
+ /*-------------------------------------------------------------------------
+ * Allocate space for the matrices.
+ *------------------------------------------------------------------------*/
+ double *A = (double *) __malloc_ddr((long long)M*(long long)K*(long long)sizeof(double));
+ double *B = (double *) __malloc_ddr((long long)K*(long long)N*(long long)sizeof(double));
+ double *C = (double *) __malloc_ddr((long long)M*(long long)N*(long long)sizeof(double));
+
+ if (!A || !B || !C)
+ {
+ printf("Could not allocate enough space for the arrays!");
+ if(A) __free_ddr(A);
+ if(B) __free_ddr(B);
+ if(C) __free_ddr(C);
+
+ return (-1);
+ }
+
total_time = 0.0;
for (iter = 0; iter < NUM_TEST_RUN; iter++)
- {
- /*-------------------------------------------------------------------------
- * Allocate space for the matrices.
- *------------------------------------------------------------------------*/
- double *A = (double *) __malloc_ddr((long long)M*(long long)K*(long long)sizeof(double));
- double *B = (double *) __malloc_ddr((long long)K*(long long)N*(long long)sizeof(double));
- double *C = (double *) __malloc_ddr((long long)M*(long long)N*(long long)sizeof(double));
-
- if (!A || !B || !C)
- {
- printf("Could not allocate enough space for the arrays!");
- if(A) __free_ddr(A);
- if(B) __free_ddr(B);
- if(C) __free_ddr(C);
-
- return (-1);
- }
-
- /*-------------------------------------------------------------------------
- * Initialize matrices
- *------------------------------------------------------------------------*/
- for (i = 0; i < (long long)M*K; ++i) A[i] = (double)rand()/RAND_MAX;// (double)(rand() % 5 + 1);
- for (i = 0; i < (long long)K*N; ++i) B[i] = (double)rand()/RAND_MAX;// (double)(rand() % 5 + 1);
- for (i = 0; i < (long long)M*N; ++i) C[i] = 0;
-
- int lda = ((order == CblasColMajor && transA == CblasNoTrans) ||
- (order == CblasRowMajor && transA == CblasTrans)) ? M : K;
-
- int ldb = ((order == CblasColMajor && transB == CblasNoTrans) ||
- (order == CblasRowMajor && transB == CblasTrans)) ? K : N;
-
- int ldc = (order == CblasColMajor) ? M : N;
-
- fflush(stdout);
-
- /*------------------------------------------------------------------------
- * Run and time dgemm
- *-----------------------------------------------------------------------*/
- tick();
- cblas_dgemm(order,transA,transB,M,N,K,alpha,A,lda,B,ldb,beta,C,ldc);
- time_secs = tock();
- total_time += time_secs;
- total_GFLOPS += operation_count/time_secs*1e-9;
-/*
- if(M==4096 && K==256 && N==16) {
- FILE *file_a = fopen("mat_a.dat","w");
- FILE *file_b = fopen("mat_b.dat","w");
- FILE *file_c = fopen("mat_c.dat","w");
-
- for(i=0; i < M*K; ++i) fprintf(file_a, "%1.10e\n",A[i]);
- for(i=0; i < K*N; ++i) fprintf(file_b, "%1.10e\n",B[i]);
- for(i=0; i < M*N; ++i) fprintf(file_c, "%1.10e\n",C[i]);
- }
-*/
-
- __free_ddr(A);
- __free_ddr(B);
- __free_ddr(C);
+ {
+ /*----------------------------------------------------------------------
+ * Initialize matrices
+ *---------------------------------------------------------------------*/
+ for (i = 0; i < (long long)M*K; ++i) A[i] = (double)rand()/RAND_MAX;
+ for (i = 0; i < (long long)K*N; ++i) B[i] = (double)rand()/RAND_MAX;
+ for (i = 0; i < (long long)M*N; ++i) C[i] = 0;
+
+ int lda = ((order == CblasColMajor && transA == CblasNoTrans) ||
+ (order == CblasRowMajor && transA == CblasTrans)) ? M : K;
+
+ int ldb = ((order == CblasColMajor && transB == CblasNoTrans) ||
+ (order == CblasRowMajor && transB == CblasTrans)) ? K : N;
+
+ int ldc = (order == CblasColMajor) ? M : N;
+
+ fflush(stdout);
+
+ /*------------------------------------------------------------------------
+ * Run and time dgemm
+ *-----------------------------------------------------------------------*/
+ tick();
+ cblas_dgemm(order,transA,transB,M,N,K,alpha,A,lda,B,ldb,beta,C,ldc);
+ time_secs = tock();
+ total_time += time_secs;
+ total_GFLOPS += operation_count/time_secs*1e-9;
}
+
+ __free_ddr(A);
+ __free_ddr(B);
+ __free_ddr(C);
*gflops = total_GFLOPS / (double)NUM_TEST_RUN;
*time = total_time / (double)NUM_TEST_RUN;
index 4b14824e2e8c4e470467957d7edff1eff75c3de8..dee1cce96ac56ed2925c83a37edfd093cc156613 100644 (file)
}
fclose(fp_time);
fclose(fp_gflops);
+
+ printf("Passed.\n");
return 0;
}
index 07285733dff65b54d9ab06f7262261546e01a54e..8b9e21a228e65ebe51edac9def11e69fa84b5ca8 100644 (file)
fclose(fp_time);
fclose(fp_gflops);
+ printf("Passed.\n");
+
return 0;
}
index d4192b5ba045563391a8e05266e919eefe1fcfbb..ab8855b738cb639d216555e6b81e23ad7403daf1 100644 (file)
fclose(fp_time);
fclose(fp_gflops);
+ printf("Passed.\n");
+
return 0;
}