summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 13f204a)
raw | patch | inline | side by side (parent: 13f204a)
author | Gaurav Mitra <gaurav@ti.com> | |
Wed, 31 Jul 2019 04:39:39 +0000 (04:39 +0000) | ||
committer | Gaurav Mitra <gaurav@ti.com> | |
Wed, 31 Jul 2019 05:51:08 +0000 (05:51 +0000) |
- Use -Wall when compiling OpenCL examples
- Fix issues leading to compile-time warnings for examples
(MCT-1218)
38 files changed:
index 1b8ed9772f3199470c15e9b44f66486acb7be180..90c2d2bc66ae83f9f2bf7eaa2b2e2cf81cff7908 100644 (file)
run_kernel_wait(devset, k_io_args, buf, -1, "# final k_io w/o error:");
IOQ.enqueueReadBuffer(buf, CL_TRUE, 0, size, ary);
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
exit(-1);
}
}
- catch (Error err)
+ catch (Error& err)
{
cl_int status;
ev.getInfo(CL_EVENT_COMMAND_EXECUTION_STATUS, &status);
ev = k(eargs, buf, size, exit_gid);
ev.setCallback(CL_COMPLETE, ev_complete_func, data);
}
- catch (Error err)
+ catch (Error& err)
{
cl_int status;
ev.getInfo(CL_EVENT_COMMAND_EXECUTION_STATUS, &status);
index b26e20414e8406b0e70ae5d4a0ddd334de727d81..150673651981e667bbd3409f18cdecf0db3e37a8 100644 (file)
--- a/examples/buffer/main.cpp
+++ b/examples/buffer/main.cpp
int main(int argc, char *argv[])
{
#endif
- cl_int err = CL_SUCCESS;
int bufsize = sizeof(src);
for (int i=0; i < NumElements; ++i) { src[i] = i; dst[i] = 0; }
- try
+ try
{
- Context context(CL_DEVICE_TYPE_ACCELERATOR);
+ Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
Buffer buf (context, CL_MEM_READ_ONLY, bufsize);
Q.enqueueWriteBuffer(buf, CL_TRUE, 0, bufsize, src);
Q.enqueueReadBuffer (buf, CL_TRUE, 0, bufsize, dst);
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
if (memcmp(dst, src, bufsize) != 0) { cout << "Failed!" << endl;
RETURN(-1); }
- else cout << "Passed!" << endl;
+ else cout << "Passed!" << endl;
RETURN(0);
}
index c9665bfc78315f20c89503cfefa9422eea61e270..214d3d22f992ebdff5531d79f339e26dbb277472 100644 (file)
--- a/examples/ccode/main.cpp
+++ b/examples/ccode/main.cpp
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
/*-------------------------------------------------------------------------
* Begin OpenCL Setup code in try block to handle any errors
*------------------------------------------------------------------------*/
- try
+ try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
Program program = Program(context, source);
/*---------------------------------------------------------------------
- * Build the opencl c code and tell it to link with the specified
+ * Build the opencl c code and tell it to link with the specified
* object file.
*--------------------------------------------------------------------*/
- program.build(devices, "ccode.obj");
+ program.build(devices, "ccode.obj");
#else
Program::Binaries binary(1, make_pair(oclwrapper_dsp_bin,
sizeof(oclwrapper_dsp_bin)));
NDRange(1)); // WG size
/*---------------------------------------------------------------------
- * Call the second kernel -> c code function.
- * The result of which should be 0x7f subtracted from each element
+ * Call the second kernel -> c code function.
+ * The result of which should be 0x7f subtracted from each element
* of buffer.
*
- * The modifications to the buffer from the last kernel are still
- * valid for the invocation of the second kernel. Ie. the data in the
+ * The modifications to the buffer from the last kernel are still
+ * valid for the invocation of the second kernel. Ie. the data in the
* buffer is persistent.
*--------------------------------------------------------------------*/
Kernel kernel2(program, "oclwrapper2");
/*-------------------------------------------------------------------------
* Let exception handling deal with any OpenCL error cases
*------------------------------------------------------------------------*/
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
- << ocl_decode_error(err.err()) << ")" << endl;
+ << ocl_decode_error(err.err()) << ")" << endl;
}
/*-------------------------------------------------------------------------
* Check the buffer for all elements == 0x80
*------------------------------------------------------------------------*/
- for (int i = 0; i < sizeof(data); ++i) assert (data[i] == (char)0x80);
+ for (cl_uint i = 0; i < sizeof(data); ++i) assert (data[i] == (char)0x80);
cout << "Success!" << endl;
RETURN(0);
index d6d52ea34dd251a1d30ade6ba594817fcd53c5cf..f6ab2bb09877f8966a5e605bfd1a4049f7efa676 100644 (file)
--- a/examples/conv1d/main.cpp
+++ b/examples/conv1d/main.cpp
int input_numcompunits = 0;
if (argc > 1) input_numcompunits = atoi(argv[1]); // valid: 1, 2, 4, 8
- try
+ try
{
Context context (CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
#else
float *pGolden = (float *) __malloc_ddr(bufSize);
#endif
- if (pGolden == NULL)
+ if (pGolden == NULL)
{
printf("Failed to allocate memory for golden results\n");
exit(0);
__free_ddr(pGolden);
#endif
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
RETURN(-1);
} else
{
- cout << "Pass!" << endl;
+ cout << "Pass!" << endl;
RETURN(0);
}
}
diff --git a/examples/dgemm/libcblas_dgemm_dsp/cblas_dgemm.cpp b/examples/dgemm/libcblas_dgemm_dsp/cblas_dgemm.cpp
index a2ba4c603598a5e67cdf04a81ff9e9da3c174aec..739c30f95612a6f4e44c503f9bb934732242446d 100644 (file)
delete bufMsmc;
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl;
exit(-1);
diff --git a/examples/dgemm/libcblas_dgemm_dsp/init.cpp b/examples/dgemm/libcblas_dgemm_dsp/init.cpp
index b27577e0e7f1dafadab9ae746ed778ead7f610e8..3a429695b6529ecfcb40f133035be19a75fd72c5 100644 (file)
extern "C" DLL_PUBLIC
void ocl_init(bool calc_check, int *NUMCOMPUNITS)
{
- try
+ try
{
ocl.context = new Context(CL_DEVICE_TYPE_ACCELERATOR);
* Create two queues for use. Api can determine which to use
*-----------------------------------------------------------------------*/
ocl.queueInOrder = new CommandQueue(*(ocl.context), devices[0]);
- ocl.queueOutOfOrder = new CommandQueue(*(ocl.context), devices[0],
+ ocl.queueOutOfOrder = new CommandQueue(*(ocl.context), devices[0],
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE);
ocl.K_cblas_dgemm = new Kernel(*(ocl.program), "K_cblas_dgemm_omp");
.bind(*(ocl.queueInOrder), NDRange(1), NDRange(1));
null().wait();
}
- catch (Error err)
+ catch (Error& err)
{cerr<<"ERROR: "<<err.what()<<endl;}
}
index 67dc7225ffb05a7b3cf00d62fe4e9989e879176c..cf9ae082168fd9f5fc117d81e6679d026c01f7ed 100644 (file)
--- a/examples/dgemm/main.cpp
+++ b/examples/dgemm/main.cpp
int M = 1024;
int N = 1024;
int K = 1024;
-double alpha = 1.0;
+double alpha = 1.0;
double beta = 0.0;
CBLAS_ORDER order = CblasColMajor;
CBLAS_TRANSPOSE transA = CblasNoTrans;
* Operation printing helpers
*----------------------------------------------------------------------------*/
const char *op(CBLAS_TRANSPOSE t) { return t == CblasTrans ? "'" : ""; }
-const char *maj(CBLAS_ORDER o) { return (o == CblasColMajor) ? "cMaj "
+const char *maj(CBLAS_ORDER o) { return (o == CblasColMajor) ? "cMaj "
: "rMaj "; }
/*-----------------------------------------------------------------------------
double secs;
/*-------------------------------------------------------------------------
- * Allocate space for the matrices. The matrices that will be passed to
+ * Allocate space for the matrices. The matrices that will be passed to
* the DSP are allocated using device memory. The Ccpu array is not passed
* to the dsp and so can use system memory.
*------------------------------------------------------------------------*/
for (int i = 0; i < M*N; ++i) Ccpu[i] = C[i] = 0;
cout << "Done. Starting Dgemm. " << endl;
- cout << maj(order) << "C["<<M<<","<<N<<"] = "
+ cout << maj(order) << "C["<<M<<","<<N<<"] = "
<< "A"<<op(transA)<<"["<<M<<","<<K<<"] * "
<< "B"<<op(transB)<<"["<<K<<","<<N<<"]: "
<< endl
int ldc = (order == CblasColMajor) ? M : N;
/*-------------------------------------------------------------------------
- * Calling ocl_init is not required, but it does prime the 1 time OpenCL
- * context creation, so that the first dsp cblas_dgemm call timing is not
+ * Calling ocl_init is not required, but it does prime the 1 time OpenCL
+ * context creation, so that the first dsp cblas_dgemm call timing is not
* skewed by this setup cost.
*------------------------------------------------------------------------*/
int NUMCOMPUNITS;
cout << "FAIL with " << num_errors << " errors!" << endl;
return -1;
}
- else
+ else
{
cout << "PASS!" << endl;
return 0;
void PrintUsageAndExit()
{
cout << "C[M,N] = A[M,K] * B[K,N]" << endl
- << "Default value of M,N,K is " << M << endl
+ << "Default value of M,N,K is " << M << endl
<< "Usage: dgemm [options] " << endl
<< "Options: " << endl
<< "-M arg : Number of rows for array C" << endl
index a768826c2a77e1fdfe9e375a616d32b3ff6a30d2..bc20f222739d8b813d69472a901cf1c8161ac451 100644 (file)
using namespace std;
/*-----------------------------------------------------------------------------
-* This example demonstrates how a heap may be created and used on the DSP
+* This example demonstrates how a heap may be created and used on the DSP
* for kernels that call legacy code that needs heap capability. There are dsp
* builtin functions to create and manipulate a user defined heap in both msmc
-* and ddr. These heaps are persistent as long as the underlying memory for
-* them is allocated. In this example we create OpenCL buffers that provide
-* for the underlying memory store. The heaps are active and persistent from
+* and ddr. These heaps are persistent as long as the underlying memory for
+* them is allocated. In this example we create OpenCL buffers that provide
+* for the underlying memory store. The heaps are active and persistent from
* the time they are initialized until the buffers are deallocated.
*
-* Additionally, the standard malloc, calloc, free, etc calls are already
+* Additionally, the standard malloc, calloc, free, etc calls are already
* supported on the dsp, but the underlying memory for that heap is limited.
-* It currently is approximately 8MB. If your heap needs are under that size,
+* It currently is approximately 8MB. If your heap needs are under that size,
* and DDR allocation is ok for you, then the below mechanism is not needed.
*----------------------------------------------------------------------------*/
int main(int argc, char *argv[])
{
- try
+ try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
- devices.resize(1); // Only run on one device for demonstration
+ devices.resize(1); // Only run on one device for demonstration
/*------------------------------------------------------------------------
* OpenCL Build the precompiled kernels
/*------------------------------------------------------------------------
* Create the underlying memory store for the heaps with OpenCL Buffers
* Call kernels to initialize a DDR based and a MSMC based heap, the init
- * step only needs to run once and one 1 core only. See the functor
+ * step only needs to run once and one 1 core only. See the functor
* mapping above that defines the global size to be 1.
*-----------------------------------------------------------------------*/
int ddr_heap_size = 16 << 20; // 16MB
cout << endl;
/*------------------------------------------------------------------------
- * On each core alloc memory from both ddr and msmc. Should see same memory
+ * On each core alloc memory from both ddr and msmc. Should see same memory
* from above alloc_and_free call. This time the memory is not freed.
*-----------------------------------------------------------------------*/
alloc_only(ddr_alloc_size, msmc_alloc_size).wait();
alloc_only(ddr_alloc_size, msmc_alloc_size).wait();
cout << endl;
}
- catch (Error err)
+ catch (Error& err)
{ cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; }
}
index 9d2431a4cc1dec6d5c57b87944e6d3ffa9fd93ff..4b78a6fdc6fcc982afa42183d916826002db37a9 100644 (file)
int main(int argc, char *argv[])
{
#endif
- int i, j;
struct timespec t0, t1;
int channel_size = 2 * FFTSZ * sizeof(float);
bool verbose = argc > 1;
printf("fft_db: %d usecs\n", us_diff(t0, t1));
failed |= validate_output(y, FFTCHS, FFTSZ, verbose);
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
index 5377a4d0cb7793f1090949d360ebb8194b22fc8b..74a286fd59bbf9e838939e74025ebe84e19f0fd9 100644 (file)
--- a/examples/edmabw/main.cpp
+++ b/examples/edmabw/main.cpp
int main(int argc, char* argv[])
{
#endif
- cl_int err = CL_SUCCESS;
const cl_device_partition_property device_partition_properties[3] = {
CL_DEVICE_PARTITION_EQUALLY, /* Divide equally */
1, /* 1 compute unit/subdevice */
__free_ddr(result);
free(all_results);
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
{
cl_ulong msmc_size = 0;
ocl_device.getInfo(CL_DEVICE_MSMC_MEM_SIZE_TI, &msmc_size);
- if (msmc_size < bufsize) return 0;
+ if (msmc_size < static_cast<cl_ulong>(bufsize)) return 0;
}else return 0;
return CL_MEM_USE_MSMC_TI;
index 8c77742b8e1478cb1ca97e8fce4c76fcef64bc91..2345124ff11d8a98dd505436061b801e0c35bed4 100644 (file)
int main(int argc, char *argv[])
{
#endif
- cl_int err = CL_SUCCESS;
int bufsize = sizeof(src);
for (int i=0; i < bytes; ++i) { src[i] = 0xAB; dst[i] = 0; }
- try
+ try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
Q.enqueueNDRangeKernel(kernel,NullRange,NDRange(num_chunks),NDRange(1));
Q.enqueueReadBuffer (bufDst, CL_TRUE, 0, bufsize, dst);
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
}
for (int i=0; i < bytes; ++i)
- if (dst[i] != 0x000000AB)
+ if (dst[i] != 0x000000AB)
{ cout << "Failed at Element " << i << endl; RETURN(-1); }
- cout << "Success!" << endl;
+ cout << "Success!" << endl;
RETURN(0);
}
index 4c8205affceed6d12f174e7136af1427a4f10ae5..41c47e896e3dd37036a9fbf8dc5941762a4d8d3e 100644 (file)
#endif
} // end try
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
diff --git a/examples/make.inc b/examples/make.inc
index 698d5c6eff591562ab1d8b94e216d193af14141e..b96590a459fcdee1e55074a06dd6f1c3263a066b 100644 (file)
--- a/examples/make.inc
+++ b/examples/make.inc
# Do not use /usr/include on host machine, it might have different gcc
# versions and could cause problems.
CXXFLAGS += "--sysroot=$(TARGET_ROOTDIR)"
- CXXFLAGS += -I$(TI_OCL_INSTALL)/usr/include
+ CXXFLAGS += -I$(TI_OCL_INSTALL)/usr/include -Wall
CXXFLAGS += -I$(TARGET_ROOTDIR)/usr/include
# If cross-compiling, provide path to dependent ARM libraries on the
# target native compilation
CXX = g++
- CXXFLAGS += -I$(TI_OCL_INSTALL)/usr/include
+ CXXFLAGS += -I$(TI_OCL_INSTALL)/usr/include -Wall
LDFLAGS += -L$(TI_OCL_INSTALL)/usr/lib
LIBS += -lbfd
diff --git a/examples/make_rtos.inc b/examples/make_rtos.inc
index c5bf164a6469710d432047f1223c77ffd1ce2250..6da716bb0a1f03281329e711f4c60730d2924bee 100644 (file)
--- a/examples/make_rtos.inc
+++ b/examples/make_rtos.inc
CXX_FLAGS += -Dfar= -D__DYNAMIC_REENT__ -D_TI_RTOS -fpermissive -std=c++11
CXX_FLAGS += -Wno-ignored-attributes
+CXX_FLAGS += -Wall
CFLAGS += -c -mcpu=cortex-a15 -mabi=aapcs -mapcs -mfpu=neon -mfloat-abi=hard -ffunction-sections -fdata-sections $(CCPROFILE_$(PROFILE)) @$(CONFIG)/compiler.opt -I"$(GCC_ARM_NONE_TOOLCHAIN)/arm-none-eabi/include/newlib-nano" -I. -I$(TI_OCL_INSTALL)/usr/include
LDFLAGS += $(LDPROFILE_$(PROFILE)) -mfloat-abi=hard -nostartfiles -static -Wl,--gc-sections
index 11a867bd11d9fa66b57a2c4b24e5fd8721dcafb3..6c47969518a4935475cab3340fa7200405f701c8 100644 (file)
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#define DIM 500
#define CTR_X -0.743644177934177585953534617147
#define CTR_Y 0.131826205602324997290253350002
-#define RANGE 3.0
+#define RANGE 3.0
#define ZOOM 1.25
/******************************************************************************
bool sdl_initialized = true;
SDL_Surface *data_sf;
SDL_Surface *screen;
- if (SDL_Init(SDL_INIT_VIDEO) >= 0 &&
+ if (SDL_Init(SDL_INIT_VIDEO) >= 0 &&
SDL_SetVideoMode(DIM, DIM, 24, SDL_HWSURFACE))
{
- data_sf = SDL_CreateRGBSurfaceFrom(rgb, DIM, DIM, 24, DIM * 3,
+ data_sf = SDL_CreateRGBSurfaceFrom(rgb, DIM, DIM, 24, DIM * 3,
0x000000ff, 0x0000ff00, 0x00ff0000, 0);
screen = SDL_GetVideoSurface();
std::string title("Mandelbrot");
SDL_WM_SetCaption(title.c_str(), NULL );
}
- else
+ else
sdl_initialized = false;
/*-------------------------------------------------------------------------
* Begin OpenCL Setup code in try block to handle any errors
*------------------------------------------------------------------------*/
- try
+ try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
int numDevices = devices.size();
std::vector<Event> ev(numDevices);
-
+
CommandQueue* Q[MAX_DEVICES];
Buffer buffer (context, CL_MEM_WRITE_ONLY, sizeof(rgb));
{
devices[d].getInfo(CL_DEVICE_NAME, &str);
cout << " " << str << endl;
-
+
Q[d] = new CommandQueue(context, devices[d]);
}
cout << endl;
istreambuf_iterator<char>());
Program::Sources source(1, make_pair(kSrc.c_str(), kSrc.length()));
Program program = Program(context, source);
- program.build(devices);
+ program.build(devices);
/*---------------------------------------------------------------------
* Setup the invariant arguments to the kernel
{
int bufChunk = sizeof(rgb) / numDevices;
- Q[c]->enqueueNDRangeKernel(kernel,
+ Q[c]->enqueueNDRangeKernel(kernel,
NDRange(0, c * DIM / numDevices), // offset
NDRange(DIM, DIM / numDevices), // global size
NDRange(DIM, 1)); // WG size
- Q[c]->enqueueReadBuffer(buffer, CL_FALSE,
- c * bufChunk, bufChunk,
+ Q[c]->enqueueReadBuffer(buffer, CL_FALSE,
+ c * bufChunk, bufChunk,
&rgb[c * bufChunk], NULL, &ev[c]);
}
clock_gettime(CLOCK_MONOTONIC, &tp_end);
double elapsed = clock_diff (&tp_start, &tp_end);
total_elapsed += elapsed;
- printf("Frame: %d, \tFPS: %5.2f, \tZoom: %.3g\n" , frame,
+ printf("Frame: %d, \tFPS: %5.2f, \tZoom: %.3g\n" , frame,
1.0/elapsed, RANGE/range);
/*------------------------------------------------------------------
* Display the image if SDL successfully initialized.
- * NOTE: Requires X server to display
+ * NOTE: Requires X server to display
*-----------------------------------------------------------------*/
if(sdl_initialized)
{
SDL_Event event;
SDL_PollEvent(&event);
if (event.type == SDL_QUIT) { SDL_Quit(); exit(0); }
-
+
if (SDL_BlitSurface(data_sf, NULL, screen, NULL) == 0)
SDL_UpdateRect(screen, 0, 0, 0, 0);
}
- }
+ }
printf("Total Time Generating frames: %8.4f secs\n", total_elapsed);
/*---------------------------------------------------------------------
* Cleanup OpenCL objects
*--------------------------------------------------------------------*/
- for (int d = 0; d < numDevices; d++) delete Q[d];
+ for (int d = 0; d < numDevices; d++) delete Q[d];
}
/*-------------------------------------------------------------------------
* Let exception handling deal with any OpenCL error cases
*------------------------------------------------------------------------*/
- catch (Error err)
+ catch (Error& err)
{ cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; }
}
index 1a54ba1a4ea2f9cb2ff1296929f074e47e81464f..fe2de94e14956cbc441ee379131344c99dbe0146 100644 (file)
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
#define DIM 500
#define CTR_X -0.743644177934177585953534617147
#define CTR_Y 0.131826205602324997290253350002
-#define RANGE 3.0
+#define RANGE 3.0
#define ZOOM 1.25
/******************************************************************************
static double clock_diff (struct timespec *t1, struct timespec *t2)
{ return t2->tv_sec - t1->tv_sec + (t2->tv_nsec - t1->tv_nsec) / 1e9; }
-extern void mandelbrot_cpu(unsigned char *buf, int dim,
+extern void mandelbrot_cpu(unsigned char *buf, int dim,
double ctr_x, double ctr_y, double range, int max_iterations);
/******************************************************************************
SDL_Surface *screen;
if ( (SDL_Init(SDL_INIT_VIDEO) >= 0) &&
- (SDL_SetVideoMode(DIM, DIM, 24, SDL_HWSURFACE)))
+ (SDL_SetVideoMode(DIM, DIM, 24, SDL_HWSURFACE)))
{
sdl_initialized = true;
- data_sf = SDL_CreateRGBSurfaceFrom(rgb, DIM, DIM, 24, DIM * 3,
+ data_sf = SDL_CreateRGBSurfaceFrom(rgb, DIM, DIM, 24, DIM * 3,
0x000000ff, 0x0000ff00, 0x00ff0000, 0);
screen = SDL_GetVideoSurface();
std::string title("Mandelbrot Native");
double elapsed = clock_diff (&tp_start, &tp_end);
total_elapsed += elapsed;
- printf("Frame: %d, \tFPS: %5.2f, \tZoom: %.3g\n" , frame,
+ printf("Frame: %d, \tFPS: %5.2f, \tZoom: %.3g\n" , frame,
1.0/elapsed, RANGE/range);
/*------------------------------------------------------------------
*-----------------------------------------------------------------*/
if (sdl_initialized)
{
- SDL_Event event;
- SDL_PollEvent(&event);
- if (event.type == SDL_QUIT) { SDL_Quit(); exit(0); }
-
- if (SDL_BlitSurface(data_sf, NULL, screen, NULL) == 0)
- SDL_UpdateRect(screen, 0, 0, 0, 0);
+ SDL_Event event;
+ SDL_PollEvent(&event);
+ if (event.type == SDL_QUIT) { SDL_Quit(); exit(0); }
+
+ if (SDL_BlitSurface(data_sf, NULL, screen, NULL) == 0)
+ SDL_UpdateRect(screen, 0, 0, 0, 0);
}
- }
+ }
printf("Total Time Generating frames: %8.4f secs\n", total_elapsed);
}
index 7a207554bce8935f071d05a7b8270168d0eeae6b..58a1f2c7ebfb3c35a53bdd76943a4c3a92ce8c42 100644 (file)
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
index 5dce775730b8ab328eea386f270fb926d9a5b3b3..90f84254b7f8df9ca286fcd609c88427d235dab6 100644 (file)
--- a/examples/matmpy/main.cpp
+++ b/examples/matmpy/main.cpp
using std::endl;
#define DIM 256
-const int mat_N = DIM;
-const int mat_K = DIM;
-const int mat_M = DIM;
+const int mat_N = DIM;
+const int mat_K = DIM;
+const int mat_M = DIM;
#ifndef _TI_RTOS
float A [mat_N * mat_K];
static void print_mat(float *mat, int rows, int cols);
static void print_result(float *mat, float *gold, int rows, int cols);
static float dotprod(const float * A, const float * B, int n);
-static void cpu_mat_mpy(const float *A, const float *B, float *C,
+static void cpu_mat_mpy(const float *A, const float *B, float *C,
int N, int K, int M);
/******************************************************************************
for (int i=0; i < mat_K * mat_M; ++i) B[i] = rand() % 5 + 1;
for (int i=0; i < mat_N * mat_M; ++i) C[i] = 0.0;
- try
+ try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
Program program = Program(context, devices, binary);
program.build(devices);
#endif
-
+
Buffer bufB (context, CL_MEM_READ_ONLY, mat_size);
Buffer bufGold(context, CL_MEM_READ_ONLY, mat_size);
Kernel kernel (program, "ocl_matmpy");
use_msmc = 0;
break;
}
- std::vector<Buffer> bufA(nDev, Buffer(context,
+ std::vector<Buffer> bufA(nDev, Buffer(context,
CL_MEM_READ_ONLY|use_msmc, AChunk));
std::vector<Buffer> bufC(nDev, Buffer(context, CL_MEM_WRITE_ONLY, CChunk));
std::vector<Event> ev(nDev, Event());
clock_gettime(CLOCK_MONOTONIC, &tp_start);
for (int d = 0; d < nDev; d++)
{
- Q[d]->enqueueWriteBuffer(bufA[d], CL_FALSE, 0, AChunk,
+ Q[d]->enqueueWriteBuffer(bufA[d], CL_FALSE, 0, AChunk,
&A[d*AChunk/sizeof(float)]);
Q[d]->enqueueWriteBuffer(bufB, CL_FALSE, 0, mat_size, B);
*-------------------------------------------------------------------*/
kernel.setArg(0, bufA[d]);
kernel.setArg(2, bufC[d]);
- Q[d]->enqueueNDRangeKernel(kernel, NullRange, NDRange(mat_M/nDev),
+ Q[d]->enqueueNDRangeKernel(kernel, NullRange, NDRange(mat_M/nDev),
NDRange(1));
- Q[d]->enqueueReadBuffer(bufC[d], CL_FALSE, 0, CChunk,
+ Q[d]->enqueueReadBuffer(bufC[d], CL_FALSE, 0, CChunk,
&C[d*CChunk/sizeof(float)], NULL, &ev[d]);
}
- for (int d = 0; d < Q.size(); d++) ev[d].wait();
+ for (cl_uint d = 0; d < Q.size(); d++) ev[d].wait();
clock_gettime(CLOCK_MONOTONIC, &tp_end);
/*---------------------------------------------------------------------
* Cleanup OpenCL objects
*--------------------------------------------------------------------*/
- for (int d = 0; d < Q.size(); d++) delete Q[d];
+ for (cl_uint d = 0; d < Q.size(); d++) delete Q[d];
double elapsed = clock_diff (&tp_start, &tp_end);
printf("OpenCL dispatching to %d DSP(S): %6.4f secs\n", nDev, elapsed);
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
int x = i / mat_M;
int y = i % mat_M;
- std::cout << "Error at [" << x << "][" << y << "] : "
- << Golden[i] << " != "
+ std::cout << "Error at [" << x << "][" << y << "] : "
+ << Golden[i] << " != "
<< C[i] << std::endl;
RETURN(-1);
}
/******************************************************************************
* cpu_mat_mpy
******************************************************************************/
-void cpu_mat_mpy(const float * A, const float * B, float * C, int mat_N,
+void cpu_mat_mpy(const float * A, const float * B, float * C, int mat_N,
int mat_K, int mat_M)
{
#pragma omp parallel for
}
/******************************************************************************
-* clock_diff
+* clock_diff
******************************************************************************/
static double clock_diff (struct timespec *t1, struct timespec *t2)
{ return t2->tv_sec - t1->tv_sec + (t2->tv_nsec - t1->tv_nsec) / 1e9; }
index dc1ee78293c8cb2ebc2be36d9b2e66708be3ba3f..70ffc9ab4fc1c14f240794541fe5ceeff72f46eb 100644 (file)
#include "gaussRandom.h"
#include "utilityRoutines.h"
-#include "generateRandomGaissian.h"
+#include "generateRandomGaussian.h"
#define MAX_ELEMENTS (32*1024)
/*----------------------------------------------------------------------
* Let exception handling deal with any OpenCL error cases
*--------------------------------------------------------------------*/
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what ()
<< "(" << ocl_decode_error (err.err ()) << ")" << endl;
diff --git a/examples/monte_carlo/generateRandomGaissian.h b/examples/monte_carlo/generateRandomGaussian.h
similarity index 96%
rename from examples/monte_carlo/generateRandomGaissian.h
rename to examples/monte_carlo/generateRandomGaussian.h
index 5a54185d7451882572ab092264852622f94d1f18..4a6a068f1ad13de0c3c129daef159907a50abecd 100644 (file)
rename from examples/monte_carlo/generateRandomGaissian.h
rename to examples/monte_carlo/generateRandomGaussian.h
index 5a54185d7451882572ab092264852622f94d1f18..4a6a068f1ad13de0c3c129daef159907a50abecd 100644 (file)
-static void longPseudoRandom(struct initial_t value[],
- int index,
+static void longPseudoRandom(struct initial_t value[],
+ int index,
int N,
float *p_out )
{
unsigned long mulV ;
unsigned long addV ;
unsigned long l_l, l_h ;
- unsigned long long aux1 ,aux2 ;
+ unsigned long long aux1 ;
//unsigned long a1,a2,a3,a4 ;
float w, x1, x2, y1, y2 ;
x2 = x_aux + y_aux ;
x2 = 2.0 * x2 -1.0 ;
w = x1 * x1 + x2 * x2 ;
- if (w < 1)
+ if (w < 1)
{
counter-- ;
x_aux = -log(w) ;
y1 = x1 * b_aux ; //w ;
y2 = x2 * b_aux ; //w ;
// printf (" results %d %f %f %f \n", counter,x1,x2,w) ;
- //printf("\n %d -> w %f -log %f two_oneOver %f \n",
+ //printf("\n %d -> w %f -log %f two_oneOver %f \n",
//counter,w,x_aux,y_aux) ;
- //printf(" multiply %f sqrt %f y1 %f y2 %f \n",
+ //printf(" multiply %f sqrt %f y1 %f y2 %f \n",
//a_aux, b_aux, y1, y2 ) ;
*p_out++ = y1 ;
*p_out++ = y2 ;
index d017ac2ba2b98c14569650785d5bbdff58c35ede..329dadb1eb6ca1869afa6088eb430162541d97c6 100644 (file)
}
+#if 0
+/* Debug routines*/
+
static void
printLong (unsigned long x)
{
printf (" %x %e \n", (unsigned int) x, x);
}
-
-
static void
fprintLongLong (FILE * p, int *i, unsigned long long v)
{
*i = ii;
}
-
-
-
-
static void
write_X_Y_W_to_file (FILE * fp, float *scratch1, float *scratch2, int N)
{
}
}
+#endif
diff --git a/examples/null/main.cpp b/examples/null/main.cpp
index 6adecfe10c54e86d1c01550a56b65a58b41bffc2..86437caa3a9b810d0abfd24f94d3a22407b05af0 100644 (file)
--- a/examples/null/main.cpp
+++ b/examples/null/main.cpp
#endif
struct timespec t0, t1;
- try
+ try
{
Context context (CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
sizeof(kernel_dsp_bin)));
Program program = Program(context, devices, binary);
#endif
- program.build(devices);
+ program.build(devices);
Kernel kernel(program, "Null");
KernelFunctor null = kernel.bind(Q, NDRange(1), NDRange(1));
ocl_event_times(ev, "Null Kernel Exec");
}
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
}
- cout << "Done!" << endl;
+ cout << "Done!" << endl;
RETURN(0);
}
index 28c483ede6d4baa0a888faee34d904c247cd3d55..d1d06e09e34cc12b1d8f375d059d673807ecf0ee 100644 (file)
int main(int argc, char *argv[])
{
- cl_int err = CL_SUCCESS;
int bufsize = sizeof(srcA);
- for (int i=0; i < NumElements; ++i)
- {
- srcA[i] = srcB[i] = i<<2;
- Golden[i] = srcB[i] + srcA[i];
+ for (int i=0; i < NumElements; ++i)
+ {
+ srcA[i] = srcB[i] = i<<2;
+ Golden[i] = srcB[i] + srcA[i];
dst[i] = 0;
}
- try
+ try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
Q.enqueueWriteBuffer(bufA, CL_FALSE, 0, bufsize, srcA, NULL, &ev1);
Q.enqueueWriteBuffer(bufB, CL_FALSE, 0, bufsize, srcB, NULL, &ev2);
- Q.enqueueNDRangeKernel(kernel, NullRange, NDRange(NumVecElements),
+ Q.enqueueNDRangeKernel(kernel, NullRange, NDRange(NumVecElements),
NDRange(WorkGroupSize), NULL, &ev3);
Q.enqueueReadBuffer (bufDst, CL_TRUE, 0, bufsize, dst, NULL, &ev4);
ocl_event_times(ev3, "Kernel Exec");
ocl_event_times(ev4, "Read BufDst");
}
- catch (Error err)
+ catch (Error& err)
{ cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; }
for (int i=0; i < NumElements; ++i)
- if (Golden[i] != dst[i])
+ if (Golden[i] != dst[i])
{ cout << "Failed at Element " << i << endl; return -1; }
- cout << "Success!" << endl;
+ cout << "Success!" << endl;
}
index 5c00907b6726150fc0ca3b54cbb03eb7033bf712..7c5d97c7e3d90d2787598ef3cce4b7a3dc3a9c22 100644 (file)
cl_short *Golden = (cl_short*) __malloc_ddr(bufsize);
#endif
- for (int i=0; i < NumElements; ++i)
- {
- srcA[i] = srcB[i] = i<<2;
- Golden[i] = srcB[i] + srcA[i];
+ for (int i=0; i < NumElements; ++i)
+ {
+ srcA[i] = srcB[i] = i<<2;
+ Golden[i] = srcB[i] + srcA[i];
dst[i] = 0;
}
- try
+ try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
Q.enqueueWriteBuffer(bufA, CL_FALSE, 0, bufsize, srcA, NULL, &ev1);
Q.enqueueWriteBuffer(bufB, CL_FALSE, 0, bufsize, srcB, NULL, &ev2);
- Q.enqueueNDRangeKernel(kernel, NullRange, NDRange(NumVecElements),
+ Q.enqueueNDRangeKernel(kernel, NullRange, NDRange(NumVecElements),
NDRange(WorkGroupSize), NULL, &ev3);
Q.enqueueReadBuffer (bufDst, CL_TRUE, 0, bufsize, dst, NULL, &ev4);
ocl_event_times(ev3, "Kernel Exec");
ocl_event_times(ev4, "Read BufDst");
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
}
for (int i=0; i < NumElements; ++i)
- if (Golden[i] != dst[i])
+ if (Golden[i] != dst[i])
{ cout << "Failed at Element " << i << endl; RETURN(-1); }
#ifdef _TI_RTOS
__free_ddr(Golden);
#endif
- cout << "Success!" << endl;
+ cout << "Success!" << endl;
RETURN(0);
}
diff --git a/examples/ooo/ooo.cpp b/examples/ooo/ooo.cpp
index 3f46d12121583c8357daff03151e1b38cafc0651..63c65da8f5904bb35f1ca498d85d6a7c935edeb4 100644 (file)
--- a/examples/ooo/ooo.cpp
+++ b/examples/ooo/ooo.cpp
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
const char *stage_names[] = {"PRODUCE","WRITE ","COMPUTE","READ ","CONSUME"};
-const char * kernStr =
+const char * kernStr =
"kernel void compute(global int* buf, int size) \n"
"{\n"
" for (int i = 0; i< size; ++i) \n"
void cpu_produce(void *args)
{
arguments_t *p = (arguments_t *)args;
- for (int i = 0; i < p->elms; ++i)
+ for (cl_uint i = 0; i < p->elms; ++i)
p->ptr[i] = p->val;
}
void cpu_consume(void *args)
{
arguments_t *p = (arguments_t *)args;
- int i;
+ cl_uint i;
- for (i = 0; i < p->elms; ++i)
- if (p->ptr[i] != p->val)
+ for (i = 0; i < p->elms; ++i)
+ if (p->ptr[i] != p->val)
{
std::cout << "Iteration " << p->iter << ": "
- << p->ptr[i] << " != " << p->val
+ << p->ptr[i] << " != " << p->val
<< std::endl << std::endl;
incorrect_results = true;
break;
struct timespec tp_start, tp_end;
- try
+ try
{
/*------------------------------------------------------------------------
* One time OpenCL Setup
*-----------------------------------------------------------------------*/
- Context context(CL_DEVICE_TYPE_ALL);
+ Context context(CL_DEVICE_TYPE_ALL);
std::vector<Device> devices(context.getInfo<CL_CONTEXT_DEVICES>());
CommandQueue *QcpuIO = NULL;
CommandQueue *QdspOO = NULL;
std::vector<Device> dspDevices;
- for (int d = 0; d < devices.size(); d++)
+ for (cl_uint d = 0; d < devices.size(); d++)
{
- cl_device_type type;
- devices[d].getInfo(CL_DEVICE_TYPE, &type);
-
- if (type & CL_DEVICE_TYPE_CPU)
- {
- QcpuIO = new CommandQueue(context, devices[d], PROFILE);
- QcpuOO = new CommandQueue(context, devices[d], PROFILE|OOOEXEC);
- }
- else if (type & CL_DEVICE_TYPE_ACCELERATOR)
+ cl_device_type type;
+ devices[d].getInfo(CL_DEVICE_TYPE, &type);
+
+ if (type & CL_DEVICE_TYPE_CPU)
+ {
+ QcpuIO = new CommandQueue(context, devices[d], PROFILE);
+ QcpuOO = new CommandQueue(context, devices[d], PROFILE|OOOEXEC);
+ }
+ else if (type & CL_DEVICE_TYPE_ACCELERATOR)
{
- QdspOO = new CommandQueue(context, devices[d], PROFILE|OOOEXEC);
- dspDevices.push_back(devices[d]);
+ QdspOO = new CommandQueue(context, devices[d], PROFILE|OOOEXEC);
+ dspDevices.push_back(devices[d]);
}
}
if (QcpuIO == NULL)
{
- std::cout <<
- "CPU devices are not fully supported in the current" << std::endl <<
- "OpenCL implementation (native kernel support only)." << std::endl <<
- "As a result, CPU devices are not enabled by" << std::endl <<
- "default. This example uses OpenCL CPU native" << std::endl <<
- "kernels and can be run with the CPU device enabled." << std::endl <<
+ std::cout <<
+ "CPU devices are not fully supported in the current" << std::endl <<
+ "OpenCL implementation (native kernel support only)." << std::endl <<
+ "As a result, CPU devices are not enabled by" << std::endl <<
+ "default. This example uses OpenCL CPU native" << std::endl <<
+ "kernels and can be run with the CPU device enabled." << std::endl <<
"To enable a CPU device define the environment variable" << std::endl <<
"'TI_OCL_CPU_DEVICE_ENABLE' before running the example." << std::endl;
- exit(-1);
+ exit(-1);
}
assert(QdspOO != NULL);
* Define a Buffer for each possible in flight task
*-----------------------------------------------------------------------*/
std::vector<BufUP> bufs;
- for (int i = 0; i < inflight; ++i)
+ for (int i = 0; i < inflight; ++i)
bufs.push_back(BufUP(new Buffer(context, CL_MEM_READ_WRITE, size)));
/*------------------------------------------------------------------------
- * Define a 3-D vector of OpenCL Events. 1st dim is for the number of
+ * Define a 3-D vector of OpenCL Events. 1st dim is for the number of
* in flight tasks, the second dim is for the processing stages of a single
- * task. The 3rd dim is an artifact of the c++ binding for event wait
+ * task. The 3rd dim is an artifact of the c++ binding for event wait
* lists. All enqueue API's take a wait list which is a vector<Event>*, and
* they take an Event*. All events in the wait list vector must complete,
- * before this event will execute. The single event argument is for the
- * event that will be set as a result of this enqueue.
+ * before this event will execute. The single event argument is for the
+ * event that will be set as a result of this enqueue.
*-----------------------------------------------------------------------*/
vecVecVecEv evt(inflight, vecVecEv(STAGES, vecEv(1)));
/*------------------------------------------------------------------------
* Enqueue a dummy DSP kernel call to force the OpenCL lazy execution
- * to go ahead and compile the kernel and load it. This will prevent the
- * compile and load times from skewing the reported numbers. This is not
+ * to go ahead and compile the kernel and load it. This will prevent the
+ * compile and load times from skewing the reported numbers. This is not
* needed by the algorithm and is purely a tactic to get consistent numbers
* from the the running of the bulk of this algorithm
*-----------------------------------------------------------------------*/
/*---------------------------------------------------------------------
* Native kernels are only passed a single pointer, so define a structure
- * that contains the actual arguments, populate that and then create
+ * that contains the actual arguments, populate that and then create
* a C++ binding native argument class that has the pointer and a size.
*--------------------------------------------------------------------*/
arguments_t proArgs = { ary, elements, i, i };
/*---------------------------------------------------------------------
* Since we are reusing N sets of buffers in this loop, we need to make
- * sure than iteration I does not start until after iteration I-N
+ * sure than iteration I does not start until after iteration I-N
* completes. Iterations < N can start immediately.
*--------------------------------------------------------------------*/
vecEv *start_waits = (i < inflight) ? 0 : &evt[circIdx][CNS];
evt[circIdx][CMP][0] = nullEv;
evt[circIdx][RD ][0] = nullEv;
- QcpuOO->enqueueNativeKernel(cpu_produce, proNargs, 0, 0,
+ QcpuOO->enqueueNativeKernel(cpu_produce, proNargs, 0, 0,
start_waits, &evt[circIdx][PRD][0]);
evt[circIdx][CNS][0] = nullEv;
- QdspOO->enqueueWriteBuffer (buf, CL_FALSE, 0, size, ary,
+ QdspOO->enqueueWriteBuffer (buf, CL_FALSE, 0, size, ary,
&evt[circIdx][PRD], &evt[circIdx][WRT][0]);
- QdspOO->enqueueTask (K,
+ QdspOO->enqueueTask (K,
&evt[circIdx][WRT], &evt[circIdx][CMP][0]);
- QdspOO->enqueueReadBuffer (buf, CL_FALSE, 0, size, ary,
+ QdspOO->enqueueReadBuffer (buf, CL_FALSE, 0, size, ary,
&evt[circIdx][CMP], &evt[circIdx][RD ][0]);
- QcpuIO->enqueueNativeKernel(cpu_consume, conNargs, 0, 0,
+ QcpuIO->enqueueNativeKernel(cpu_consume, conNargs, 0, 0,
&evt[circIdx][RD ], &evt[circIdx][CNS][0]);
}
/*------------------------------------------------------------------------
* After the running is complete, report timing for each step
*-----------------------------------------------------------------------*/
-#if PROFILE
+#if PROFILE
cl_ulong ref;
evt[0][0][0].getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &ref);
#endif
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "("
<< ocl_decode_error(err.err()) << ")"
index 695a3c2ad63f16bf0f56c0888002afd613e66e56..715b3f39c8291c3d6e09e3af9caa48386ffd98de 100644 (file)
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
void cpu_produce(void *args)
{
arguments_t *p = (arguments_t *)args;
- for (int i = 0; i < p->elms; ++i)
+ for (cl_uint i = 0; i < p->elms; ++i)
p->ptr[i] = p->val;
}
void cpu_consume(void *args)
{
arguments_t *p = (arguments_t *)args;
- int i;
- for (i = 0; i < p->elms; ++i)
- if (p->ptr[i] != p->val)
+ for (cl_uint i = 0; i < p->elms; ++i)
+ if (p->ptr[i] != p->val)
{
std::cout << "Iteration " << p->iter << ": "
- << p->ptr[i] << " != " << p->val
+ << p->ptr[i] << " != " << p->val
<< std::endl << std::endl;
incorrect_results = true;
break;
struct timespec tp_start, tp_end;
- try
+ try
{
/*------------------------------------------------------------------------
* One time OpenCL Setup
*-----------------------------------------------------------------------*/
- Context context(CL_DEVICE_TYPE_ACCELERATOR);
+ Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices(context.getInfo<CL_CONTEXT_DEVICES>());
CommandQueue *QdspIO = NULL;
CommandQueue *QdspOO = NULL;
std::vector<Device> dspDevices;
- for (int d = 0; d < devices.size(); d++)
+ for (cl_uint d = 0; d < devices.size(); d++)
{
cl_device_type type;
devices[d].getInfo(CL_DEVICE_TYPE, &type);
* Define a Buffer for each possible in flight task
*-----------------------------------------------------------------------*/
std::vector<BufUP> bufs;
- for (int i = 0; i < inflight; ++i)
+ for (int i = 0; i < inflight; ++i)
bufs.push_back(BufUP(new Buffer(context, CL_MEM_READ_WRITE, size)));
/*------------------------------------------------------------------------
- * Define a 3-D vector of OpenCL Events. 1st dim is for the number of
+ * Define a 3-D vector of OpenCL Events. 1st dim is for the number of
* in flight tasks, the second dim is for the processing stages of a single
- * task. The 3rd dim is an artifact of the c++ binding for event wait
+ * task. The 3rd dim is an artifact of the c++ binding for event wait
* lists. All enqueue API's take a wait list which is a vector<Event>*, and
* they take an Event*. All events in the wait list vector must complete,
- * before this event will execute. The single event argument is for the
- * event that will be set as a result of this enqueue.
+ * before this event will execute. The single event argument is for the
+ * event that will be set as a result of this enqueue.
*-----------------------------------------------------------------------*/
vecVecVecEv evt(inflight, vecVecEv(STAGES, vecEv(1)));
/*------------------------------------------------------------------------
* Enqueue a dummy DSP kernel call to force the OpenCL lazy execution
- * to go ahead and compile the kernel and load it. This will prevent the
- * compile and load times from skewing the reported numbers. This is not
+ * to go ahead and compile the kernel and load it. This will prevent the
+ * compile and load times from skewing the reported numbers. This is not
* needed by the algorithm and is purely a tactic to get consistent numbers
* from the the running of the bulk of this algorithm
*-----------------------------------------------------------------------*/
/*---------------------------------------------------------------------
* Since we are reusing N sets of buffers in this loop, we need to make
- * sure than iteration I does not start until after iteration I-N
+ * sure than iteration I does not start until after iteration I-N
* completes. Iterations < N can start immediately.
*--------------------------------------------------------------------*/
vecEv *start_waits = (i < inflight) ? 0 : &evt[circIdx][CNS];
}
evt[circIdx][WRT][0] = nullEv;
- QdspOO->enqueueWriteBuffer(buf, CL_FALSE, 0, size, ary,
+ QdspOO->enqueueWriteBuffer(buf, CL_FALSE, 0, size, ary,
&evt[circIdx][PRD], &evt[circIdx][WRT][0]);
evt[circIdx][CMP][0] = nullEv;
- QdspOO->enqueueTask (K,
+ QdspOO->enqueueTask (K,
&evt[circIdx][WRT], &evt[circIdx][CMP][0]);
evt[circIdx][RD ][0] = nullEv;
- QdspIO->enqueueReadBuffer (buf, CL_FALSE, 0, size, ary,
+ QdspIO->enqueueReadBuffer (buf, CL_FALSE, 0, size, ary,
&evt[circIdx][CMP], &evt[circIdx][RD ][0]);
evt[circIdx][RD ][0].setCallback(CL_COMPLETE, cpu_consume_callback,
conarg);
/*------------------------------------------------------------------------
* After the running is complete, report timing for each step
*-----------------------------------------------------------------------*/
-#if PROFILE
+#if PROFILE
cl_ulong ref;
evt[0][0][0].getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &ref);
}
#endif
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
index 3cfd8a243bb93b93af1a5528e27199965383c465..ad520938813e21ee561d729ffb20b0f6119d4f28 100644 (file)
--- a/examples/ooo_map/ooo.cpp
+++ b/examples/ooo_map/ooo.cpp
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
typedef std::pair<void*, ::size_t> native_arg_t;
typedef std::unique_ptr<Buffer> BufUP;
typedef enum { WMP, PRD, WUM, CMP, RMP, CNS, RUM, STAGES } stage;
-const char *stage_names[] = {"MAP4WRT ", "PRODUCE ", "UNMAP4WRT ",
- "COMPUTE ", "MAP4READ ", "CONSUME ",
+const char *stage_names[] = {"MAP4WRT ", "PRODUCE ", "UNMAP4WRT ",
+ "COMPUTE ", "MAP4READ ", "CONSUME ",
"UNMAP4READ"};
typedef struct { int *ptr; unsigned elms; int val; int iter; } arguments_t;
void cpu_produce(void *args)
{
arguments_t *p = (arguments_t *)args;
- for (int i = 0; i < p->elms; ++i)
+ for (cl_uint i = 0; i < p->elms; ++i)
p->ptr[i] = p->val;
}
void cpu_consume(void *args)
{
arguments_t *p = (arguments_t *)args;
- int i;
- for (i = 0; i < p->elms; ++i)
- if (p->ptr[i] != p->val)
+ for (cl_uint i = 0; i < p->elms; ++i)
+ if (p->ptr[i] != p->val)
{
std::cout << "Iteration " << p->iter << ": Element: " << i << " : "
- << p->ptr[i] << " != " << p->val
+ << p->ptr[i] << " != " << p->val
<< std::endl << std::endl;
incorrect_results = true;
break;
struct timespec tp_start, tp_end;
- try
+ try
{
/*------------------------------------------------------------------------
* One time OpenCL Setup
*-----------------------------------------------------------------------*/
- Context context(CL_DEVICE_TYPE_ALL);
+ Context context(CL_DEVICE_TYPE_ALL);
std::vector<Device> devices(context.getInfo<CL_CONTEXT_DEVICES>());
-
+
CommandQueue *QcpuIO = NULL;
CommandQueue *QcpuOO = NULL;
CommandQueue *QdspOO = NULL;
std::vector<Device> dspDevices;
- for (int d = 0; d < devices.size(); d++)
+ for (cl_uint d = 0; d < devices.size(); d++)
{
- cl_device_type type;
- devices[d].getInfo(CL_DEVICE_TYPE, &type);
-
- if (type & CL_DEVICE_TYPE_CPU)
- {
- QcpuIO = new CommandQueue(context, devices[d], PROFILE);
- QcpuOO = new CommandQueue(context, devices[d], PROFILE|OOOEXEC);
- }
- else if (type & CL_DEVICE_TYPE_ACCELERATOR)
- {
- QdspOO = new CommandQueue(context, devices[d], PROFILE|OOOEXEC);
- dspDevices.push_back(devices[d]);
- }
+ cl_device_type type;
+ devices[d].getInfo(CL_DEVICE_TYPE, &type);
+
+ if (type & CL_DEVICE_TYPE_CPU)
+ {
+ QcpuIO = new CommandQueue(context, devices[d], PROFILE);
+ QcpuOO = new CommandQueue(context, devices[d], PROFILE|OOOEXEC);
+ }
+ else if (type & CL_DEVICE_TYPE_ACCELERATOR)
+ {
+ QdspOO = new CommandQueue(context, devices[d], PROFILE|OOOEXEC);
+ dspDevices.push_back(devices[d]);
+ }
}
if (QcpuIO == NULL)
{
- std::cout <<
+ std::cout <<
"CPU devices are not fully supported in the current" << std::endl <<
- "OpenCL implementation (native kernel support only)." << std::endl <<
+ "OpenCL implementation (native kernel support only)." << std::endl <<
"As a result, CPU devices are not enabled by " << std::endl <<
"default. This example uses OpenCL CPU native" << std::endl <<
- "kernels and can be run with the CPU device enabled." << std::endl <<
+ "kernels and can be run with the CPU device enabled." << std::endl <<
"To enable a CPU device define the environment variable" << std::endl <<
"'TI_OCL_CPU_DEVICE_ENABLE' before running the example." << std::endl;
exit(-1);
* Define a Buffer for each possible in flight task
*-----------------------------------------------------------------------*/
std::vector<BufUP> bufs;
- for (int i = 0; i < inflight; ++i)
+ for (int i = 0; i < inflight; ++i)
bufs.push_back(BufUP(new Buffer(context, CL_MEM_READ_WRITE, size)));
/*------------------------------------------------------------------------
- * Define a 3-D vector of OpenCL Events. 1st dim is for the number of
+ * Define a 3-D vector of OpenCL Events. 1st dim is for the number of
* in flight tasks, the second dim is for the processing stages of a single
- * task. The 3rd dim is an artifact of the c++ binding for event wait
+ * task. The 3rd dim is an artifact of the c++ binding for event wait
* lists. All enqueue API's take a wait list which is a vector<Event>*, and
* they take an Event*. All events in the wait list vector must complete,
- * before this event will execute. The single event argument is for the
- * event that will be set as a result of this enqueue.
+ * before this event will execute. The single event argument is for the
+ * event that will be set as a result of this enqueue.
*-----------------------------------------------------------------------*/
vecVecVecEv evt(inflight, vecVecEv(STAGES, vecEv(1)));
*--------------------------------------------------------------------*/
int circIdx = i % inflight;
Buffer &buf(*bufs[circIdx]);
- int *ary(arys [circIdx]);
Event nullEv;
K.setArg(0, buf);
/*---------------------------------------------------------------------
* Since we are reusing N sets of buffers in this loop, we need to make
- * sure than iteration I does not start until after iteration I-N
+ * sure than iteration I does not start until after iteration I-N
* completes. Iterations < N can start immediately.
*--------------------------------------------------------------------*/
int eIdx = circIdx;
evt[circIdx][RMP][0] = nullEv;
evt[circIdx][CNS][0] = nullEv;
- int *p = (int*)QdspOO->enqueueMapBuffer(buf, CL_FALSE, CL_MAP_WRITE,
+ int *p = (int*)QdspOO->enqueueMapBuffer(buf, CL_FALSE, CL_MAP_WRITE,
0, size, start_waits, &evt[eIdx][WMP][0]);
evt[circIdx][RUM][0] = nullEv;
/*---------------------------------------------------------------------
* Native kernels are only passed a single pointer, so define a structure
- * that contains the actual arguments, populate that and then create
+ * that contains the actual arguments, populate that and then create
* a C++ binding native argument class that has the pointer and a size.
*--------------------------------------------------------------------*/
arguments_t proArgs = { p, elements, i, i };
native_arg_t proNargs(&proArgs, sizeof(proArgs));
- QcpuOO->enqueueNativeKernel(cpu_produce, proNargs, 0, 0,
+ QcpuOO->enqueueNativeKernel(cpu_produce, proNargs, 0, 0,
&evt[eIdx][WMP], &evt[eIdx][PRD][0]);
- QdspOO->enqueueUnmapMemObject(buf, p,
+ QdspOO->enqueueUnmapMemObject(buf, p,
&evt[eIdx][PRD], &evt[eIdx][WUM][0]);
- QdspOO->enqueueTask(K,
+ QdspOO->enqueueTask(K,
&evt[eIdx][WUM], &evt[eIdx][CMP][0]);
p = (int*)QdspOO->enqueueMapBuffer(buf, CL_FALSE, CL_MAP_READ, 0, size,
arguments_t conArgs = { p, elements, i+1, i };
native_arg_t conNargs(&conArgs, sizeof(conArgs));
- QcpuIO->enqueueNativeKernel (cpu_consume, conNargs, 0, 0,
+ QcpuIO->enqueueNativeKernel (cpu_consume, conNargs, 0, 0,
&evt[eIdx][RMP], &evt[eIdx][CNS][0]);
- QdspOO->enqueueUnmapMemObject (buf, p,
+ QdspOO->enqueueUnmapMemObject (buf, p,
&evt[eIdx][CNS], &evt[eIdx][RUM][0]);
}
/*------------------------------------------------------------------------
* After the running is complete, report timing for each step
*-----------------------------------------------------------------------*/
-#if PROFILE
+#if PROFILE
cl_ulong ref;
evt[0][0][0].getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &ref);
#endif
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "("
<< ocl_decode_error(err.err()) << ")"
index b508df4ad60e6dd357d66fd0e607d2c122612e2f..93760fea9b9585c385c7fe28d5a71c255f92cf17 100644 (file)
std::vector<Platform> platforms;
Platform::get(&platforms);
- for (int p = 0; p < platforms.size(); p++)
+ for (unsigned int p = 0; p < platforms.size(); p++)
{
std::string str;
/*-------------------------------------------------------------------------
* Let exception handling deal with any OpenCL error cases
*------------------------------------------------------------------------*/
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
std::vector<Device> devices= context.getInfo<CL_CONTEXT_DEVICES>();
- for (int d = 0; d < devices.size(); d++)
+ for (unsigned int d = 0; d < devices.size(); d++)
{
devices[d].getInfo(CL_DEVICE_NAME, &str);
cout << " DEVICE: " << str << endl;
index dfac651649fd3c3e88821709cb0dccb9c3c87cd0..86a9fbd4d93f7146d9f5bdc7e3a974ea11f343ea 100644 (file)
--- a/examples/sgemm/main.cpp
+++ b/examples/sgemm/main.cpp
/* ---------------------------------------------------------------- */
HandleOptions(argc,argv);
- try
+ try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
if (NUMCOMPUNITS == 0) RETURN(-1);
int VALRANGE = 17;
- if (random_in)
+ if (random_in)
{
srand(time(NULL));
alpha = (float) (rand() % VALRANGE + 1);
K*N*sizeof(float));
float *C = (float*) Q.enqueueMapBuffer(bufC, CL_TRUE, CL_MAP_WRITE, 0,
M*N*sizeof(float));
- float *gold;
+ float *gold = nullptr;
cout << "Generating Input Data ..." << flush;
for (int i = 0; i < M*K; ++i)
- A[i] = random_in ? (float)(rand() % VALRANGE + 1) : 1 + (i & 7);
+ A[i] = random_in ? (float)(rand() % VALRANGE + 1) : 1 + (i & 7);
for (int i = 0; i < K*N; ++i)
- B[i] = random_in ? (float)(rand() % VALRANGE + 1) : 1 + (i & 11);
+ B[i] = random_in ? (float)(rand() % VALRANGE + 1) : 1 + (i & 11);
for (int i = 0; i < M*N; ++i)
C[i] = random_in ? (float)(rand() % VALRANGE + 1) : 1 + (i & 5);
cout << "Complete" << endl;
* C'[nxm] = B'[nxk] * A'[kxm],
* where ptrC' === ptrC, ptrA' === ptrA, ptrB' === ptrB
* So, all we need to do is to: swap(m, n), swap(a, b)
- * ld_Transpose(a)_col = lda_row = k,
- * ld_Transpose(b)_col = ldb_row = n,
- * ld_Transpose(c)_col = ldc_row = n,
+ * ld_Transpose(a)_col = lda_row = k,
+ * ld_Transpose(b)_col = ldb_row = n,
+ * ld_Transpose(c)_col = ldc_row = n,
*---------------------------------------------------------------------*/
if (order == CblasRowMajor)
matmul(N, M, K, alpha, bufB, N, bufA, K, beta, bufC, N,
if (bufMsmc != NULL) delete bufMsmc;
/*----------------------------------------------------------------------
- * If checking results against a host matmul.
+ * If checking results against a host matmul.
* This can be time consuming for large sizes.
*---------------------------------------------------------------------*/
C = (float*) Q.enqueueMapBuffer(bufC, CL_TRUE, CL_MAP_READ, 0,
gflops, dsp_elapsed);
Q.enqueueUnmapMemObject(bufA, A);
Q.enqueueUnmapMemObject(bufB, B);
- PrintMatrix(gold,M,N,order);
+ PrintMatrix(gold,M,N,order);
errs = CheckForErrors(C, gold, M, N, K, order);
#ifndef _TI_RTOS
free(gold);
#endif
}
- PrintMatrix(C,M,N,order);
+ PrintMatrix(C,M,N,order);
Q.enqueueUnmapMemObject(bufC, C); Q.finish();
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
void PrintUsageAndExit()
{
cout << "Matrix C[M,N] = A[M,K] * B [K,N]" << endl
- << "Default value of M,N,K is " << M << endl
+ << "Default value of M,N,K is " << M << endl
<< "Usage: sgemm [options] " << endl
<< "Options: " << endl
<< "-M arg : Number of rows for array C and A" << endl
<< "-d : Do not check results against host computation" << endl
<< "-r : Generate random inputs" << endl
<< "-or : Use Row-Major storage (default is Col-Major)" << endl
- << "-h : Show help message"
+ << "-h : Show help message"
<< endl;
exit(0);
}
int c;
if (argc == 1) return;
-
+
while ((c = getopt (argc, argv, "o:M:K:N:hdrx")) != -1)
switch(c)
{
if (!M && !N && !K)
{
M = N = K = roundDownPower2(sqrt(global_mem / 3 / sizeof(float)));
-#ifdef _TI_RTOS
if (M >= 2048) M = N = K = 2048;
-#endif
}
NUMAPANELS = L2_BUF_SIZE / 2 / APanelSz;
if (calc_check)
{
- cout << "M,N,K = " << M << ", "
+ cout << "M,N,K = " << M << ", "
<< N << ", "
<< K << endl;
cout << "MSMC_BUF_SIZE = " << MSMC_BUF_SIZE << endl;
index b1a30479899f42e1335ff10f47637d5a9643a93c..d672ce9d2785b7a62f819500d24171e7192c0e65 100644 (file)
Q.enqueueReadBuffer(buf, CL_TRUE, 0, size, ary);
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
index ef9f43179641682bf46685734af3cf9a4a1c9a02..7d16fb249b4310e2688220b093471d230f7a0ba5 100644 (file)
delete tIOQ;
delete tOOQ;
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
exit(-1);
}
}
- catch (Error err)
+ catch (Error& err)
{
cl_int status;
ev.getInfo(CL_EVENT_COMMAND_EXECUTION_STATUS, &status);
ev = k(eargs, buf, size, timeout_flag);
ev.setCallback(CL_COMPLETE, ev_complete_func, data);
}
- catch (Error err)
+ catch (Error& err)
{
cl_int status;
ev.getInfo(CL_EVENT_COMMAND_EXECUTION_STATUS, &status);
index 6837ada656143740b72eca5d48618b564f353c07..5ca30769a62510fdd73ab80066f9ae3dcfbb31a0 100644 (file)
--- a/examples/vecadd/main.cpp
+++ b/examples/vecadd/main.cpp
int main(int argc, char *argv[])
{
#endif
- cl_int err = CL_SUCCESS;
int bufsize = sizeof(cl_short) * NumElements;
cl_short *srcA = (cl_short *)__malloc_ddr(bufsize);
__free_ddr(srcA);
__free_ddr(srcB);
}
- catch (Error err)
+ catch (Error& err)
{
cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
<< ocl_decode_error(err.err()) << ")" << endl;
index 27ee1a6c511e3969827a86eb793615c119f228b3..496b7aaed13a5e838d5f6af96b9ae554d091f6df 100644 (file)
int main(int argc, char *argv[])
{
- cl_int err = CL_SUCCESS;
int bufsize = NumElements * sizeof(cl_short);
int num_errors = 0;
int d = 0;
int start_time, end_time;
- try
+ try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
int num_devices = devices.size();
- if (NumVecElements % num_devices != 0 ||
+ if (NumVecElements % num_devices != 0 ||
(NumVecElements / num_devices) % WorkGroupSize != 0)
{
cerr << "ERROR: Cannot evenly distribute data across devices!" << endl;
Program::Sources source(1, std::make_pair(kernelStr,strlen(kernelStr)));
Program program = Program(context, source);
- program.build(devices);
+ program.build(devices);
Kernel kernel(program, "VectorAdd");
for (d = 0; d < num_devices; ++d)
/* Method 1: Use ReadBuffer/WriteBuffer APIs */
{
- cout << "=== Method 1: Using ReadBuffer/WriteBuffer APIs ===" << endl;
+ cout << "=== Method 1: Using ReadBuffer/WriteBuffer APIs ===" << endl;
cl_short *srcA = (cl_short *) malloc(bufsize);
cl_short *srcB = (cl_short *) malloc(bufsize);
cl_short *dst = (cl_short *) malloc(bufsize);
}
start_time = gettime_ms();
- for (int i=0; i < NumElements; ++i)
- {
- srcA[i] = srcB[i] = i<<2;
- Golden[i] = srcB[i] + srcA[i];
+ for (int i=0; i < NumElements; ++i)
+ {
+ srcA[i] = srcB[i] = i<<2;
+ Golden[i] = srcB[i] + srcA[i];
dst[i] = 0;
}
for (d = 0; d < num_devices; ++d)
ev4s[d]->wait();
for (int i=0; i < NumElements; ++i)
- if (Golden[i] != dst[i])
- {
+ if (Golden[i] != dst[i])
+ {
num_errors += 1;
if (num_errors < 10)
- cout << "Failed at Element " << i << ": "
- << Golden[i] << " != " << dst[i] << endl;
+ cout << "Failed at Element " << i << ": "
+ << Golden[i] << " != " << dst[i] << endl;
}
end_time = gettime_ms();
cout << "Method 1: " << end_time - start_time << " micro seconds" << endl;
ocl_event_times(*ev4s[d], "Read BufDst");
}
if (num_errors) cout << "Fail with " << num_errors << " errors!" << endl;
- else cout << "Success!" << endl;
+ else cout << "Success!" << endl;
free(srcA);
free(srcB);
/* Method 2: Use MapBuffer/UnmapMemObject APIs */
{
- cout << "\n\n=== Method 2: Using MapBuffer/UnmapBuffer APIs ===" << endl;
+ cout << "\n\n=== Method 2: Using MapBuffer/UnmapBuffer APIs ===" << endl;
cl_short *h_bufA, *h_bufB;
cl_short *mGolden = (cl_short *) malloc(bufsize);
cl_short **h_dst = (cl_short **) malloc(num_devices * sizeof(cl_short *));
cout << "Unable to allocate memory for data! (MapBuffer API)" << endl;
exit(-1);
}
-
+
start_time = gettime_ms();
std::vector<Buffer*> mbufAs, mbufBs, mbufDs;
std::vector<CommandQueue*> mQs;
}
for (d = 0; d < num_devices; ++d)
{
- cl_short *h_bufA = (cl_short *) mQs[d]->enqueueMapBuffer(*mbufAs[d],
- CL_FALSE, CL_MAP_WRITE, 0, d_bufsize, NULL, mev1s[d]);
- cl_short *h_bufB = (cl_short *) mQs[d]->enqueueMapBuffer(*mbufBs[d],
- CL_FALSE, CL_MAP_WRITE, 0, d_bufsize, NULL, mev2s[d]);
+ h_bufA = (cl_short *) mQs[d]->enqueueMapBuffer(*mbufAs[d],
+ CL_FALSE, CL_MAP_WRITE, 0, d_bufsize, NULL, mev1s[d]);
+ h_bufB = (cl_short *) mQs[d]->enqueueMapBuffer(*mbufBs[d],
+ CL_FALSE, CL_MAP_WRITE, 0, d_bufsize, NULL, mev2s[d]);
mev1s[d]->wait();
for (int i = 0; i < d_Elements; i++)
h_bufA[i] = (d * d_Elements + i)<<2;
{
mev6s[d]->wait();
for (int i = 0; i < d_Elements; ++i)
- if (mGolden[d*d_Elements + i] != h_dst[d][i])
- {
+ if (mGolden[d*d_Elements + i] != h_dst[d][i])
+ {
num_errors += 1;
if (num_errors < 10)
- cout << "Failed at Element " << i << ": "
+ cout << "Failed at Element " << i << ": "
<< mGolden[d*d_Elements + i] << " != " << h_dst[d][i] << endl;
}
mQs[d]->enqueueUnmapMemObject(*mbufDs[d], h_dst[d], NULL, mev7s[d]);
ocl_event_times(*mev7s[d], "Unmap BufDst");
}
if (num_errors) cout << "Fail with " << num_errors << " errors!" << endl;
- else cout << "Success!" << endl;
+ else cout << "Success!" << endl;
free(mGolden);
free(h_dst);
} /* end Mehtod 2 */
}
- catch (Error err)
+ catch (Error& err)
{ cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; }
}
index 951534bc5c0a9ac5a7809e806b2ea9ea09026b90..a4f9471f689b71d72b0bb2d3c92423b871894b71 100644 (file)
int main(int argc, char *argv[])
{
- cl_int err = CL_SUCCESS;
int num_errors = 0;
- try
+ try
{
Context context (CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
-
+
int d = 0;
std::string str;
devices[d].getInfo(CL_DEVICE_NAME, &str);
{
devices[d].getInfo(CL_DEVICE_GLOBAL_EXT1_MEM_SIZE_TI, &gmem_size_ext1);
devices[d].getInfo(CL_DEVICE_GLOBAL_EXT2_MEM_SIZE_TI, &gmem_size_ext2);
- } catch (Error memsz_err)
+ } catch (Error& memsz_err)
{ /* Ext Mem Size not available for this device */ }
cl_uint bufsize = get_bufsize(gmem_size, gmem_size_ext1, gmem_size_ext2);
Program::Sources source(1, std::make_pair(kernelStr,strlen(kernelStr)));
Program program = Program(context, source);
- program.build(devices);
+ program.build(devices);
Kernel kernel(program, "VectorAdd");
cl_uint remain_vec_elements = remain_elements / VectorElements;
cl_uint WorkGroupSize = remain_vec_elements / NumWorkGroups;
cl_uint chunk_element_start = chunk * CHUNKSIZE / sizeof(cl_short);
-
+
printf("Chunk %d: short elements 0x%x to 0x%x\n", chunk,
chunk_element_start, chunk_element_start + remain_elements);
-
- short * srcA = (short*) Q.enqueueMapBuffer(bufA, CL_TRUE, CL_MAP_WRITE,
+
+ short * srcA = (short*) Q.enqueueMapBuffer(bufA, CL_TRUE, CL_MAP_WRITE,
chunk * CHUNKSIZE, remain_size, NULL, &ev1);
short * srcB = (short*) Q.enqueueMapBuffer(bufB, CL_TRUE, CL_MAP_WRITE,
chunk * CHUNKSIZE, remain_size, NULL, &ev2);
printf("Unable to map src buffers into host's memory, exiting.\n");
return -1;
}
-
- for (int i=0; i < remain_elements; ++i)
+
+ for (cl_uint i=0; i < remain_elements; ++i)
{
- srcA[i] = srcB[i] = ((chunk * CHUNKSIZE/2) +(i+1)) % 12345;
+ srcA[i] = srcB[i] = ((chunk * CHUNKSIZE/2) +(i+1)) % 12345;
}
-
+
Q.enqueueUnmapMemObject(bufA, srcA, NULL, &ev3);
Q.enqueueUnmapMemObject(bufB, srcB, NULL, &ev4);
-
+
cl_buffer_region region;
region.origin = chunk*CHUNKSIZE;
region.size = remain_size;
CL_BUFFER_CREATE_TYPE_REGION, &region);
Buffer subC = bufDst.createSubBuffer(CL_MEM_WRITE_ONLY,
CL_BUFFER_CREATE_TYPE_REGION, &region);
-
+
kernel.setArg(0, subA);
kernel.setArg(1, subB);
kernel.setArg(2, subC);
-
+
std::vector<Event> vec_ev5(1);
- Q.enqueueNDRangeKernel(kernel, NullRange, NDRange(remain_vec_elements),
+ Q.enqueueNDRangeKernel(kernel, NullRange, NDRange(remain_vec_elements),
NDRange(WorkGroupSize), NULL, &vec_ev5[0]);
-
+
ev3.wait(); // otherwise, we may run short of host address space
short * dst = (short*)Q.enqueueMapBuffer(bufDst, CL_TRUE, CL_MAP_READ,
chunk * CHUNKSIZE, remain_size, &vec_ev5, &ev6);
printf("Unable to map dst buffer into host's memory, exiting.\n");
return -1;
}
-
- for (int i=0; i < remain_elements; ++i)
+
+ for (cl_uint i=0; i < remain_elements; ++i)
{
cl_short da = ((chunk * CHUNKSIZE/2)+i+1) % 12345;
- if (da+da != dst[i])
- {
- cout << "Element " << i << ": "
- << da+da << " <==> " << dst[i] << endl;
+ if (da+da != dst[i])
+ {
+ cout << "Element " << i << ": "
+ << da+da << " <==> " << dst[i] << endl;
num_errors += 1;
break;
}
}
-
+
Q.enqueueUnmapMemObject(bufDst, dst, NULL, &ev7);
ev7.wait();
-
+
ocl_event_times(ev1, "Map BufA ");
ocl_event_times(ev2, "Map BufB ");
ocl_event_times(ev3, "UnMap BufA ");
}
}
- catch (Error err)
+ catch (Error& err)
{ cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; }
- if (num_errors == 0) cout << "Success!" << endl;
+ if (num_errors == 0) cout << "Success!" << endl;
else { cout << "Results failed to verify!" << endl; return -1; }
}
index 6be3f97f6f2c79659863548f97bf5bf0f15b6d79..98de8bfaaf5867be9665b8e4d985e5591e015c03 100644 (file)
int main(int argc, char *argv[])
{
- cl_int err = CL_SUCCESS;
int num_errors = 0;
- try
+ try
{
Context context (CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
-
+
int d = 0;
std::string str;
devices[d].getInfo(CL_DEVICE_NAME, &str);
{
devices[d].getInfo(CL_DEVICE_GLOBAL_EXT1_MEM_SIZE_TI, &gmem_size_ext1);
devices[d].getInfo(CL_DEVICE_GLOBAL_EXT2_MEM_SIZE_TI, &gmem_size_ext2);
- } catch (Error memsz_err)
+ } catch (Error& memsz_err)
{ /* Ext Mem Size not available for this device */ }
cl_uint bufsize = get_bufsize(gmem_size, gmem_size_ext1, gmem_size_ext2);
istreambuf_iterator<char>());
Program::Sources source(1, make_pair(kSrc.c_str(), kSrc.length()));
Program program = Program(context, source);
- program.build(devices, "vadd_openmp.obj");
+ program.build(devices, "vadd_openmp.obj");
Kernel kernel(program, "vadd_wrapper");
cl_uint remain_size = bufsize - chunk * CHUNKSIZE;
if (remain_size > CHUNKSIZE) remain_size = CHUNKSIZE;
cl_uint remain_elements = remain_size / sizeof(float);
- cl_uint remain_vec_elements = remain_elements / VectorElements;
+ //cl_uint remain_vec_elements = remain_elements / VectorElements;
cl_uint chunk_element_start = chunk * CHUNKSIZE / sizeof(float);
-
+
printf("Chunk %d: float elements 0x%x to 0x%x\n", chunk,
chunk_element_start, chunk_element_start + remain_elements);
-
- float * srcA = (float*) Q.enqueueMapBuffer(bufA, CL_TRUE, CL_MAP_WRITE,
+
+ float * srcA = (float*) Q.enqueueMapBuffer(bufA, CL_TRUE, CL_MAP_WRITE,
chunk * CHUNKSIZE, remain_size, NULL, &ev1);
float * srcB = (float*) Q.enqueueMapBuffer(bufB, CL_TRUE, CL_MAP_WRITE,
chunk * CHUNKSIZE, remain_size, NULL, &ev2);
-
- for (int i=0; i < remain_elements; ++i)
+
+ for (cl_uint i=0; i < remain_elements; ++i)
{
- srcA[i] = srcB[i] = (((chunk * CHUNKSIZE/2) +(i+1)) % 12345)*0.3;
+ srcA[i] = srcB[i] = (((chunk * CHUNKSIZE/2) +(i+1)) % 12345)*0.3;
}
-
+
Q.enqueueUnmapMemObject(bufA, srcA, NULL, &ev3);
Q.enqueueUnmapMemObject(bufB, srcB, NULL, &ev4);
-
+
cl_buffer_region region;
region.origin = chunk*CHUNKSIZE;
region.size = remain_size;
CL_BUFFER_CREATE_TYPE_REGION, &region);
Buffer subC = bufDst.createSubBuffer(CL_MEM_WRITE_ONLY,
CL_BUFFER_CREATE_TYPE_REGION, &region);
-
+
kernel.setArg(0, subA);
kernel.setArg(1, subB);
kernel.setArg(2, subC);
kernel.setArg(3, remain_elements);
-
+
std::vector<Event> vec_ev5(1);
//Q.enqueueNDRangeKernel(kernel,NullRange,NDRange(remain_vec_elements),
// NDRange(WorkGroupSize), NULL, &vec_ev5[0]);
Q.enqueueTask(kernel, NULL, &vec_ev5[0]);
-
+
ev3.wait(); // otherwise, we may run short of host address space
float * dst = (float*)Q.enqueueMapBuffer(bufDst, CL_TRUE, CL_MAP_READ,
chunk * CHUNKSIZE, remain_size, &vec_ev5, &ev6);
-
- for (int i=0; i < remain_elements; ++i)
+
+ for (cl_uint i=0; i < remain_elements; ++i)
{
float da = (((chunk * CHUNKSIZE/2)+i+1) % 12345)*0.3;
- if (da+da - dst[i] < -EPISILON || da+da - dst[i] > EPISILON)
- {
- cout << "Element " << i << ": "
- << da+da << " <==> " << dst[i] << endl;
+ if (da+da - dst[i] < -EPISILON || da+da - dst[i] > EPISILON)
+ {
+ cout << "Element " << i << ": "
+ << da+da << " <==> " << dst[i] << endl;
num_errors += 1;
break;
}
}
-
+
Q.enqueueUnmapMemObject(bufDst, dst, NULL, &ev7);
ev7.wait(); // wait to get profiling info
-
+
ocl_event_times(ev1, "Map BufA ");
ocl_event_times(ev2, "Map BufB ");
ocl_event_times(ev3, "UnMap BufA ");
}
}
- catch (Error err)
+ catch (Error& err)
{ cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; }
- if (num_errors == 0) cout << "Success!" << endl;
+ if (num_errors == 0) cout << "Success!" << endl;
else { cout << "Results failed to verify!" << endl; return -1; }
}
index 484716430c5b175c1400ebbe996f5d7cb25cd0a9..6164fc70649bae61d07196e39bb7c7944ddc3b6d 100644 (file)
int main(int argc, char *argv[])
{
#endif
- cl_int err = CL_SUCCESS;
int bufsize = sizeof(Golden);
int num_errors = 0;
const int print_nerrors = 12;
- for (int i=0; i < NumElements; ++i)
+ for (int i=0; i < NumElements; ++i)
{
- srcA[i] = i * 1.0;
- srcB[i] = ((i+7) % 253 )* 1.0;
+ srcA[i] = i * 1.0;
+ srcB[i] = ((i+7) % 253 )* 1.0;
Golden[i] = srcA[i] + srcB[i];
}
- try
+ try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
-
+
int d = 0;
std::string str;
devices[d].getInfo(CL_DEVICE_NAME, &str);
istreambuf_iterator<char>());
Program::Sources source(1, make_pair(kSrc.c_str(), kSrc.length()));
Program program = Program(context, source);
- program.build(devices, "vadd_openmp.obj");
+ program.build(devices, "vadd_openmp.obj");
#else
Program::Binaries binary(1, make_pair(vadd_wrapper_dsp_bin,
sizeof(vadd_wrapper_dsp_bin)));
for (int i=0; i < NumElements; ++i)
{
- if (Golden[i] - dst[i] < -EPISILON || Golden[i] - dst[i] > EPISILON)
- {
+ if (Golden[i] - dst[i] < -EPISILON || Golden[i] - dst[i] > EPISILON)
+ {
if((num_errors += 1) < print_nerrors)
printf("Error %d: %f <==> %f\n", i, Golden[i], dst[i]);
}
ocl_event_times(vec_ev5[0], "Kernel ");
ocl_event_times(ev6, "Read BufDst");
}
- catch (Error err)
+ catch (Error& err)
{ cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; }
- if (num_errors == 0) cout << "PASS!" << endl;
+ if (num_errors == 0) cout << "PASS!" << endl;
else { cout << "FAIL with " << num_errors << " errors!\n"; RETURN(-1); }
RETURN(0);
index 75c09eb45acf12b72bfb133ceb4181c1a9e328fa..4abfdbd48b413809664de7b31851748a0563f5db 100644 (file)
int main(int argc, char *argv[])
{
#endif
- cl_int err = CL_SUCCESS;
int bufsize = sizeof(Golden);
int num_errors = 0;
const int print_nerrors = 12;
- for (int i=0; i < NumElements; ++i)
+ for (int i=0; i < NumElements; ++i)
{
- srcA[i] = i * 1.0;
- srcB[i] = ((i+7) % 257 )* 1.0;
+ srcA[i] = i * 1.0;
+ srcB[i] = ((i+7) % 257 )* 1.0;
Golden[i] = srcA[i] + srcB[i];
}
- try
+ try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
-
+
int d = 0;
std::string str;
devices[d].getInfo(CL_DEVICE_NAME, &str);
istreambuf_iterator<char>());
Program::Sources source(1, make_pair(kSrc.c_str(), kSrc.length()));
Program program = Program(context, source);
- program.build(devices, "vadd_openmp.obj");
+ program.build(devices, "vadd_openmp.obj");
#else
Program::Binaries binary(1, make_pair(vadd_wrapper_dsp_bin,
sizeof(vadd_wrapper_dsp_bin)));
for (int i=0; i < NumElements; ++i)
{
- if (Golden[i] - dst[i] < -EPISILON || Golden[i] - dst[i] > EPISILON)
- {
+ if (Golden[i] - dst[i] < -EPISILON || Golden[i] - dst[i] > EPISILON)
+ {
if((num_errors += 1) < print_nerrors)
printf("Error %d: %f <==> %f\n", i, Golden[i], dst[i]);
}
ocl_event_times(vec_ev5[0], "Kernel ");
ocl_event_times(ev6, "Read BufDst");
}
- catch (Error err)
+ catch (Error& err)
{ cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; }
if (num_errors > 0)
- {
+ {
cout << "FAIL with " << num_errors << " errors!\n";
RETURN (-1);
}
- else cout << "PASS!" << endl;
+ else cout << "PASS!" << endl;
RETURN (0);
}
index a1600ab913d4d1ba88496a1e23ff7271f921b3de..5ea4001029a9f2c8003d0c798f4c958ba3eeab76 100644 (file)
* Run Vecadd on root device and each sub device using different
* command queues separately
*------------------------------------------------------------------------*/
- for(auto i=0; i<n_devices; i++)
+ for(cl_uint i=0; i<n_devices; i++)
{
elapsed_times[i] = RunKernelOnQ(&Qs[i], &kernel,
&bufA, srcA,
*Golden != nullptr);
srand(time(NULL));
- for (auto i = 0; i < n_elems; ++i)
+ for (cl_uint i = 0; i < n_elems; ++i)
{
(*srcA)[i] = rand() % 100 + 1;
(*srcB)[i] = rand() % 100 + 1;
cl_uint num_Qs)
{
int errcode;
- for (auto i = 0; i < num_Qs; i++)
+ for (cl_uint i = 0; i < num_Qs; i++)
{
(*Qs)[i] = clCreateCommandQueue(*context, /* context */
devices[i],/* device */
cl_short* dst,
cl_uint n_elem)
{
- for (auto i = 0; i < n_elem; ++i)
+ for (cl_uint i = 0; i < n_elem; ++i)
{
if (golden[i] != dst[i])
{
cl_short* Golden,
cl_uint n_elems)
{
- cl_uint bufsize = sizeof(cl_short) * n_elems;
double secs = 0;
tick();
*------------------------------------------------------------------------*/
void ResetResultArray(cl_short* results, cl_uint n_elems)
{
- for (auto i = 0; i < n_elems; i++) results[i] = 0;
+ for (cl_uint i = 0; i < n_elems; i++) results[i] = 0;
}
/*-------------------------------------------------------------------------
void CleanUpQs(cl_command_queue* Qs, cl_uint numQs)
{
int errcode;
- for(auto i=0; i<numQs; i++)
+ for(cl_uint i=0; i<numQs; i++)
{
errcode = clFlush(Qs[i]);
assert(errcode == CL_SUCCESS);
void CleanUpSubDevices(cl_device_id* subdevices, cl_uint num)
{
int errcode;
- for (auto i = 0; i < num; i++)
+ for (cl_uint i = 0; i < num; i++)
{
errcode = clReleaseDevice(subdevices[i]);
assert(errcode == CL_SUCCESS);
<< "s"
<< endl;
- for (auto i = SUB_DEVICE_IDX_START; i < n_devices; i++)
+ for (cl_uint i = SUB_DEVICE_IDX_START; i < n_devices; i++)
{
cout << "Sub Device"
<< i