index c7b722c99f62207992d525497ec3b9d868c8f0e1..00ef898646608cb49cec7c263b2b39a19f0c829f 100644 (file)
using namespace cl;
using namespace std;
+/*----------------------------------------------------------------------------------------------------------------------*/
static int oclconv_imgproc(char *kernelName, unsigned char *data_in, unsigned char *data_out, int width, int height, int sstride, int dstride)
{
cl_int err = CL_SUCCESS;
logfile << "Entered oclconv_test, width=" << width << " height=" << height << " dstride=" << dstride << " sstride=" << sstride << '\n';
logfile.close();
#endif
+
try
{
Context context(CL_DEVICE_TYPE_ACCELERATOR);
#endif
return 0;
}
+/*----------------------------------------------------------------------------------------------------------------------*/
+static bool canny_first_call = true; /* gates the one-time setup performed inside ocl_canny() */
+static Context canny_ctx(CL_DEVICE_TYPE_ACCELERATOR); /* file-scope object: the context is constructed during static initialization, not on first use */
+static CommandQueue *canny_Q; /* queue created by ocl_canny() on the first device reported by canny_ctx */
+static Buffer *canny_gradX, *canny_gradY, *canny_mag, *canny_scratch, *canny_numItems; /* device buffers bound to kernel args 1, 2, 3, 5 and 6 respectively */
+static Kernel *canny_K; /* the "canny_tiocl" kernel, with all nine arguments bound once in ocl_canny() */
+static Buffer *canny_input, *canny_output; /* kernel args 0 and 4: filled from data_in / read back into data_out on every call */
+
+/******************************************************************************
+ * canny_release_cached - delete every cached Canny OpenCL object and reset its
+ * pointer to NULL.
+ *
+ * Used by ocl_canny() before it (re)runs the one-time setup, so that objects
+ * left behind by a setup attempt that failed part-way are not leaked.  Safe
+ * to call when nothing has been created yet: the pointers are file-scope
+ * statics (zero-initialized) and deleting a NULL pointer is a no-op.
+ *****************************************************************************/
+static void canny_release_cached(void)
+{
+    delete canny_K;         canny_K        = NULL;
+    delete canny_numItems;  canny_numItems = NULL;
+    delete canny_scratch;   canny_scratch  = NULL;
+    delete canny_mag;       canny_mag      = NULL;
+    delete canny_gradY;     canny_gradY    = NULL;
+    delete canny_gradX;     canny_gradX    = NULL;
+    delete canny_output;    canny_output   = NULL;
+    delete canny_input;     canny_input    = NULL;
+    delete canny_Q;         canny_Q        = NULL;
+}
+
+/******************************************************************************
+ * Canny Edge Detection - called on ARM, but algorithm dispatched to 1 DSP
+ *
+ * data_in  : source image, height*width bytes, copied to the device each call
+ * data_out : destination, height*width bytes, read back from the device
+ * height   : passed to the kernel as argument 8
+ * width    : passed to the kernel as argument 7
+ *
+ * Returns 0 on success.  If any OpenCL call throws cl::Error, the error is
+ * printed to cerr and -1 is returned.
+ *
+ * Note: the parameter order is (height, width).
+ * NOTE(review): the "case 3" dispatch in this file passes (width, height) in
+ * that order - confirm which order is intended.
+ *
+ * Note: Assumes arguments are invariant from call 1 to call N. If this is
+ * not the case, then move buffer creation back to the every frame section
+ * rather than being cached in frame 0.
+ *
+ * Note: Also assumes total size is not overly large as it allocates temp
+ * buffers in MSMC
+ *****************************************************************************/
+static int ocl_canny(unsigned char *data_in, unsigned char *data_out, unsigned short height, unsigned short width)
+{
+    int numelem = (int)height*(int)width;
+    try
+    {
+        Event canny_ev, canny_ev1, canny_ev2;
+        /*---------------------------------------------------------------------
+         * Cache as much OpenCL plumbing on the first call, so the cost is not
+         * repeated for every frame.
+         *--------------------------------------------------------------------*/
+        if (canny_first_call)
+        {
+            /*-----------------------------------------------------------------
+             * A previous attempt may have thrown half-way through this block;
+             * drop whatever it managed to create before trying again.
+             *----------------------------------------------------------------*/
+            canny_release_cached();
+
+            std::vector<Device> devices = canny_ctx.getInfo<CL_CONTEXT_DEVICES>();
+            devices.resize(1); // resize to 1 since we are only running on 1 DSP
+            canny_Q = new CommandQueue(canny_ctx, devices[0]);
+
+            canny_input   = new Buffer(canny_ctx, CL_MEM_READ_ONLY,  numelem);
+            canny_output  = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem);
+            canny_gradX   = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short));
+            canny_gradY   = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short));
+            canny_mag     = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short));
+            canny_scratch = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem);
+            canny_numItems= new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, sizeof(int));
+
+            /*-----------------------------------------------------------------
+             * Compile the Kernel Source for the devices
+             *----------------------------------------------------------------*/
+            Program::Binaries binary(1, make_pair(conv_dsp_bin, sizeof(conv_dsp_bin)));
+            Program program(canny_ctx, devices, binary);
+            program.build(devices);
+            canny_K = new Kernel(program, "canny_tiocl");
+
+            canny_K->setArg(0, *canny_input);
+            canny_K->setArg(1, *canny_gradX);
+            canny_K->setArg(2, *canny_gradY);
+            canny_K->setArg(3, *canny_mag);
+            canny_K->setArg(4, *canny_output);
+            canny_K->setArg(5, *canny_scratch);
+            canny_K->setArg(6, *canny_numItems);
+            canny_K->setArg(7, width);
+            canny_K->setArg(8, height);
+
+            /*-----------------------------------------------------------------
+             * Clear the flag only once every cached object exists.  Clearing
+             * it earlier would let a later call skip setup after a failure
+             * and dereference pointers that were never assigned.
+             *----------------------------------------------------------------*/
+            canny_first_call = false;
+        }
+
+        /*---------------------------------------------------------------------
+         * Per-frame work: upload input, run the kernel as a single task, then
+         * read the result back.  The final read is blocking (CL_TRUE), so the
+         * function does not return before data_out has been filled.
+         *--------------------------------------------------------------------*/
+        canny_Q->enqueueWriteBuffer(*canny_input, CL_FALSE, 0, numelem, data_in, NULL, &canny_ev1);
+        canny_Q->enqueueTask(*canny_K, 0, &canny_ev);
+        canny_Q->enqueueReadBuffer (*canny_output, CL_TRUE, 0, numelem, data_out, NULL, &canny_ev2);
+    }
+    catch (const cl::Error &err)
+    {
+        cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl;
+        return (-1);
+    }
+    return 0;
+}
+/*----------------------------------------------------------------------------------------------------------------------*/
#ifdef __cplusplus
extern "C" {
return 0;
}
break;
+ case 3: /* vlib canny */
+ /* filter size is ignored */
+ retval = ocl_canny (data_in, data_out, width, height); /* input and output stride assumed to be == width */
+ break;
default:
break;
}