/*  * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/ * * *  Redistribution and use in source and binary forms, with or without  *  modification, are permitted provided that the following conditions  *  are met:  *  *    Redistributions of source code must retain the above copyright *    notice, this list of conditions and the following disclaimer. * *    Redistributions in binary form must reproduce the above copyright  *    notice, this list of conditions and the following disclaimer in the *    documentation and/or other materials provided with the   *    distribution. * *    Neither the name of Texas Instruments Incorporated nor the names of *    its contributors may be used to endorse or promote products derived *    from this software without specific prior written permission. * *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR  *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.  * */ #define __CL_ENABLE_EXCEPTIONS #include #include #include #include #include "ocl_util.h" #include "conv.dsp_h" //#define VERBOSE using namespace cl; using namespace std; /*----------------------------------------------------------------------------------------------------------------------*/ static int oclconv_imgproc(char *kernelName, unsigned char *data_in, unsigned char *data_out, int width, int height, int sstride, int dstride) { cl_int err = CL_SUCCESS; int bufsize = sstride * height * sizeof(unsigned char); #ifdef VERBOSE ofstream logfile; logfile.open ("/home/root/oclconv_log.txt", ios::out | ios::app); logfile << "Entered oclconv_test, width=" << width << " height=" << height << " dstride=" << dstride << " sstride=" << sstride << '\n'; logfile.close(); #endif try { Context context(CL_DEVICE_TYPE_ACCELERATOR); std::vector devices = context.getInfo(); devices.resize(1); // resize to 1 since we are only running on 1 DSP Buffer bufA (context, CL_MEM_READ_ONLY, bufsize); Buffer bufDst (context, CL_MEM_WRITE_ONLY, bufsize); Program::Binaries binary(1, make_pair(conv_dsp_bin,sizeof(conv_dsp_bin))); Program program = Program(context, devices, binary); program.build(devices); Kernel kernel(program, kernelName); kernel.setArg(0, bufA); kernel.setArg(1, bufDst); kernel.setArg(2, width); kernel.setArg(3, height); kernel.setArg(4, dstride); kernel.setArg(5, sstride); Event ev1,ev2,ev3,ev4; CommandQueue Q(context, devices[0], CL_QUEUE_PROFILING_ENABLE); Q.enqueueWriteBuffer(bufA, CL_FALSE, 0, bufsize, data_in, NULL, &ev1); Q.enqueueTask (kernel, NULL, &ev3); Q.enqueueReadBuffer (bufDst, CL_TRUE, 0, bufsize, data_out, NULL, &ev4); } catch (Error err) { cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; return -1; } #ifdef VERBOSE logfile.open ("/home/root/oclconv_log.txt", ios::out | ios::app); logfile << "Success!" << endl; logfile.close(); #endif return 0; } /*----------------------------------------------------------------------------------------------------------------------*/ static bool canny_first_call = true; static Context canny_ctx(CL_DEVICE_TYPE_ACCELERATOR); static CommandQueue *canny_Q; static Buffer *canny_gradX, *canny_gradY, *canny_mag, *canny_scratch, *canny_numItems; static Kernel *canny_K; static Buffer *canny_input, *canny_output; /****************************************************************************** * Canny Edge Detection - called on ARM, but algorithm dispatched to 1 DSP * * Note: Assumes arguments are invariant from call 1 to call N. If this is * not the case, then move buffer creation back to the every frame section * rather than being cached in frame 0. * * Note: Also assumes total size is not overly large as it allocates temp * buffers in MSMC *****************************************************************************/ static int ocl_canny(unsigned char *data_in, unsigned char *data_out, unsigned short height, unsigned short width) { int numelem = (int)height*(int)width; try { Event canny_ev, canny_ev1, canny_ev2; /*--------------------------------------------------------------------- * Cache as much OpenCL plumbing on the first call, so the cost is not * repeatedfor every frame. *--------------------------------------------------------------------*/ if (canny_first_call) { canny_first_call = false; std::vector devices = canny_ctx.getInfo(); devices.resize(1); // resize to 1 since we are only running on 1 DSP canny_Q = new CommandQueue(canny_ctx, devices[0]); canny_input = new Buffer(canny_ctx, CL_MEM_READ_ONLY, numelem); canny_output = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem); canny_gradX = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short)); canny_gradY = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short)); canny_mag = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short)); canny_scratch = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem); canny_numItems= new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, sizeof(int)); /*--------------------------------------------------------------------- * Compile the Kernel Source for the devices *--------------------------------------------------------------------*/ Program::Binaries binary(1, make_pair(conv_dsp_bin, sizeof(conv_dsp_bin))); Program program(canny_ctx, devices, binary); program.build(devices); canny_K = new Kernel(program, "canny_tiocl"); canny_K->setArg(0, *canny_input); canny_K->setArg(1, *canny_gradX); canny_K->setArg(2, *canny_gradY); canny_K->setArg(3, *canny_mag); canny_K->setArg(4, *canny_output); canny_K->setArg(5, *canny_scratch); canny_K->setArg(6, *canny_numItems); canny_K->setArg(7, width); canny_K->setArg(8, height); } canny_Q->enqueueWriteBuffer(*canny_input, CL_FALSE, 0, numelem, data_in, NULL, &canny_ev1); canny_Q->enqueueTask(*canny_K, 0, &canny_ev); canny_Q->enqueueReadBuffer (*canny_output, CL_TRUE, 0, numelem, data_out, NULL, &canny_ev2); } catch (cl::Error err) { cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; return (-1); } return 0; } /*----------------------------------------------------------------------------------------------------------------------*/ #ifdef __cplusplus extern "C" { #endif int oclconv_kernel(int kernel_type, int filter_size, char *arbkernel, unsigned char *data_in, unsigned char *data_out, int width, int height, int dstride, int sstride) { int retval = -1; switch(kernel_type) { case 0: /* Median */ if(filter_size == 5) { retval = oclconv_imgproc("Median2x2", data_in, data_out, width, height, sstride, dstride); } else if(filter_size == 9) { retval = oclconv_imgproc("Median3x3", data_in, data_out, width, height, sstride, dstride); } break; case 1: /* Sobel */ if(filter_size == 9) { retval = oclconv_imgproc("Sobel3x3", data_in, data_out, width, height, sstride, dstride); } break; case 2: /* conv */ if(filter_size == 25) { retval = oclconv_imgproc("Conv5x5", data_in, data_out, width, height, sstride, dstride); return 0; } break; case 3: /* vlib canny */ /* filter size is ignored */ retval = ocl_canny (data_in, data_out, width, height); /* input and output stride assumed to be == width */ break; case 4: /* user defined kernel */ retval = oclconv_imgproc (arbkernel, data_in, data_out, width, height, sstride, dstride); break; default: break; } return retval; } #ifdef __cplusplus } #endif