1 /*
2 * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/
3 *
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the
15 * distribution.
16 *
17 * Neither the name of Texas Instruments Incorporated nor the names of
18 * its contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 */
35 #define __CL_ENABLE_EXCEPTIONS
36 #include <CL/cl.hpp>
37 #include <iostream>
38 #include <fstream>
39 #include <cstdlib>
40 #include "ocl_util.h"
41 #include "conv.dsp_h"
43 //#define VERBOSE
44 using namespace cl;
45 using namespace std;
47 /*----------------------------------------------------------------------------------------------------------------------*/
48 static int oclconv_imgproc(char *kernelName, unsigned char *data_in, unsigned char *data_out, int width, int height, int sstride, int dstride)
49 {
50 cl_int err = CL_SUCCESS;
51 int bufsize = sstride * height * sizeof(unsigned char);
52 #ifdef VERBOSE
53 ofstream logfile;
54 logfile.open ("/home/root/oclconv_log.txt", ios::out | ios::app);
55 logfile << "Entered oclconv_test, width=" << width << " height=" << height << " dstride=" << dstride << " sstride=" << sstride << '\n';
56 logfile.close();
57 #endif
59 try
60 {
61 Context context(CL_DEVICE_TYPE_ACCELERATOR);
62 std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
63 devices.resize(1); // resize to 1 since we are only running on 1 DSP
65 Buffer bufA (context, CL_MEM_READ_ONLY, bufsize);
66 Buffer bufDst (context, CL_MEM_WRITE_ONLY, bufsize);
68 Program::Binaries binary(1, make_pair(conv_dsp_bin,sizeof(conv_dsp_bin)));
69 Program program = Program(context, devices, binary);
70 program.build(devices);
71 Kernel kernel(program, kernelName);
72 kernel.setArg(0, bufA);
73 kernel.setArg(1, bufDst);
74 kernel.setArg(2, width);
75 kernel.setArg(3, height);
76 kernel.setArg(4, dstride);
77 kernel.setArg(5, sstride);
79 Event ev1,ev2,ev3,ev4;
81 CommandQueue Q(context, devices[0], CL_QUEUE_PROFILING_ENABLE);
83 Q.enqueueWriteBuffer(bufA, CL_FALSE, 0, bufsize, data_in, NULL, &ev1);
84 Q.enqueueTask (kernel, NULL, &ev3);
85 Q.enqueueReadBuffer (bufDst, CL_TRUE, 0, bufsize, data_out, NULL, &ev4);
86 }
87 catch (Error err)
88 { cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; return -1; }
89 #ifdef VERBOSE
90 logfile.open ("/home/root/oclconv_log.txt", ios::out | ios::app);
91 logfile << "Success!" << endl;
92 logfile.close();
93 #endif
94 return 0;
95 }
96 /*----------------------------------------------------------------------------------------------------------------------*/
97 static bool canny_first_call = true;
98 static Context canny_ctx(CL_DEVICE_TYPE_ACCELERATOR);
99 static CommandQueue *canny_Q;
100 static Buffer *canny_gradX, *canny_gradY, *canny_mag, *canny_scratch, *canny_numItems;
101 static Kernel *canny_K;
102 static Buffer *canny_input, *canny_output;
104 /******************************************************************************
105 * Canny Edge Detection - called on ARM, but algorithm dispatched to 1 DSP
106 *
107 * Note: Assumes arguments are invariant from call 1 to call N. If this is
108 * not the case, then move buffer creation back to the every frame section
109 * rather than being cached in frame 0.
110 *
111 * Note: Also assumes total size is not overly large as it allocates temp
112 * buffers in MSMC
113 *****************************************************************************/
114 static int ocl_canny(unsigned char *data_in, unsigned char *data_out, unsigned short height, unsigned short width)
115 {
116 int numelem = (int)height*(int)width;
117 try
118 {
119 Event canny_ev, canny_ev1, canny_ev2;
120 /*---------------------------------------------------------------------
121 * Cache as much OpenCL plumbing on the first call, so the cost is not
122 * repeatedfor every frame.
123 *--------------------------------------------------------------------*/
124 if (canny_first_call)
125 {
126 canny_first_call = false;
128 std::vector<Device> devices = canny_ctx.getInfo<CL_CONTEXT_DEVICES>();
129 devices.resize(1); // resize to 1 since we are only running on 1 DSP
130 canny_Q = new CommandQueue(canny_ctx, devices[0]);
132 canny_input = new Buffer(canny_ctx, CL_MEM_READ_ONLY, numelem);
133 canny_output = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem);
134 canny_gradX = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short));
135 canny_gradY = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short));
136 canny_mag = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short));
137 canny_scratch = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem);
138 canny_numItems= new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, sizeof(int));
140 /*---------------------------------------------------------------------
141 * Compile the Kernel Source for the devices
142 *--------------------------------------------------------------------*/
143 Program::Binaries binary(1, make_pair(conv_dsp_bin, sizeof(conv_dsp_bin)));
144 Program program(canny_ctx, devices, binary);
145 program.build(devices);
146 canny_K = new Kernel(program, "canny_tiocl");
148 canny_K->setArg(0, *canny_input);
149 canny_K->setArg(1, *canny_gradX);
150 canny_K->setArg(2, *canny_gradY);
151 canny_K->setArg(3, *canny_mag);
152 canny_K->setArg(4, *canny_output);
153 canny_K->setArg(5, *canny_scratch);
154 canny_K->setArg(6, *canny_numItems);
155 canny_K->setArg(7, width);
156 canny_K->setArg(8, height);
157 }
159 canny_Q->enqueueWriteBuffer(*canny_input, CL_FALSE, 0, numelem, data_in, NULL, &canny_ev1);
160 canny_Q->enqueueTask(*canny_K, 0, &canny_ev);
161 canny_Q->enqueueReadBuffer (*canny_output, CL_TRUE, 0, numelem, data_out, NULL, &canny_ev2);
162 }
163 catch (cl::Error err)
164 {
165 cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl;
166 return (-1);
167 }
168 return 0;
169 }
170 /*----------------------------------------------------------------------------------------------------------------------*/
172 #ifdef __cplusplus
173 extern "C" {
174 #endif
175 int oclconv_kernel(int kernel_type, int filter_size, char *arbkernel,
176 unsigned char *data_in, unsigned char *data_out,
177 int width, int height, int dstride, int sstride)
178 {
179 int retval = -1;
180 switch(kernel_type)
181 {
182 case 0: /* Median */
183 if(filter_size == 5) {
184 retval = oclconv_imgproc("Median2x2", data_in, data_out, width, height, sstride, dstride);
185 } else if(filter_size == 9) {
186 retval = oclconv_imgproc("Median3x3", data_in, data_out, width, height, sstride, dstride);
187 }
188 break;
189 case 1: /* Sobel */
190 if(filter_size == 9) {
191 retval = oclconv_imgproc("Sobel3x3", data_in, data_out, width, height, sstride, dstride);
192 }
193 break;
194 case 2: /* conv */
195 if(filter_size == 25) {
196 retval = oclconv_imgproc("Conv5x5", data_in, data_out, width, height, sstride, dstride);
197 return 0;
198 }
199 break;
200 case 3: /* vlib canny */
201 /* filter size is ignored */
202 retval = ocl_canny (data_in, data_out, width, height); /* input and output stride assumed to be == width */
203 break;
204 case 4: /* user defined kernel */
205 retval = oclconv_imgproc (arbkernel, data_in, data_out, width, height, sstride, dstride);
206 break;
207 default:
208 break;
209 }
210 return retval;
211 }
212 #ifdef __cplusplus
213 }
214 #endif