aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/gstdsp66videokernel.c1
-rw-r--r--src/gstdsp66videokernel.h3
-rw-r--r--src/kernels/oclconv/conv.cl5
-rw-r--r--src/kernels/oclconv/oclconv.cpp80
4 files changed, 88 insertions, 1 deletions
diff --git a/src/gstdsp66videokernel.c b/src/gstdsp66videokernel.c
index 48c7764..8174710 100644
--- a/src/gstdsp66videokernel.c
+++ b/src/gstdsp66videokernel.c
@@ -68,6 +68,7 @@ static const GEnumValue dsp66_video_kerneltype[] = {
68 {GST_DSP66_VIDEO_KERNELTYPE_MEDIAN, "Kernel median", "0"}, 68 {GST_DSP66_VIDEO_KERNELTYPE_MEDIAN, "Kernel median", "0"},
69 {GST_DSP66_VIDEO_KERNELTYPE_SOBEL, "Kernel sobel", "1"}, 69 {GST_DSP66_VIDEO_KERNELTYPE_SOBEL, "Kernel sobel", "1"},
70 {GST_DSP66_VIDEO_KERNELTYPE_CONV, "Kernel conv", "2"}, 70 {GST_DSP66_VIDEO_KERNELTYPE_CONV, "Kernel conv", "2"},
71 {GST_DSP66_VIDEO_KERNELTYPE_CANNY, "Kernel canny", "3"},
71 {0, NULL, NULL}, 72 {0, NULL, NULL},
72}; 73};
73 74
diff --git a/src/gstdsp66videokernel.h b/src/gstdsp66videokernel.h
index 9d79b15..b78354b 100644
--- a/src/gstdsp66videokernel.h
+++ b/src/gstdsp66videokernel.h
@@ -52,7 +52,8 @@ typedef enum
/* Identifies which DSP kernel to run; the numeric values 0..3 line up with the "0".."3" strings in the dsp66_video_kerneltype table. */
52typedef enum { 52typedef enum {
53 GST_DSP66_VIDEO_KERNELTYPE_MEDIAN = 0, 53 GST_DSP66_VIDEO_KERNELTYPE_MEDIAN = 0,
54 GST_DSP66_VIDEO_KERNELTYPE_SOBEL = 1, 54 GST_DSP66_VIDEO_KERNELTYPE_SOBEL = 1,
55 GST_DSP66_VIDEO_KERNELTYPE_CONV = 2 55 GST_DSP66_VIDEO_KERNELTYPE_CONV = 2,
/* Added by this change; oclconv_kernel() handles it in its "case 3" branch. */
56 GST_DSP66_VIDEO_KERNELTYPE_CANNY = 3
56} GstDsp66VideoKernelType; 57} GstDsp66VideoKernelType;
57 58
58struct _GstDsp66VideoKernel { 59struct _GstDsp66VideoKernel {
diff --git a/src/kernels/oclconv/conv.cl b/src/kernels/oclconv/conv.cl
index 6c86e52..dab866e 100644
--- a/src/kernels/oclconv/conv.cl
+++ b/src/kernels/oclconv/conv.cl
@@ -31,6 +31,11 @@
31void IMG_median_3x3_8 (const unsigned char *restrict in_data, int cols, unsigned char * restrict out_data); 31void IMG_median_3x3_8 (const unsigned char *restrict in_data, int cols, unsigned char * restrict out_data);
32void IMG_sobel_3x3_8 (const unsigned char *restrict in_data, unsigned char *restrict out_data, int rows, int cols); 32void IMG_sobel_3x3_8 (const unsigned char *restrict in_data, unsigned char *restrict out_data, int rows, int cols);
33void IMG_conv_3x3_i8_c8s (const unsigned char *restrict in_data, unsigned char *restrict out_data, int cols, const char *restrict mask, int shift); 33void IMG_conv_3x3_i8_c8s (const unsigned char *restrict in_data, unsigned char *restrict out_data, int cols, const char *restrict mask, int shift);
/* Canny entry point; only declared here. ARGS is presumably a parameter-list macro defined elsewhere - TODO confirm. */
34void VLIB_Canny_Edge_Detection (ARGS);
/* OpenCL kernel wrapper: forwards its arguments, unchanged and in the same order, to VLIB_Canny_Edge_Detection(). */
35kernel void canny_tiocl(ARGS)
36{
37 VLIB_Canny_Edge_Detection(pInput, pBufGradX, pBufGradY, pBufMag, pBufOut, pScratch, numItems, width, height);
38}
34 39
35kernel void Median3x3(global const uchar* src, global uchar *dest, 40kernel void Median3x3(global const uchar* src, global uchar *dest,
36 const int width, const int height, 41 const int width, const int height,
diff --git a/src/kernels/oclconv/oclconv.cpp b/src/kernels/oclconv/oclconv.cpp
index c7b722c..00ef898 100644
--- a/src/kernels/oclconv/oclconv.cpp
+++ b/src/kernels/oclconv/oclconv.cpp
@@ -37,6 +37,7 @@
37using namespace cl; 37using namespace cl;
38using namespace std; 38using namespace std;
39 39
40/*----------------------------------------------------------------------------------------------------------------------*/
40static int oclconv_imgproc(char *kernelName, unsigned char *data_in, unsigned char *data_out, int width, int height, int sstride, int dstride) 41static int oclconv_imgproc(char *kernelName, unsigned char *data_in, unsigned char *data_out, int width, int height, int sstride, int dstride)
41{ 42{
42 cl_int err = CL_SUCCESS; 43 cl_int err = CL_SUCCESS;
@@ -47,6 +48,7 @@ static int oclconv_imgproc(char *kernelName, unsigned char *data_in, unsigned ch
47 logfile << "Entered oclconv_test, width=" << width << " height=" << height << " dstride=" << dstride << " sstride=" << sstride << '\n'; 48 logfile << "Entered oclconv_test, width=" << width << " height=" << height << " dstride=" << dstride << " sstride=" << sstride << '\n';
48 logfile.close(); 49 logfile.close();
49#endif 50#endif
51
50 try 52 try
51 { 53 {
52 Context context(CL_DEVICE_TYPE_ACCELERATOR); 54 Context context(CL_DEVICE_TYPE_ACCELERATOR);
@@ -84,7 +86,81 @@ static int oclconv_imgproc(char *kernelName, unsigned char *data_in, unsigned ch
84#endif 86#endif
85 return 0; 87 return 0;
86} 88}
89/*----------------------------------------------------------------------------------------------------------------------*/
/* State shared by every ocl_canny() call. The pointers start out null (static storage) and are filled in during the first call. */
/* True until ocl_canny() has run its one-time setup block. */
90static bool canny_first_call = true;
/* NOTE(review): constructed during static initialization, so an exception thrown here is outside ocl_canny()'s try block. */
91static Context canny_ctx(CL_DEVICE_TYPE_ACCELERATOR);
92static CommandQueue *canny_Q;
/* Device-side work buffers bound to kernel arguments 1, 2, 3, 5 and 6 respectively. */
93static Buffer *canny_gradX, *canny_gradY, *canny_mag, *canny_scratch, *canny_numItems;
94static Kernel *canny_K;
/* Frame buffers: input is written before each run, output is read back after it (kernel arguments 0 and 4). */
95static Buffer *canny_input, *canny_output;
96
97/******************************************************************************
98 * Canny Edge Detection - called on ARM, but algorithm dispatched to 1 DSP
99 *
100 * Note: Assumes arguments are invariant from call 1 to call N. If this is
101 * not the case, then move buffer creation back to the every frame section
102 * rather than being cached in frame 0.
103 *
104 * Note: Also assumes total size is not overly large as it allocates temp
105 * buffers in MSMC
106 *****************************************************************************/
107static int ocl_canny(unsigned char *data_in, unsigned char *data_out, unsigned short height, unsigned short width)
108{
109 int numelem = (int)height*(int)width;
110 try
111 {
112 Event canny_ev, canny_ev1, canny_ev2;
113 /*---------------------------------------------------------------------
114 * Cache as much OpenCL plumbing on the first call, so the cost is not
115 * repeatedfor every frame.
116 *--------------------------------------------------------------------*/
117 if (canny_first_call)
118 {
119 canny_first_call = false;
87 120
121 std::vector<Device> devices = canny_ctx.getInfo<CL_CONTEXT_DEVICES>();
122 devices.resize(1); // resize to 1 since we are only running on 1 DSP
123 canny_Q = new CommandQueue(canny_ctx, devices[0]);
124
125 canny_input = new Buffer(canny_ctx, CL_MEM_READ_ONLY, numelem);
126 canny_output = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem);
127 canny_gradX = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short));
128 canny_gradY = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short));
129 canny_mag = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short));
130 canny_scratch = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem);
131 canny_numItems= new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, sizeof(int));
132
133 /*---------------------------------------------------------------------
134 * Compile the Kernel Source for the devices
135 *--------------------------------------------------------------------*/
136 Program::Binaries binary(1, make_pair(conv_dsp_bin, sizeof(conv_dsp_bin)));
137 Program program(canny_ctx, devices, binary);
138 program.build(devices);
139 canny_K = new Kernel(program, "canny_tiocl");
140
141 canny_K->setArg(0, *canny_input);
142 canny_K->setArg(1, *canny_gradX);
143 canny_K->setArg(2, *canny_gradY);
144 canny_K->setArg(3, *canny_mag);
145 canny_K->setArg(4, *canny_output);
146 canny_K->setArg(5, *canny_scratch);
147 canny_K->setArg(6, *canny_numItems);
148 canny_K->setArg(7, width);
149 canny_K->setArg(8, height);
150 }
151
152 canny_Q->enqueueWriteBuffer(*canny_input, CL_FALSE, 0, numelem, data_in, NULL, &canny_ev1);
153 canny_Q->enqueueTask(*canny_K, 0, &canny_ev);
154 canny_Q->enqueueReadBuffer (*canny_output, CL_TRUE, 0, numelem, data_out, NULL, &canny_ev2);
155 }
156 catch (cl::Error err)
157 {
158 cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl;
159 return (-1);
160 }
161 return 0;
162}
163/*----------------------------------------------------------------------------------------------------------------------*/
88 164
89#ifdef __cplusplus 165#ifdef __cplusplus
90extern "C" { 166extern "C" {
@@ -114,6 +190,10 @@ int oclconv_kernel(int kernel_type, int filter_size,
114 return 0; 190 return 0;
115 } 191 }
116 break; 192 break;
193 case 3: /* vlib canny */
194 /* filter size is ignored */
195 retval = ocl_canny (data_in, data_out, width, height); /* input and output stride assumed to be == width */
196 break;
117 default: 197 default:
118 break; 198 break;
119 } 199 }