aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDjordje Senicic2016-02-26 19:21:43 -0600
committerDjordje Senicic2016-02-26 19:21:43 -0600
commitde1a2383f3598509794d710e79eb4909a29c4b04 (patch)
treec2550e50e40b70e02681a82dba4cb12b12907aef
parent7808287706054e287a113fe3291741758454fabd (diff)
downloadgst-plugin-dsp66-de1a2383f3598509794d710e79eb4909a29c4b04.tar.gz
gst-plugin-dsp66-de1a2383f3598509794d710e79eb4909a29c4b04.tar.xz
gst-plugin-dsp66-de1a2383f3598509794d710e79eb4909a29c4b04.zip
Add OpenCL video kernels for median and sobel img processing functions. They are invoked via C-to-CPP connector
-rw-r--r--src/kernels/make.inc79
-rw-r--r--src/kernels/oclconv/conv.cl99
-rw-r--r--src/kernels/oclconv/oclconv.cpp152
3 files changed, 330 insertions, 0 deletions
diff --git a/src/kernels/make.inc b/src/kernels/make.inc
new file mode 100644
index 0000000..797e64b
--- /dev/null
+++ b/src/kernels/make.inc
@@ -0,0 +1,79 @@
1DSP_INCLUDE = -I$(TI_OCL_CGT_INSTALL)/include
2DSP_INCLUDE += -I$(TARGET_ROOTDIR)/usr/share/ti/cgt-c6x/include
3DSP_INCLUDE += -I$(TARGET_ROOTDIR)/usr/share/ti/opencl
4TI_IMGLIB_DIR=${TARGET_ROOTDIR}/usr/share/ti/ti-imglib-c66x-tree
5TI_VLIB_DIR=${TARGET_ROOTDIR}/usr/share/ti/ti-vlib-c66x-tree
6
7CPP = g++
8CL6X = cl6x -mv6600 --abi=eabi $(DSP_INCLUDE)
9CLOCL = clocl
10
11LIBS = -lOpenCL -locl_util
12
13UNAME_M :=$(shell uname -m)
14
15# ----------------------------------------------------------------------------
16# If TI_OCL_INSTALL is set, setup make from that location
17# ----------------------------------------------------------------------------
18ifneq ($(TI_OCL_INSTALL),)
19 CPP = g++
20 CPP_FLAGS += -I$(TI_OCL_INSTALL)/usr/include
21 LD_FLAGS += -L$(TI_OCL_INSTALL)/usr/lib
22 LIBS += -lbfd
23
24# ----------------------------------------------------------------------------
25# Otherwise, if making on x86 assume cross compile for Arm host
26# ----------------------------------------------------------------------------
27else ifneq (,$(findstring 86, $(UNAME_M)))
28 .DEFAULT_GOAL := cross
29
30 # In a cross compile environment we are assuming that the EVM file system
31 # is located on the build host and necessary ARM libraries are installed
32 # on that file system.
33 ifneq ($(MAKECMDGOALS),clean)
34 ifeq ($(TARGET_ROOTDIR),)
35 $(error Environment variable TARGET_ROOTDIR must be defined. Set it to point at the EVM root file system)
36 endif
37 endif
38
39 # gcc ARM cross compiler will not, by default, search the host's
40 # /usr/include. Explicitly specify here to find dependent vendor headers
41 cross: override CPP = arm-linux-gnueabihf-g++
42 cross: CPP_FLAGS += -I$(TARGET_ROOTDIR)/usr/include -idirafter /usr/include
43
44 # If cross-compilineg, provide path to dependent ARM libraries on the
45 # target filesystem
46 cross: LD_FLAGS = -L$(TARGET_ROOTDIR)/lib -L$(TARGET_ROOTDIR)/usr/lib -Wl,-rpath-link,$(TARGET_ROOTDIR)/lib -Wl,-rpath-link,$(TARGET_ROOTDIR)/usr/lib
47endif
48
49%.o: %.cpp
50 @$(CPP) -c $(CPP_FLAGS) $<
51 @echo Compiling $<
52
53%.o: %.c
54 @$(CPP) -c $(CPP_FLAGS) $<
55 @echo Compiling $<
56
57%.obj: %.c
58 @$(CL6X) -c $(CL6X_FLAGS) $<
59 @echo Compiling $<
60
61%.out: %.cl
62 @$(CLOCL) $(CLOCL_FLAGS) $^
63 @echo Compiling $<
64
65%.dsp_h: %.cl
66 @$(CLOCL) -t $(CLOCL_FLAGS) $^
67 @echo Compiling $<
68
69$(EXE):
70
71cross: $(EXE)
72
73clean::
74 @rm -f $(EXE) *.o *.obj *.out *.asm *.if *.opt *.bc *.objc *.map *.bin *.dsp_h
75
76test: clean $(EXE)
77 @echo Running $(EXE)
78 @./$(EXE) >> /dev/null
79 @if [ $$? -ne 0 ] ; then echo "FAILED !!!" ; fi
diff --git a/src/kernels/oclconv/conv.cl b/src/kernels/oclconv/conv.cl
new file mode 100644
index 0000000..bc9aced
--- /dev/null
+++ b/src/kernels/oclconv/conv.cl
@@ -0,0 +1,99 @@
1/******************************************************************************
2 * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28#define PIX_SORT(a,b) { if ((a)>(b)) PIX_SWAP((a),(b)); }
29#define PIX_SWAP(a,b) { unsigned char temp=(a);(a)=(b);(b)=temp; }
30
31void IMG_median_3x3_8 (const unsigned char *restrict in_data, int cols, unsigned char * restrict out_data);
32void IMG_sobel_3x3_8 (const unsigned char *restrict in_data, unsigned char * restrict out_data, int rows, int cols);
33
34kernel void Median3x3(global const uchar* src, global uchar *dest,
35 const int width, const int height,
36 const int dstride, const int sstride)
37{
38 int i;
39 const int max_X = width - 1;
40 const int max_Y = height - 1;
41 for (i = 0; i < max_Y; i++) {
42 IMG_median_3x3_8 ((const unsigned char *)src, max_X, (const unsigned char *)dest);
43 src += sstride;
44 dest += dstride;
45 }
46}
47
48kernel void Sobel3x3(global const uchar* src, global uchar *dest,
49 const int width, const int height,
50 const int dstride, const int sstride)
51{
52 IMG_sobel_3x3_8((const unsigned char *)src, (const unsigned char *)dest, width, height);
53}
54
55kernel void Median2x2 (global const uchar* src, global uchar *dest,
56 const int width, const int height,
57 const int dstride, const int sstride)
58{
59/***
60 int id = get_global_id(0);
61 c[id] = a[id] + b[id];
62***/
63 unsigned char p[5];
64 int i, j, k;
65
66 /* copy the top and bottom rows into the result array */
67 for (i = 0; i < width; i++) {
68 dest[i] = src[i];
69 dest[(height - 1) * dstride + i] = src[(height - 1) * sstride + i];
70 }
71
72 /* process the interior pixels */
73 for (k = 2; k < height; k++) {
74 dest += dstride;
75 src += sstride;
76
77 dest[0] = src[0];
78 for (j = 2, i = 1; j < width; j++, i++) {
79 p[0] = src[i - sstride];
80 p[1] = src[i - 1];
81 p[2] = src[i];
82 p[3] = src[i + 1];
83 p[4] = src[i + sstride];
84 PIX_SORT (p[0], p[1]);
85 PIX_SORT (p[3], p[4]);
86 PIX_SORT (p[0], p[3]);
87 PIX_SORT (p[1], p[4]);
88 PIX_SORT (p[1], p[2]);
89 PIX_SORT (p[2], p[3]);
90 PIX_SORT (p[1], p[2]);
91 /* debug line */
92 if(k > 24 && k < 32) dest[i] = 0;
93 else
94 dest[i] = p[2];
95 }
96 dest[i] = src[i];
97 }
98}
99/* nothing past this point */
diff --git a/src/kernels/oclconv/oclconv.cpp b/src/kernels/oclconv/oclconv.cpp
new file mode 100644
index 0000000..7d070e1
--- /dev/null
+++ b/src/kernels/oclconv/oclconv.cpp
@@ -0,0 +1,152 @@
1/******************************************************************************
2 * Copyright (c) 2013-2014, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28#define __CL_ENABLE_EXCEPTIONS
29#include <CL/cl.hpp>
30#include <iostream>
31#include <fstream>
32#include <cstdlib>
33#include "ocl_util.h"
34#include "conv.dsp_h"
35
36//#define VERBOSE
37using namespace cl;
38using namespace std;
39
40static const int NumElements = 1920 * 1200; /* Maximum resolution */
41
42static cl_uchar src [NumElements];
43static cl_uchar dst [NumElements];
44
45static int oclconv_imgproc(char *kernelName, unsigned char *data_in, unsigned char *data_out, int width, int height, int sstride, int dstride)
46{
47 cl_int err = CL_SUCCESS;
48 int bufsize = sizeof(src);
49#ifdef VERBOSE
50 ofstream logfile;
51 logfile.open ("/home/root/oclconv_log.txt", ios::out | ios::app);
52 logfile << "Entered oclconv_test, width=" << width << " height=" << height << " dstride=" << dstride << " sstride=" << sstride << '\n';
53 logfile.close();
54#endif
55 unsigned char *ptr_src = src;
56 unsigned char *ptr_data_in = data_in;
57 for (int y=0; y < height; y++)
58 {
59 memcpy (ptr_src, ptr_data_in, width);
60 ptr_src += sstride;
61 ptr_data_in += sstride;
62 }
63#ifdef VERBOSE
64 logfile.open ("/home/root/oclconv_log.txt", ios::out | ios::app);
65 logfile << "Transfer done, starting openCL specific commands" << '\n';
66 logfile.close();
67#endif
68 try
69 {
70 Context context(CL_DEVICE_TYPE_ACCELERATOR);
71 std::vector<Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
72 devices.resize(1); // resize to 1 since we are only running on 1 DSP
73
74 Buffer bufA (context, CL_MEM_READ_ONLY, bufsize);
75 Buffer bufDst (context, CL_MEM_WRITE_ONLY, bufsize);
76
77 Program::Binaries binary(1, make_pair(conv_dsp_bin,sizeof(conv_dsp_bin)));
78 Program program = Program(context, devices, binary);
79 program.build(devices);
80 Kernel kernel(program, kernelName);
81 kernel.setArg(0, bufA);
82 kernel.setArg(1, bufDst);
83 kernel.setArg(2, width);
84 kernel.setArg(3, height);
85 kernel.setArg(4, dstride);
86 kernel.setArg(5, sstride);
87
88 Event ev1,ev2,ev3,ev4;
89
90 CommandQueue Q(context, devices[0], CL_QUEUE_PROFILING_ENABLE);
91
92 Q.enqueueWriteBuffer(bufA, CL_FALSE, 0, bufsize, src, NULL, &ev1);
93 //Q.enqueueNDRangeKernel(kernel, NullRange, NDRange(NumVecElements),
94 // NDRange(WorkGroupSize), NULL, &ev3);
95 Q.enqueueTask (kernel, NULL, &ev3);
96 Q.enqueueReadBuffer (bufDst, CL_TRUE, 0, bufsize, dst, NULL, &ev4);
97 }
98 catch (Error err)
99 { cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; }
100#ifdef VERBOSE
101 logfile.open ("/home/root/oclconv_log.txt", ios::out | ios::app);
102 logfile << "OpenCL done, start transfer to output array!" << '\n';
103 logfile.close();
104#endif
105
106 unsigned char *ptr_dst = dst;
107 unsigned char *ptr_data_out = data_out;
108 for (int y=0; y < height; y++)
109 {
110 memcpy (ptr_data_out, ptr_dst, width);
111 ptr_dst += dstride;
112 ptr_data_out += dstride;
113 }
114
115#ifdef VERBOSE
116 logfile.open ("/home/root/oclconv_log.txt", ios::out | ios::app);
117 logfile << "Success!" << endl;
118 logfile.close();
119#endif
120 return 0;
121}
122
123#ifdef __cplusplus
124extern "C" {
125#endif
126int oclconv_kernel(int kernel_type, int filter_size,
127 unsigned char *data_in, unsigned char *data_out,
128 int width, int height, int dstride, int sstride)
129{
130 int retval = -1;
131 switch(kernel_type)
132 {
133 case 0: /* Median */
134 if(filter_size == 5) {
135 retval = oclconv_imgproc("Median2x2", data_in, data_out, width, height, sstride, dstride);
136 } else if(filter_size == 9) {
137 retval = oclconv_imgproc("Median3x3", data_in, data_out, width, height, sstride, dstride);
138 }
139 break;
140 case 1: /* Sobel */
141 if(filter_size == 9) {
142 retval = oclconv_imgproc("Sobel3x3", data_in, data_out, width, height, sstride, dstride);
143 }
144 break;
145 default:
146 break;
147 }
148 return retval;
149}
150#ifdef __cplusplus
151}
152#endif