diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/gstdsp66videokernel.c | 1 | ||||
-rw-r--r-- | src/gstdsp66videokernel.h | 3 | ||||
-rw-r--r-- | src/kernels/oclconv/conv.cl | 5 | ||||
-rw-r--r-- | src/kernels/oclconv/oclconv.cpp | 80 |
4 files changed, 88 insertions, 1 deletions
diff --git a/src/gstdsp66videokernel.c b/src/gstdsp66videokernel.c index 48c7764..8174710 100644 --- a/src/gstdsp66videokernel.c +++ b/src/gstdsp66videokernel.c | |||
@@ -68,6 +68,7 @@ static const GEnumValue dsp66_video_kerneltype[] = { | |||
68 | {GST_DSP66_VIDEO_KERNELTYPE_MEDIAN, "Kernel median", "0"}, | 68 | {GST_DSP66_VIDEO_KERNELTYPE_MEDIAN, "Kernel median", "0"}, |
69 | {GST_DSP66_VIDEO_KERNELTYPE_SOBEL, "Kernel sobel", "1"}, | 69 | {GST_DSP66_VIDEO_KERNELTYPE_SOBEL, "Kernel sobel", "1"}, |
70 | {GST_DSP66_VIDEO_KERNELTYPE_CONV, "Kernel conv", "2"}, | 70 | {GST_DSP66_VIDEO_KERNELTYPE_CONV, "Kernel conv", "2"}, |
71 | {GST_DSP66_VIDEO_KERNELTYPE_CANNY, "Kernel canny", "3"}, | ||
71 | {0, NULL, NULL}, | 72 | {0, NULL, NULL}, |
72 | }; | 73 | }; |
73 | 74 | ||
diff --git a/src/gstdsp66videokernel.h b/src/gstdsp66videokernel.h index 9d79b15..b78354b 100644 --- a/src/gstdsp66videokernel.h +++ b/src/gstdsp66videokernel.h | |||
@@ -52,7 +52,8 @@ typedef enum | |||
52 | typedef enum { | 52 | typedef enum { |
53 | GST_DSP66_VIDEO_KERNELTYPE_MEDIAN = 0, | 53 | GST_DSP66_VIDEO_KERNELTYPE_MEDIAN = 0, |
54 | GST_DSP66_VIDEO_KERNELTYPE_SOBEL = 1, | 54 | GST_DSP66_VIDEO_KERNELTYPE_SOBEL = 1, |
55 | GST_DSP66_VIDEO_KERNELTYPE_CONV = 2 | 55 | GST_DSP66_VIDEO_KERNELTYPE_CONV = 2, |
56 | GST_DSP66_VIDEO_KERNELTYPE_CANNY = 3 | ||
56 | } GstDsp66VideoKernelType; | 57 | } GstDsp66VideoKernelType; |
57 | 58 | ||
58 | struct _GstDsp66VideoKernel { | 59 | struct _GstDsp66VideoKernel { |
diff --git a/src/kernels/oclconv/conv.cl b/src/kernels/oclconv/conv.cl index 6c86e52..dab866e 100644 --- a/src/kernels/oclconv/conv.cl +++ b/src/kernels/oclconv/conv.cl | |||
@@ -31,6 +31,11 @@ | |||
31 | void IMG_median_3x3_8 (const unsigned char *restrict in_data, int cols, unsigned char * restrict out_data); | 31 | void IMG_median_3x3_8 (const unsigned char *restrict in_data, int cols, unsigned char * restrict out_data); |
32 | void IMG_sobel_3x3_8 (const unsigned char *restrict in_data, unsigned char *restrict out_data, int rows, int cols); | 32 | void IMG_sobel_3x3_8 (const unsigned char *restrict in_data, unsigned char *restrict out_data, int rows, int cols); |
33 | void IMG_conv_3x3_i8_c8s (const unsigned char *restrict in_data, unsigned char *restrict out_data, int cols, const char *restrict mask, int shift); | 33 | void IMG_conv_3x3_i8_c8s (const unsigned char *restrict in_data, unsigned char *restrict out_data, int cols, const char *restrict mask, int shift); |
34 | void VLIB_Canny_Edge_Detection (ARGS); | ||
35 | kernel void canny_tiocl(ARGS) | ||
36 | { | ||
37 | VLIB_Canny_Edge_Detection(pInput, pBufGradX, pBufGradY, pBufMag, pBufOut, pScratch, numItems, width, height); | ||
38 | } | ||
34 | 39 | ||
35 | kernel void Median3x3(global const uchar* src, global uchar *dest, | 40 | kernel void Median3x3(global const uchar* src, global uchar *dest, |
36 | const int width, const int height, | 41 | const int width, const int height, |
diff --git a/src/kernels/oclconv/oclconv.cpp b/src/kernels/oclconv/oclconv.cpp index c7b722c..00ef898 100644 --- a/src/kernels/oclconv/oclconv.cpp +++ b/src/kernels/oclconv/oclconv.cpp | |||
@@ -37,6 +37,7 @@ | |||
37 | using namespace cl; | 37 | using namespace cl; |
38 | using namespace std; | 38 | using namespace std; |
39 | 39 | ||
40 | /*----------------------------------------------------------------------------------------------------------------------*/ | ||
40 | static int oclconv_imgproc(char *kernelName, unsigned char *data_in, unsigned char *data_out, int width, int height, int sstride, int dstride) | 41 | static int oclconv_imgproc(char *kernelName, unsigned char *data_in, unsigned char *data_out, int width, int height, int sstride, int dstride) |
41 | { | 42 | { |
42 | cl_int err = CL_SUCCESS; | 43 | cl_int err = CL_SUCCESS; |
@@ -47,6 +48,7 @@ static int oclconv_imgproc(char *kernelName, unsigned char *data_in, unsigned ch | |||
47 | logfile << "Entered oclconv_test, width=" << width << " height=" << height << " dstride=" << dstride << " sstride=" << sstride << '\n'; | 48 | logfile << "Entered oclconv_test, width=" << width << " height=" << height << " dstride=" << dstride << " sstride=" << sstride << '\n'; |
48 | logfile.close(); | 49 | logfile.close(); |
49 | #endif | 50 | #endif |
51 | |||
50 | try | 52 | try |
51 | { | 53 | { |
52 | Context context(CL_DEVICE_TYPE_ACCELERATOR); | 54 | Context context(CL_DEVICE_TYPE_ACCELERATOR); |
@@ -84,7 +86,81 @@ static int oclconv_imgproc(char *kernelName, unsigned char *data_in, unsigned ch | |||
84 | #endif | 86 | #endif |
85 | return 0; | 87 | return 0; |
86 | } | 88 | } |
89 | /*----------------------------------------------------------------------------------------------------------------------*/ | ||
90 | static bool canny_first_call = true; | ||
91 | static Context canny_ctx(CL_DEVICE_TYPE_ACCELERATOR); | ||
92 | static CommandQueue *canny_Q; | ||
93 | static Buffer *canny_gradX, *canny_gradY, *canny_mag, *canny_scratch, *canny_numItems; | ||
94 | static Kernel *canny_K; | ||
95 | static Buffer *canny_input, *canny_output; | ||
96 | |||
97 | /****************************************************************************** | ||
98 | * Canny Edge Detection - called on ARM, but algorithm dispatched to 1 DSP | ||
99 | * | ||
100 | * Note: Assumes arguments are invariant from call 1 to call N. If this is | ||
101 | * not the case, then move buffer creation back into the per-frame section | ||
102 | * rather than caching it on the first call. | ||
103 | * | ||
104 | * Note: Also assumes total size is not overly large as it allocates temp | ||
105 | * buffers in MSMC | ||
106 | *****************************************************************************/ | ||
107 | static int ocl_canny(unsigned char *data_in, unsigned char *data_out, unsigned short height, unsigned short width) | ||
108 | { | ||
109 | int numelem = (int)height*(int)width; | ||
110 | try | ||
111 | { | ||
112 | Event canny_ev, canny_ev1, canny_ev2; | ||
113 | /*--------------------------------------------------------------------- | ||
114 | * Cache as much OpenCL plumbing as possible on the first call, so the cost is | ||
115 | * not repeated for every frame. | ||
116 | *--------------------------------------------------------------------*/ | ||
117 | if (canny_first_call) | ||
118 | { | ||
119 | canny_first_call = false; | ||
87 | 120 | ||
121 | std::vector<Device> devices = canny_ctx.getInfo<CL_CONTEXT_DEVICES>(); | ||
122 | devices.resize(1); // resize to 1 since we are only running on 1 DSP | ||
123 | canny_Q = new CommandQueue(canny_ctx, devices[0]); | ||
124 | |||
125 | canny_input = new Buffer(canny_ctx, CL_MEM_READ_ONLY, numelem); | ||
126 | canny_output = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem); | ||
127 | canny_gradX = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short)); | ||
128 | canny_gradY = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short)); | ||
129 | canny_mag = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem*sizeof(short)); | ||
130 | canny_scratch = new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, numelem); | ||
131 | canny_numItems= new Buffer(canny_ctx, CL_MEM_WRITE_ONLY, sizeof(int)); | ||
132 | |||
133 | /*--------------------------------------------------------------------- | ||
134 | * Compile the Kernel Source for the devices | ||
135 | *--------------------------------------------------------------------*/ | ||
136 | Program::Binaries binary(1, make_pair(conv_dsp_bin, sizeof(conv_dsp_bin))); | ||
137 | Program program(canny_ctx, devices, binary); | ||
138 | program.build(devices); | ||
139 | canny_K = new Kernel(program, "canny_tiocl"); | ||
140 | |||
141 | canny_K->setArg(0, *canny_input); | ||
142 | canny_K->setArg(1, *canny_gradX); | ||
143 | canny_K->setArg(2, *canny_gradY); | ||
144 | canny_K->setArg(3, *canny_mag); | ||
145 | canny_K->setArg(4, *canny_output); | ||
146 | canny_K->setArg(5, *canny_scratch); | ||
147 | canny_K->setArg(6, *canny_numItems); | ||
148 | canny_K->setArg(7, width); | ||
149 | canny_K->setArg(8, height); | ||
150 | } | ||
151 | |||
152 | canny_Q->enqueueWriteBuffer(*canny_input, CL_FALSE, 0, numelem, data_in, NULL, &canny_ev1); | ||
153 | canny_Q->enqueueTask(*canny_K, 0, &canny_ev); | ||
154 | canny_Q->enqueueReadBuffer (*canny_output, CL_TRUE, 0, numelem, data_out, NULL, &canny_ev2); | ||
155 | } | ||
156 | catch (cl::Error err) | ||
157 | { | ||
158 | cerr << "ERROR: " << err.what() << "(" << err.err() << ")" << endl; | ||
159 | return (-1); | ||
160 | } | ||
161 | return 0; | ||
162 | } | ||
163 | /*----------------------------------------------------------------------------------------------------------------------*/ | ||
88 | 164 | ||
89 | #ifdef __cplusplus | 165 | #ifdef __cplusplus |
90 | extern "C" { | 166 | extern "C" { |
@@ -114,6 +190,10 @@ int oclconv_kernel(int kernel_type, int filter_size, | |||
114 | return 0; | 190 | return 0; |
115 | } | 191 | } |
116 | break; | 192 | break; |
193 | case 3: /* vlib canny */ | ||
194 | /* filter size is ignored */ | ||
195 | retval = ocl_canny (data_in, data_out, width, height); /* input and output stride assumed to be == width */ | ||
196 | break; | ||
117 | default: | 197 | default: |
118 | break; | 198 | break; |
119 | } | 199 | } |