diff options
author | Hongmei Gou | 2015-12-04 15:55:57 -0600 |
---|---|---|
committer | Hongmei Gou | 2015-12-04 15:55:57 -0600 |
commit | 057fbc5c2521796a050a5fd85868a4b41a6e9fa9 (patch) | |
tree | f83fbd8637cd3d6261e584a40f9478c59a4b1c29 | |
parent | 23350ff8a753c10ce7e5436bd845b98a9e8c636d (diff) | |
download | qt-opencv-opencl-opengl-multithreaded-057fbc5c2521796a050a5fd85868a4b41a6e9fa9.tar.gz qt-opencv-opencl-opengl-multithreaded-057fbc5c2521796a050a5fd85868a4b41a6e9fa9.tar.xz qt-opencv-opencl-opengl-multithreaded-057fbc5c2521796a050a5fd85868a4b41a6e9fa9.zip |
qt-opencv-multithreaded: opencl - add dsp related files
* Add dsp source file phillips.cl
* Add oclMakefile to generate header file from phillips.cl
Signed-off-by: Hongmei Gou <h-gou@ti.com>
3 files changed, 241 insertions, 1 deletions
diff --git a/video_analytics/qt-opencv-multithreaded/src/oclMakefile b/video_analytics/qt-opencv-multithreaded/src/oclMakefile new file mode 100644 index 0000000..7c6d2a1 --- /dev/null +++ b/video_analytics/qt-opencv-multithreaded/src/oclMakefile | |||
@@ -0,0 +1,13 @@ | |||
# Builds phillips.dsp_h from the OpenCL source phillips.cl with TI's clocl.
#
# Variables read from the environment / command line:
#   TI_OCL_CGT_INSTALL, TARGET_ROOTDIR - roots of the DSP include paths
#   TI_DSPLIB_DIR                      - DSPLIB install that provides dsplib.ae66

# Include paths for the C66x compiler.
DSP_INCLUDE  = -I$(TI_OCL_CGT_INSTALL)/include
DSP_INCLUDE += -I$(TARGET_ROOTDIR)/usr/share/ti/cgt-c6x/include
DSP_INCLUDE += -I$(TARGET_ROOTDIR)/usr/share/ti/opencl

# NOTE(review): CL6X and UNAME_M are not referenced by any rule in this file;
# they are kept in case another makefile or a command-line goal relies on them.
CL6X  = cl6x -mv6600 --abi=eabi $(DSP_INCLUDE)
CLOCL = clocl

UNAME_M := $(shell uname -m)

# `clean` is an action, not a file: keep it runnable even if a file named
# "clean" appears in this directory.
.PHONY: clean

# Both prerequisites (the kernel source and the DSPLIB archive) are handed to
# clocl through $^.
phillips.dsp_h: phillips.cl ${TI_DSPLIB_DIR}/packages/ti/dsplib/lib/dsplib.ae66
	$(CLOCL) --txt $^

clean:
	rm -f phillips.dsp_h
diff --git a/video_analytics/qt-opencv-multithreaded/src/phillips.cl b/video_analytics/qt-opencv-multithreaded/src/phillips.cl new file mode 100644 index 0000000..24a06b6 --- /dev/null +++ b/video_analytics/qt-opencv-multithreaded/src/phillips.cl | |||
@@ -0,0 +1,226 @@ | |||
1 | /** | ||
2 | * @file phillips.cl | ||
3 | * | ||
4 | * @brief | ||
5 | * This file includes OpenCL wave surface simulation | ||
6 | * kernels that are dispatched to C66x cores | ||
7 | * | ||
8 | * \par | ||
9 | * ============================================================================ | ||
10 | * @n (C) Copyright 2014-2015, Texas Instruments, Inc. | ||
11 | * | ||
12 | * Redistribution and use in source and binary forms, with or without | ||
13 | * modification, are permitted provided that the following conditions | ||
14 | * are met: | ||
15 | * | ||
16 | * Redistributions of source code must retain the above copyright | ||
17 | * notice, this list of conditions and the following disclaimer. | ||
18 | * | ||
19 | * Redistributions in binary form must reproduce the above copyright | ||
20 | * notice, this list of conditions and the following disclaimer in the | ||
21 | * documentation and/or other materials provided with the | ||
22 | * distribution. | ||
23 | * | ||
24 | * Neither the name of Texas Instruments Incorporated nor the names of | ||
25 | * its contributors may be used to endorse or promote products derived | ||
26 | * from this software without specific prior written permission. | ||
27 | * | ||
28 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
29 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
30 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
31 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
32 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
33 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
34 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
35 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
36 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
37 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
38 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
39 | * | ||
40 | * \par | ||
41 | */ | ||
42 | |||
/*
 * FFT routine defined outside this file; only its prototype is visible here.
 * (Presumably provided by the DSPLIB archive the build passes to clocl -
 * confirm against the build setup.)
 */
void DSPF_sp_fftSPxSP(int N, local float *x, local float *w, local float *y, unsigned char *brev, int n_min, int offset, int n_max);

/* Capacity, in complex samples, of the local staging buffers used below. */
#define NXY 256

/**
 * Kernel wrapper around DSPF_sp_fftSPxSP().
 *
 * Copies 2*N floats of x, y and w from global memory into local buffers,
 * runs the FFT on the local copies (brev is passed as 0), then copies the
 * local x and y buffers back to global memory.
 *
 * @param N       FFT length; 2*N floats are copied per buffer. Must satisfy
 *                0 < N <= NXY, otherwise the kernel returns without touching
 *                any buffer.
 * @param x       global buffer of 2*N floats, read and written back
 * @param w       global buffer of 2*N floats, read only
 * @param y       global buffer of 2*N floats, read and written back
 * @param n_min   forwarded unchanged to DSPF_sp_fftSPxSP
 * @param offset  forwarded unchanged to DSPF_sp_fftSPxSP
 * @param n_max   forwarded unchanged to DSPF_sp_fftSPxSP
 */
kernel void ocl_DSPF_sp_fftSPxSP(int N, global float *x, global float *w, global float *y, int n_min, int offset, int n_max)
{
    local float xloc[2*NXY];
    local float yloc[2*NXY];
    local float wloc[2*NXY];
    int i;

    /*
     * The staging buffers hold exactly 2*NXY floats. A larger N would make
     * the copy loops below write past their end, so reject it up front.
     */
    if (N <= 0 || N > NXY)
    {
        return;
    }

    for (i = 0; i < 2*N; i++)
    {
        xloc[i] = x[i];
        yloc[i] = y[i];
        wloc[i] = w[i];
    }

    DSPF_sp_fftSPxSP(N, xloc, wloc, yloc, 0, n_min, offset, n_max);

    /* Both x and y are written back; w is not. */
    for (i = 0; i < 2*N; i++)
    {
        x[i] = xloc[i];
        y[i] = yloc[i];
    }
}
65 | |||
/**
 * In-place 2-D inverse transform of an NXY x NXY grid of complex values.
 *
 * The inverse is obtained from the forward routine DSPF_sp_fftSPxSP() by
 * negating the imaginary part on the way in and again on the way out. Each
 * row is transformed first, then each column.
 *
 * NOTE(review): the row pass divides by (NXY/NXY), which is 1, while the
 * column pass divides by NXY, so the net scale is 1/NXY rather than
 * 1/(NXY*NXY). This is preserved as found - confirm it is intentional.
 *
 * @param c         row-major grid of NXY*NXY complex samples, overwritten
 * @param twiddles  2*NXY floats copied to local memory and passed to the FFT
 */
void DSPF_sp_ifft2D_FIXEDSIZE_SPxSP(global float2 *c, global float *twiddles)
{
    local float input[2*NXY];
    local float output[2*NXY];
    local float twloc[2*NXY];
    int i, j;
    int idx;

    /* Stage the twiddle table in local memory once for all 2*NXY transforms. */
    for (i = 0; i < 2*NXY; i++)
    {
        twloc[i] = twiddles[i];
    }

    /* Transform the rows */
    for (j = 0; j < NXY; j++)
    {
        for (i = 0; i < NXY; i++)
        {
            idx = j * NXY + i;
            input[2*i + 0] = c[idx].x;
            input[2*i + 1] = -c[idx].y;
        }
        DSPF_sp_fftSPxSP(NXY, input, twloc, output, 0, 4, 0, NXY);
        for (i = 0; i < NXY; i++)
        {
            idx = j * NXY + i;
            c[idx].x = output[2*i + 0] / (float)(NXY/NXY);
            c[idx].y = -output[2*i + 1] / (float)(NXY/NXY);
        }
    }

    /* Transform the columns: each column is gathered with a stride of NXY */
    for (i = 0; i < NXY; i++)
    {
        for (j = 0; j < NXY; j++)
        {
            idx = j * NXY + i;
            input[2*j + 0] = c[idx].x;
            input[2*j + 1] = -c[idx].y;
        }
        DSPF_sp_fftSPxSP(NXY, input, twloc, output, 0, 4, 0, NXY);
        for (j = 0; j < NXY; j++)
        {
            idx = j * NXY + i;
            c[idx].x = output[2*j + 0] / (float)(NXY);
            c[idx].y = -output[2*j + 1] / (float)(NXY);
        }
    }
}
111 | |||
/**
 * Kernel entry point for the fixed-size 2-D inverse transform.
 *
 * @param spect  complex grid, forwarded as the first argument
 * @param tw     twiddle table, forwarded as the second argument
 */
kernel void ocl_DSPF_sp_ifft2D_FIXEDSIZE_SPxSP(global float2 *spect, global float *tw)
{
    /* No work is done here; both arguments are forwarded unchanged. */
    DSPF_sp_ifft2D_FIXEDSIZE_SPxSP(spect, tw);
}
116 | |||
117 | |||
118 | #define OPENCL_PI_F 3.14159f | ||
119 | #define RAND_MAX 32768 | ||
120 | |||
121 | float phillips( float kx, float ky, float windSpeed, float windDirection, float A) | ||
122 | { | ||
123 | float fWindDir = windDirection * OPENCL_PI_F / 180.0f; | ||
124 | float L = windSpeed * windSpeed / 9.81f; | ||
125 | float w = L / 75; | ||
126 | float ksqr = kx * kx + ky * ky; | ||
127 | float kdotwhat = kx * cosf(fWindDir) + ky * sinf(fWindDir); | ||
128 | |||
129 | kdotwhat = max(0.0f, kdotwhat); | ||
130 | float result = (float) (A * (pow(2.7183f, -1.0f / (L * L * ksqr)) * (kdotwhat * kdotwhat)) / (ksqr * ksqr * ksqr)); | ||
131 | float damp = (float) expf(-ksqr * w * w); | ||
132 | damp = expf(-1.0 / (ksqr * L * L)); | ||
133 | result *= kdotwhat < 0.0f ? 0.25f : 1.0f; | ||
134 | return (result * damp); | ||
135 | } | ||
136 | |||
/**
 * Fills h0 and h0_spectrum with a randomised initial wave spectrum, then runs
 * the fixed-size 2-D inverse transform on h0 in place.
 *
 * For every grid cell (x, y) the spectrum magnitude is
 * sqrt(phillips(kx, ky, windSpeed, windDir, 1)) (0 at kx == ky == 0), scaled
 * by the mean of four rand() samples and by two further independent rand()
 * samples mapped to roughly [-1, 1] for the real and imaginary parts.
 *
 * @param h0           output grid; holds the transformed field on return
 * @param h0_spectrum  output grid; keeps the untransformed spectrum
 * @param twiddles     twiddle table forwarded to the inverse transform
 * @param fftInputH    number of rows generated
 * @param fftInputW    number of columns generated (row stride of both grids)
 * @param patchSize    divisor used when deriving kx / ky; must be non-zero
 * @param windSpeed    forwarded to phillips()
 * @param windDir      forwarded to phillips()
 *
 * NOTE(review): the final transform always treats h0 as an NXY x NXY grid,
 * regardless of fftInputH / fftInputW - presumably callers pass NXY for
 * both; confirm.
 * NOTE(review): kx uses PI while ky uses 2*PI; kept as found.
 */
kernel void ocl_generateHeightField(global float2 * h0, global float2 *h0_spectrum, global float *twiddles, unsigned int fftInputH, unsigned int fftInputW, unsigned int patchSize, float windSpeed, float windDir)
{
    for (unsigned int y = 0; y < fftInputH; y++)
    {
        for (unsigned int x = 0; x < fftInputW; x++)
        {
            float kx = OPENCL_PI_F * x / (float) patchSize;
            float ky = 2.0f * OPENCL_PI_F * y / (float) patchSize;

            /*
             * The rand() calls below are kept in their original order
             * (Er, Ei, then the four amplitude samples) so the generated
             * sequence is unchanged.
             */
            float Er = 2.0f * rand() / (float) RAND_MAX - 1.0f;
            float Ei = 2.0f * rand() / (float) RAND_MAX - 1.0f;

            /* The zero wave vector carries no energy and is not passed to phillips(). */
            float fMultiplier = 0.f;
            if (!((kx == 0.f) && (ky == 0.f)))
            {
                fMultiplier = sqrt(phillips(kx, ky, windSpeed, windDir, 1.0f));
            }

            /* randNormal emulation: mean of four uniform samples */
            float fAmplitude = rand() / (float)RAND_MAX;
            fAmplitude += rand() / (float)RAND_MAX;
            fAmplitude += rand() / (float)RAND_MAX;
            fAmplitude += rand() / (float)RAND_MAX;
            fAmplitude *= 0.25f;

            float h0_re = fMultiplier * fAmplitude * Er;
            float h0_im = fMultiplier * fAmplitude * Ei;
            float2 tmp = {h0_re, h0_im};
            unsigned int idx = y * fftInputW + x;

            h0[idx] = tmp;
            h0_spectrum[idx] = tmp;
        }
    }

    DSPF_sp_ifft2D_FIXEDSIZE_SPxSP(h0, twiddles);
}
175 | |||
176 | /* Update spectrum in time, to simulate movement */ | ||
177 | // complex math functions | ||
/* Complex conjugate of arg, with .x as the real part and .y as the imaginary part. */
float2 __attribute__((always_inline)) conjugate(float2 arg)
{
    float2 flipped = arg;

    flipped.y = -arg.y;
    return flipped;
}
182 | |||
/* Complex product arg1 * arg2, with .x as the real part and .y as the imaginary part. */
float2 __attribute__((always_inline)) complex_mult(float2 arg1, float2 arg2)
{
    float2 product;

    product.x = arg1.x * arg2.x - arg1.y * arg2.y;
    product.y = arg1.x * arg2.y + arg1.y * arg2.x;
    return product;
}
187 | |||
/* Complex sum arg1 + arg2; vector addition already works component-wise. */
float2 __attribute__((always_inline)) complex_add(float2 arg1, float2 arg2)
{
    return arg1 + arg2;
}
192 | |||
/* Returns (cos(arg), sin(arg)), i.e. exp(i * arg) as a complex number. */
float2 __attribute__((always_inline)) complex_exp(float arg)
{
    float cosine;
    /* sincos() returns the sine and stores the cosine through the pointer. */
    float sine = sincos(arg, &cosine);

    return (float2)(cosine, sine);
}
200 | |||
/**
 * Advances the NXY x NXY spectrum h0 to time t, stores the result in ht and
 * then runs the fixed-size 2-D inverse transform on ht in place.
 *
 * For each cell, with w = sqrt(9.81 * |k|):
 *     ht = h0[cell] * exp(i*w*t) + conj(h0[mirror]) * exp(-i*w*t)
 *
 * @param twiddles   twiddle table forwarded to the inverse transform
 * @param h0         input spectrum, NXY*NXY complex values, read only
 * @param ht         output grid, NXY*NXY complex values
 * @param t          time value multiplying the angular frequency
 * @param patchSize  divisor used when deriving the wave vector; non-zero
 *
 * NOTE(review): the "mirror" cell flips only the row index
 * ((NXY-1) - y) and keeps the column - confirm this is the intended -k.
 */
kernel void ocl_updateHeightMap( global float *twiddles, global float2* h0, global float2* ht, float t, unsigned int patchSize)
{
    for (unsigned int row = 0; row < NXY; row++)
    {
        unsigned int mirror_row = (NXY - 1) - row;

        for (unsigned int col = 0; col < NXY; col++)
        {
            unsigned int cell = row * NXY + col;
            unsigned int mirror_cell = (mirror_row * NXY) + col;

            float2 k;
            k.x = OPENCL_PI_F * col / (float) patchSize;
            k.y = 2.0f * OPENCL_PI_F * row / (float) patchSize;

            /* dispersion relation w(k) */
            float k_len = sqrtf(k.x * k.x + k.y * k.y);
            float w = sqrtf(9.81f * k_len);

            float2 forward = complex_mult(h0[cell], complex_exp(w * t));
            float2 backward = complex_mult(conjugate(h0[mirror_cell]), complex_exp(-w * t));

            /* frequency-space value for this cell */
            ht[cell] = forward + backward;
        }
    }

    DSPF_sp_ifft2D_FIXEDSIZE_SPxSP(ht, twiddles);
}
225 | |||
226 | /* nothing past this point */ | ||
diff --git a/video_analytics/qt-opencv-multithreaded/src/qt-opencv-multithreaded.pro b/video_analytics/qt-opencv-multithreaded/src/qt-opencv-multithreaded.pro index 916752f..58c0066 100644 --- a/video_analytics/qt-opencv-multithreaded/src/qt-opencv-multithreaded.pro +++ b/video_analytics/qt-opencv-multithreaded/src/qt-opencv-multithreaded.pro | |||
@@ -40,7 +40,8 @@ HEADERS += \ | |||
40 | SharedImageBuffer.h \ | 40 | SharedImageBuffer.h \ |
41 | Buffer.h \ | 41 | Buffer.h \ |
42 | Gesture.h \ | 42 | Gesture.h \ |
43 | WaveSimulationThread.h | 43 | WaveSimulationThread.h \ |
44 | phillips.dsp_h | ||
44 | 45 | ||
45 | FORMS += \ | 46 | FORMS += \ |
46 | MainWindow.ui \ | 47 | MainWindow.ui \ |