summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHongmei Gou2015-12-04 15:55:57 -0600
committerHongmei Gou2015-12-04 15:55:57 -0600
commit057fbc5c2521796a050a5fd85868a4b41a6e9fa9 (patch)
treef83fbd8637cd3d6261e584a40f9478c59a4b1c29
parent23350ff8a753c10ce7e5436bd845b98a9e8c636d (diff)
downloadqt-opencv-opencl-opengl-multithreaded-057fbc5c2521796a050a5fd85868a4b41a6e9fa9.tar.gz
qt-opencv-opencl-opengl-multithreaded-057fbc5c2521796a050a5fd85868a4b41a6e9fa9.tar.xz
qt-opencv-opencl-opengl-multithreaded-057fbc5c2521796a050a5fd85868a4b41a6e9fa9.zip
qt-opencv-multithreaded: opencl - add dsp related files
* Add dsp source file phillips.cl * Add oclMakefile to generate header file from phillips.cl Signed-off-by: Hongmei Gou <h-gou@ti.com>
-rw-r--r--video_analytics/qt-opencv-multithreaded/src/oclMakefile13
-rw-r--r--video_analytics/qt-opencv-multithreaded/src/phillips.cl226
-rw-r--r--video_analytics/qt-opencv-multithreaded/src/qt-opencv-multithreaded.pro3
3 files changed, 241 insertions, 1 deletions
diff --git a/video_analytics/qt-opencv-multithreaded/src/oclMakefile b/video_analytics/qt-opencv-multithreaded/src/oclMakefile
new file mode 100644
index 0000000..7c6d2a1
--- /dev/null
+++ b/video_analytics/qt-opencv-multithreaded/src/oclMakefile
@@ -0,0 +1,13 @@
# Generates phillips.dsp_h from phillips.cl with the clocl offline compiler.
# TI_OCL_CGT_INSTALL, TARGET_ROOTDIR and TI_DSPLIB_DIR must be provided by the
# environment (or on the make command line).

DSP_INCLUDE  = -I$(TI_OCL_CGT_INSTALL)/include
DSP_INCLUDE += -I$(TARGET_ROOTDIR)/usr/share/ti/cgt-c6x/include
DSP_INCLUDE += -I$(TARGET_ROOTDIR)/usr/share/ti/opencl
CL6X  = cl6x -mv6600 --abi=eabi $(DSP_INCLUDE)
CLOCL = clocl

UNAME_M := $(shell uname -m)

# 'clean' never produces a file of that name; without .PHONY a stray file
# called "clean" would make the rule a silent no-op.
.PHONY: clean

# Both prerequisites (the kernel source and the DSPLIB archive) are passed to
# clocl through $^.
phillips.dsp_h: phillips.cl $(TI_DSPLIB_DIR)/packages/ti/dsplib/lib/dsplib.ae66
	$(CLOCL) --txt $^

clean:
	rm -f phillips.dsp_h
diff --git a/video_analytics/qt-opencv-multithreaded/src/phillips.cl b/video_analytics/qt-opencv-multithreaded/src/phillips.cl
new file mode 100644
index 0000000..24a06b6
--- /dev/null
+++ b/video_analytics/qt-opencv-multithreaded/src/phillips.cl
@@ -0,0 +1,226 @@
1/**
2 * @file phillips.cl
3 *
4 * @brief
5 * This file includes OpenCL wave surface simulation
6 * kernels that are dispatched to C66x cores
7 *
8 * \par
9 * ============================================================================
10 * @n (C) Copyright 2014-2015, Texas Instruments, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 *
16 * Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 *
19 * Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the
22 * distribution.
23 *
24 * Neither the name of Texas Instruments Incorporated nor the names of
25 * its contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 *
40 * \par
41*/
42
/*
 * FFT routine called by the kernels in this file. Only the prototype lives
 * here; the definition is presumably supplied by the DSPLIB archive that the
 * build passes to clocl together with this source.
 */
void DSPF_sp_fftSPxSP(int N, local float *x, local float *w, local float *y, unsigned char *brev, int n_min, int offset, int n_max);

/* Number of complex points the local staging buffers below are sized for. */
#define NXY 256

/**
 * @brief Kernel wrapper that runs DSPF_sp_fftSPxSP on data staged in local memory.
 *
 * Copies 2*N floats from each of x, y and w into local buffers, calls
 * DSPF_sp_fftSPxSP on the local copies, then copies the local x and y buffers
 * back to global memory.
 *
 * @param N       Number of complex points; 2*N floats are copied per array.
 *                Must satisfy 0 < N <= NXY, otherwise the kernel returns
 *                without touching any buffer.
 * @param x       Input data (2*N floats); rewritten from the local copy afterwards.
 * @param w       Twiddle table (2*N floats are read).
 * @param y       Output data (2*N floats); also pre-loaded into the local buffer.
 * @param n_min   Forwarded unchanged to DSPF_sp_fftSPxSP.
 * @param offset  Forwarded unchanged to DSPF_sp_fftSPxSP.
 * @param n_max   Forwarded unchanged to DSPF_sp_fftSPxSP.
 */
kernel void ocl_DSPF_sp_fftSPxSP(int N, global float *x, global float *w, global float *y, int n_min, int offset, int n_max)
{
    local float xloc[2*NXY];
    local float yloc[2*NXY];
    local float wloc[2*NXY];
    int i;

    /*
     * The staging buffers hold exactly 2*NXY floats. A larger N would write
     * past their end, so reject it (a kernel has no way to report an error).
     */
    if (N <= 0 || N > NXY)
    {
        return;
    }

    for (i = 0; i < 2*N; i++)
    {
        xloc[i] = x[i];
        yloc[i] = y[i];
        wloc[i] = w[i];
    }

    /* The bit-reversal table argument is passed as a null pointer. */
    DSPF_sp_fftSPxSP(N, xloc, wloc, yloc, 0, n_min, offset, n_max);

    for (i = 0; i < 2*N; i++)
    {
        x[i] = xloc[i];
        y[i] = yloc[i];
    }
}
65
/**
 * @brief In-place 2D transform of an NXY x NXY complex grid built on DSPF_sp_fftSPxSP.
 *
 * Every row, then every column, is copied into a local buffer with its
 * imaginary part negated, run through DSPF_sp_fftSPxSP, and written back with
 * the imaginary part negated again. Row results are written back unscaled;
 * column results are divided by NXY.
 *
 * NOTE(review): the net scaling is therefore 1/NXY, not 1/(NXY*NXY) -- confirm
 * this is the normalisation the callers expect.
 *
 * @param c         NXY*NXY complex values indexed as row * NXY + column;
 *                  overwritten with the result.
 * @param twiddles  Table of which the first 2*NXY floats are copied to local
 *                  memory and handed to DSPF_sp_fftSPxSP.
 */
void DSPF_sp_ifft2D_FIXEDSIZE_SPxSP(global float2 *c, global float *twiddles)
{
    local float input[2*NXY];
    local float output[2*NXY];
    local float twloc[2*NXY];
    int i, j;
    int idx;

    for (i = 0; i < 2*NXY; i++)
    {
        twloc[i] = twiddles[i];
    }

    /* Transform the rows */
    for (j = 0; j < NXY; j++)
    {
        for (i = 0; i < NXY; i++)
        {
            idx = j * NXY + i;
            input[2*i + 0] =  c[idx].x;
            input[2*i + 1] = -c[idx].y;
        }
        DSPF_sp_fftSPxSP (NXY, input, twloc, output, 0, 4, 0, NXY);
        for (i = 0; i < NXY; i++)
        {
            idx = j * NXY + i;
            /* No scaling on the row pass (the original divided by NXY/NXY == 1). */
            c[idx].x =  output[2*i + 0];
            c[idx].y = -output[2*i + 1];
        }
    }

    /* Transform the columns (transpose included) */
    for (i = 0; i < NXY; i++)
    {
        for (j = 0; j < NXY; j++)
        {
            idx = j * NXY + i;
            input[2*j + 0] =  c[idx].x;
            input[2*j + 1] = -c[idx].y;
        }
        DSPF_sp_fftSPxSP (NXY, input, twloc, output, 0, 4, 0, NXY);
        for (j = 0; j < NXY; j++)
        {
            idx = j * NXY + i;
            c[idx].x =  output[2*j + 0] / (float)(NXY);
            c[idx].y = -output[2*j + 1] / (float)(NXY);
        }
    }
}

/**
 * @brief Kernel entry point that forwards directly to DSPF_sp_ifft2D_FIXEDSIZE_SPxSP.
 *
 * @param spect  NXY*NXY complex grid, transformed in place.
 * @param tw     Twiddle table forwarded unchanged.
 */
kernel void ocl_DSPF_sp_ifft2D_FIXEDSIZE_SPxSP(global float2 *spect, global float *tw)
{
    DSPF_sp_ifft2D_FIXEDSIZE_SPxSP(spect, tw);
}
116
117
118#define OPENCL_PI_F 3.14159f
119#define RAND_MAX 32768
120
121float phillips( float kx, float ky, float windSpeed, float windDirection, float A)
122{
123 float fWindDir = windDirection * OPENCL_PI_F / 180.0f;
124 float L = windSpeed * windSpeed / 9.81f;
125 float w = L / 75;
126 float ksqr = kx * kx + ky * ky;
127 float kdotwhat = kx * cosf(fWindDir) + ky * sinf(fWindDir);
128
129 kdotwhat = max(0.0f, kdotwhat);
130 float result = (float) (A * (pow(2.7183f, -1.0f / (L * L * ksqr)) * (kdotwhat * kdotwhat)) / (ksqr * ksqr * ksqr));
131 float damp = (float) expf(-ksqr * w * w);
132 damp = expf(-1.0 / (ksqr * L * L));
133 result *= kdotwhat < 0.0f ? 0.25f : 1.0f;
134 return (result * damp);
135}
136
/**
 * @brief Fills h0 and h0_spectrum with a randomised spectrum, then transforms h0 in place.
 *
 * For every (x, y) in the fftInputW x fftInputH grid a complex value is built
 * from sqrt(phillips(kx, ky, ...)), an amplitude equal to the mean of four
 * rand() samples, and two further rand() samples mapped to roughly [-1, 1).
 * The same value is stored in both h0 and h0_spectrum. Afterwards h0 is passed
 * to DSPF_sp_ifft2D_FIXEDSIZE_SPxSP.
 *
 * NOTE(review): the final transform always works on an NXY x NXY grid,
 * whatever fftInputH / fftInputW are -- confirm callers always pass NXY.
 * NOTE(review): kx uses PI * x while ky uses 2 * PI * y -- confirm the
 * asymmetry is intended.
 *
 * @param h0           Output grid; holds the transformed field on return.
 * @param h0_spectrum  Output grid; keeps the untransformed spectrum.
 * @param twiddles     Twiddle table forwarded to the 2D transform.
 * @param fftInputH    Number of rows to generate.
 * @param fftInputW    Number of columns to generate (also the row stride).
 * @param patchSize    Divisor used when deriving kx / ky from x / y.
 * @param windSpeed    Forwarded to phillips().
 * @param windDir      Forwarded to phillips().
 */
kernel void ocl_generateHeightField(global float2 * h0, global float2 *h0_spectrum, global float *twiddles, unsigned int fftInputH, unsigned int fftInputW, unsigned int patchSize, float windSpeed, float windDir)
{
    float fMultiplier, fAmplitude;

    for (unsigned int y = 0; y < fftInputH; y++)
    {
        for (unsigned int x = 0; x < fftInputW; x++)
        {
            float kx = OPENCL_PI_F * x / (float) patchSize;
            float ky = 2.0f * OPENCL_PI_F * y / (float) patchSize;

            /* The order of the rand() calls is kept so the generated sequence is unchanged. */
            float Er = 2.0f * rand() / (float) RAND_MAX - 1.0f;
            float Ei = 2.0f * rand() / (float) RAND_MAX - 1.0f;

            /* phillips() divides by kx^2 + ky^2, so the origin is handled separately. */
            if ((kx == 0.f) && (ky == 0.f)) {
                fMultiplier = 0.f;
            } else {
                fMultiplier = sqrt(phillips(kx, ky, windSpeed, windDir, 1.0f));
            }

            /* randNormal emulation: mean of four uniform samples */
            fAmplitude  = rand() / (float)RAND_MAX;
            fAmplitude += rand() / (float)RAND_MAX;
            fAmplitude += rand() / (float)RAND_MAX;
            fAmplitude += rand() / (float)RAND_MAX;
            fAmplitude *= 0.25f;

            float h0_re = fMultiplier * fAmplitude * Er;
            float h0_im = fMultiplier * fAmplitude * Ei;
            float2 tmp = {h0_re, h0_im};
            unsigned int i = y * fftInputW + x;
            h0_spectrum[i] = h0[i] = tmp;
        }
    }

    DSPF_sp_ifft2D_FIXEDSIZE_SPxSP(h0, twiddles);
}
175
/*
 * Complex-number helpers used when the spectrum is advanced in time.
 * A float2 holds the real part in .x and the imaginary part in .y.
 */

/* Returns arg with the sign of its imaginary part flipped. */
float2 __attribute__((always_inline)) conjugate(float2 arg)
{
    float2 flipped = arg;

    flipped.y = -arg.y;
    return flipped;
}
182
/* Returns the complex product arg1 * arg2 (.x real, .y imaginary). */
float2 __attribute__((always_inline)) complex_mult(float2 arg1, float2 arg2)
{
    float re = arg1.x * arg2.x - arg1.y * arg2.y;
    float im = arg1.x * arg2.y + arg1.y * arg2.x;

    return (float2)(re, im);
}
187
/* Returns the component-wise sum of arg1 and arg2. */
float2 __attribute__((always_inline)) complex_add(float2 arg1, float2 arg2)
{
    float2 sum;

    sum.x = arg1.x + arg2.x;
    sum.y = arg1.y + arg2.y;
    return sum;
}
192
/* Returns (cos(arg), sin(arg)), i.e. the unit complex number with phase arg. */
float2 __attribute__((always_inline)) complex_exp(float arg)
{
    float cos_part;
    float sin_part = sincos(arg, &cos_part);   /* sincos returns the sine and stores the cosine */

    return (float2)(cos_part, sin_part);
}
200
/**
 * @brief Advances the spectrum h0 to time t, stores it in ht and transforms ht in place.
 *
 * For each grid point the stored value h0[i] is rotated by phase +w*t, the
 * conjugate of a row-mirrored entry of h0 is rotated by phase -w*t, and the
 * sum of the two is written to ht[i], where w = sqrt(9.81 * |k|). Afterwards
 * ht is passed to DSPF_sp_ifft2D_FIXEDSIZE_SPxSP.
 *
 * NOTE(review): the mirrored entry is taken from row (NXY-1)-y at the same
 * column x; only the row index is reflected -- confirm this is intended.
 *
 * @param twiddles   Twiddle table forwarded to the 2D transform.
 * @param h0         NXY*NXY spectrum at time zero (read only here).
 * @param ht         NXY*NXY output grid.
 * @param t          Time value multiplying the per-point frequency w.
 * @param patchSize  Divisor used when deriving the wave vector from (x, y).
 */
kernel void ocl_updateHeightMap( global float *twiddles, global float2* h0, global float2* ht, float t, unsigned int patchSize)
{
    unsigned int row;
    unsigned int col;

    for (row = 0; row < NXY; row++)
    {
        /* Start of the row that mirrors 'row' about the middle of the grid. */
        const unsigned int mirror_base = ((NXY-1) - row) * NXY;

        for (col = 0; col < NXY; col++)
        {
            const unsigned int idx = row * NXY + col;
            float2 k;

            k.x = OPENCL_PI_F * col / (float) patchSize;
            k.y = 2.0f * OPENCL_PI_F * row / (float) patchSize;

            /* dispersion w(k) */
            float w = sqrtf(9.81f * sqrtf(k.x*k.x + k.y*k.y));

            float2 forward  = complex_mult(h0[idx], complex_exp(w * t));
            float2 backward = complex_mult(conjugate(h0[mirror_base + col]), complex_exp(-w * t));

            /* frequency-space value for this grid point */
            ht[idx] = forward + backward;
        }
    }

    DSPF_sp_ifft2D_FIXEDSIZE_SPxSP(ht, twiddles);
}
225
226/* nothing past this point */
diff --git a/video_analytics/qt-opencv-multithreaded/src/qt-opencv-multithreaded.pro b/video_analytics/qt-opencv-multithreaded/src/qt-opencv-multithreaded.pro
index 916752f..58c0066 100644
--- a/video_analytics/qt-opencv-multithreaded/src/qt-opencv-multithreaded.pro
+++ b/video_analytics/qt-opencv-multithreaded/src/qt-opencv-multithreaded.pro
@@ -40,7 +40,8 @@ HEADERS += \
40 SharedImageBuffer.h \ 40 SharedImageBuffer.h \
41 Buffer.h \ 41 Buffer.h \
42 Gesture.h \ 42 Gesture.h \
43 WaveSimulationThread.h 43 WaveSimulationThread.h \
44 phillips.dsp_h
44 45
45FORMS += \ 46FORMS += \
46 MainWindow.ui \ 47 MainWindow.ui \