Updated the Readme
[dante-enablement/k2g-dante-enablement.git] / dante-loopback-dsp.patch
1 --- /dev/null
2 +++ /home/root/dep/example-dsp/audio/Makefile.opcl
3 @@ -0,0 +1,6 @@
4 +all:
5 +       $(MAKE) -C lib -f Makefile.opcl
6 +       $(MAKE) -C loopback -f Makefile.opcl
7 +clean:
8 +       $(MAKE) -C lib clean -f Makefile.opcl
9 +       $(MAKE) -C loopback clean -f Makefile.opcl
10 --- /dev/null
11 +++ /home/root/dep/example-dsp/audio/lib/Makefile.opcl
12 @@ -0,0 +1,52 @@
13 +CXX ?= g++
14 +
15 +# path #
16 +SRC_PATH = src
17 +BUILD_PATH = build
18 +
19 +# executable # 
20 +LIB_NAME = libdep_audio.a
21 +
22 +# extensions #
23 +SRC_EXT = cpp
24 +
25 +SOURCES = $(shell find $(SRC_PATH) -name '*.$(SRC_EXT)' | sort -k 1nr | cut -f2-)
26 +OBJECTS = $(SOURCES:$(SRC_PATH)/%.$(SRC_EXT)=$(BUILD_PATH)/%.o)
27 +DEPS = $(OBJECTS:.o=.d)
28 +
29 +# flags #
30 +COMPILE_FLAGS = -Wno-error=nonnull -fPIC -Wall -Werror -Wno-error=unknown-pragmas -Wno-error=unused-function -Wno-error=strict-aliasing -fno-aggressive-loop-optimizations  -O2 -g -DNDEBUG -fPIC   -std=gnu++11
31 +INCLUDES = -I include
32 +
33 +.PHONY: default_target
34 +default_target: release
35 +
36 +.PHONY: release
37 +release: export CXXFLAGS := $(CXXFLAGS) $(COMPILE_FLAGS) -lOpenCL -locl_util -DOPENCL_OFFLOAD
38 +release: dirs
39 +       @$(MAKE) all
40 +
41 +.PHONY: dirs
42 +dirs:
43 +       @echo "Creating directories"
44 +       @mkdir -p $(dir $(OBJECTS))
45 +
46 +.PHONY: clean
47 +clean:
48 +       @$(RM) -r $(BUILD_PATH)
49 +       @$(RM) -r $(LIB_NAME)
50 +
51 +# checks the executable and symlinks to the output
52 +.PHONY: all
53 +all: $(LIB_NAME)
54 +
55 +# Creation of the executable
56 +$(LIB_NAME): $(OBJECTS)
57 +       ar -r $(LIB_NAME) $(OBJECTS)
58 +
59 +# Source file rules
60 +# After the first compilation they will be joined with the rules from the
61 +# dependency files to provide header dependencies
62 +$(BUILD_PATH)/%.o: $(SRC_PATH)/%.$(SRC_EXT)
63 +       @echo "Compiling: $< -> $@"
64 +       $(CXX) $(CXXFLAGS) $(INCLUDES) -MP -MMD -c $< -o $@
65 --- /home/root/dep/example/audio/lib/include/dante/Buffers.hpp
66 +++ /home/root/dep/example-dsp/audio/lib/include/dante/Buffers.hpp
67 @@ -93,6 +93,11 @@
68  
69         void * get() const;
70  
71 +#ifdef OPENCL_OFFLOAD
72 +        void * get_cmemTxData() const;
73 +        void * get_cmemRxData() const;
74 +#endif
75 +
76         static std::string getErrorMessage(int err);
77  
78  private:
79 @@ -130,6 +135,14 @@
80         // Get a pointer to the start of the audio buffer for the given Dante RX channel 
81         void * getDanteRxChannel(unsigned int index) const;
82  
83 +#ifdef OPENCL_OFFLOAD
84 +        // Get a pointer to the start of the audio buffer for the given Dante TX channel
85 +        void * getDanteTxChannel_cmem(void) const;                             
86 +                                                                                        
87 +        // Get a pointer to the start of the audio buffer for the given Dante RX channel
88 +        void * getDanteRxChannel_cmem(void) const;                             
89 +#endif
90 +
91  private:
92         bool mGlobalNamespace;
93         SharedMemory mSharedMemory;
94 @@ -141,6 +154,10 @@
95         const timing_object_subheader_t * mTimingObjectSubheader;
96         std::vector<void *> mDanteTxChannels;
97         std::vector<void *> mDanteRxChannels;
98 +#ifdef OPENCL_OFFLOAD
99 +        void * mDanteTxChannels_cmem;
100 +        void * mDanteRxChannels_cmem;
101 +#endif
102  };
103  
104  
105 --- /home/root/dep/example/audio/lib/src/DanteBuffers.cpp
106 +++ /home/root/dep/example-dsp/audio/lib/src/DanteBuffers.cpp
107 @@ -1,5 +1,10 @@
108  #include "dante/Buffers.hpp"
109  
110 +#ifdef OPENCL_OFFLOAD
111 +#include <iostream>
112 +#include "ocl_util.h"
113 +#endif
115  #ifndef _WIN32
116  #include <errno.h>
117  #endif
118 @@ -7,7 +12,11 @@
119  namespace Dante
120  {
121  
122 -Buffers::Buffers() : mGlobalNamespace(), mSharedMemory(), mDanteTxSharedMemory(), mDanteRxSharedMemory(), mHeader(), mTimingObjectSubheader(), mDanteTxChannels(), mDanteRxChannels()
123 +Buffers::Buffers() : mGlobalNamespace(), mSharedMemory(), mDanteTxSharedMemory(), mDanteRxSharedMemory(), mHeader(),
124 +                    mTimingObjectSubheader(), mDanteTxChannels(), mDanteRxChannels()
125 +#ifdef OPENCL_OFFLOAD
126 +                    , mDanteTxChannels_cmem(), mDanteRxChannels_cmem()
127 +#endif
128  {
129  
130  }
131 @@ -30,6 +39,7 @@
132  #endif
133         }
134         mGlobalNamespace = globalNamespace;
136         int result = mSharedMemory.connect(name, false);
137         if (result)
138         {
139 @@ -37,15 +47,26 @@
140                 return result;
141         }
142         uint8_t * buf8 = (uint8_t *) mSharedMemory.get();
144         mHeader = (const buffer_header_t *) mSharedMemory.get();
146 +#ifdef OPENCL_OFFLOAD
147 +        std::cerr << "Buffer::mData/buf8 " << "(" << ((void*)buf8) << ")" << std::endl;
148 +        std::cerr << "Buffer1::mHeader " << "(" << mSharedMemory.get() << ")" << std::endl;
149 +        std::cerr << "Buffer2::cmemTxData " << "(" << mSharedMemory.get_cmemTxData() << ")" << std::endl;
150 +        std::cerr << "Buffer2::cmemRxData " << "(" << mSharedMemory.get_cmemRxData() << ")" << std::endl;
151 +#endif
153         if (mHeader->metadata.timing_object_subheader_offset_bytes)
154         {
155                 mTimingObjectSubheader = (timing_object_subheader_t *) (buf8 + mHeader->metadata.timing_object_subheader_offset_bytes);
156         }
158         mDanteTxChannels.resize(mHeader->audio.num_tx_channels);
159         mDanteRxChannels.resize(mHeader->audio.num_rx_channels);
160         uint8_t * tx0 = nullptr;
161         uint8_t * rx0 = nullptr;
163         if (mHeader->metadata.flags & DANTE_BUFFERS_FLAG__SEPARATE_CHANNEL_MEMORY)
164         {
165                 result = mDanteTxSharedMemory.connect(name + "Tx", false);
166 @@ -62,20 +83,34 @@
167                 }
168                 tx0 = ((uint8_t *) mDanteTxSharedMemory.get()) + mHeader->metadata.first_tx_channel_offset_bytes;
169                 rx0 = ((uint8_t *) mDanteRxSharedMemory.get()) + mHeader->metadata.first_rx_channel_offset_bytes;
170 +#ifdef OPENCL_OFFLOAD
171 +               // To do list - need separate cmem area for separate channel memory
172 +#endif
173         }
174         else
175         {
176                 tx0 = buf8 + mHeader->metadata.first_tx_channel_offset_bytes;
177                 rx0 = buf8 + mHeader->metadata.first_rx_channel_offset_bytes;
178         }
180         for (unsigned int c = 0; c < mHeader->audio.num_tx_channels; c++)
181         {
182                 mDanteTxChannels[c] = (void *) (tx0 + c * mHeader->audio.bytes_per_channel);
183         }
185 +#ifdef OPENCL_OFFLOAD
186 +       mDanteTxChannels_cmem =  mSharedMemory.get_cmemTxData();
187 +#endif
189         for (unsigned int c = 0; c < mHeader->audio.num_rx_channels; c++)
190         {
191                 mDanteRxChannels[c] = (void *) (rx0 + c * mHeader->audio.bytes_per_channel);
192         }
194 +#ifdef OPENCL_OFFLOAD
195 +       mDanteRxChannels_cmem =  mSharedMemory.get_cmemRxData();
196 +#endif
198         return 0;
199  }
200  
201 @@ -114,11 +149,25 @@
202         return mDanteTxChannels[index];
203  }
204  
205 -// Get a pointer to the start of the audio buffer for the given RX channel 
206 +// Get a pointer to the start of the audio buffer for the given RX channel
207  void * Buffers::getDanteRxChannel(unsigned int index) const
208  {
209         return mDanteRxChannels[index];
210  }
212 +#ifdef OPENCL_OFFLOAD
213 +// Get a pointer to the start of the audio buffer for the given TX channel
214 +void * Buffers::getDanteTxChannel_cmem(void) const
215 +{
216 +        return (mSharedMemory.get_cmemTxData());
217 +}
219 +// Get a pointer to the start of the audio buffer for the given RX channel
220 +void * Buffers::getDanteRxChannel_cmem(void) const
221 +{
222 +        return (mSharedMemory.get_cmemRxData());
223 +}
224 +#endif
225  
226  };
227  
228 --- /home/root/dep/example/audio/lib/src/DanteRunner.cpp
229 +++ /home/root/dep/example-dsp/audio/lib/src/DanteRunner.cpp
230 @@ -46,9 +46,21 @@
231  
232         bool resetNeeded = true;        
233  
234 +#ifdef OPENCL_OFFLOAD
235 +        std::cerr << "In Runner .. mHeader " << mHeader << std::endl;
236 +        std::cerr << "In Runner .. time obj " << mTimingObjectSubheader << std::endl;
237 +#endif
239         mResetCount = mHeader->time.reset_count;
240         mPeriodCount = mHeader->time.period_count;
241  
242 +#ifdef OPENCL_OFFLOAD
243 +        std::cerr << "In Runner .. time cnt " << mHeader->time.reset_count << std::endl;
244 +        std::cerr << "In Runner .. magic " << mHeader->metadata.magic_marker << std::endl;
245 +        std::cerr << "In Runner .. rst cnt " << mResetCount << std::endl;
246 +        std::cerr << "In Runner .. period cnt " << mPeriodCount << std::endl;
247 +#endif
249         while (running && mHeader->metadata.magic_marker)
250         {
251                 if (mResetCount != mHeader->time.reset_count || resetNeeded)
252 @@ -60,6 +72,10 @@
253                         }
254                         mResetCount = mHeader->time.reset_count;
255                         mPeriodCount = mHeader->time.period_count;
256 +#ifdef OPENCL_OFFLOAD
257 +                        std::cerr << "calling Reset" << std::endl;
258 +#endif
260                         if (epochResetFn) epochResetFn();
261                         continue;
262                 }
263 --- /home/root/dep/example/audio/lib/src/DanteSharedMemory.cpp
264 +++ /home/root/dep/example-dsp/audio/lib/src/DanteSharedMemory.cpp
265 @@ -7,7 +7,11 @@
266  
267  #include <iostream>
268  
269 +#ifdef OPENCL_OFFLOAD
270 +#include "CL/cl.hpp"
271 +#endif
272  
274  namespace Dante
275  {
276  
277 @@ -18,7 +22,11 @@
278  class SharedMemory::Impl
279  {
280  public:
281 -       Impl() : mName(), mFileDesc(), mSize(), mData() {}
282 +       Impl() : mName(), mFileDesc(), mSize(), mData()
283 +#ifdef OPENCL_OFFLOAD
284 +               , cmemTxData(), cmemRxData()
285 +#endif
286 +                {}
287         ~Impl() { disconnect(); }
288  
289         int connect(const std::string & name, bool globalNamespace)
290 @@ -50,6 +58,7 @@
291                         disconnect();
292                         return err;
293                 }
295                 if (s.st_size == 0)
296                 {
297                         // Creator has not yet resized the memory, try again later
298 @@ -60,6 +69,8 @@
299  
300                 // Map the memory
301                 void * raw = mmap(NULL, mSize, (PROT_READ | PROT_WRITE), MAP_SHARED, mFileDesc, 0);
304                 if (raw == MAP_FAILED)
305                 {
306                         // Creator has not yet resized, try again later
307 @@ -69,11 +80,36 @@
308                 }
309                 mData = raw;
310  
311 +#ifdef OPENCL_OFFLOAD
312 +                uint32_t cmem_size = (16 * 1024 * 4 * ((const buffer_header_t *)mData)->audio.num_rx_channels);
313 +               //uint32_t cmem_size = (mSize - 
314 +               //      (((const buffer_header_t *)mData)->metadata.first_tx_channel_offset_bytes)) / 2;
316 +               cmemTxData = (void *) __malloc_ddr(cmem_size);
317 +                cmemRxData = (void *) __malloc_ddr(cmem_size);
319 +                std::cerr << "CMEM alloc:  " << "(" << cmemTxData << ") "
320 +                                            << "(" << cmemRxData << ") "
321 +                                            << "(" << (cmem_size)  << ")" << std::endl;
322 +#endif
323                 return 0;
324         }
325  
326         void disconnect()
327         {
329 +#ifdef OPENCL_OFFLOAD
330 +               if (cmemTxData)
331 +               {
332 +                       __free_ddr(cmemTxData);
333 +                       cmemTxData = NULL;
334 +               }
335 +                if (cmemRxData)
336 +                {
337 +                        __free_ddr(cmemRxData);
338 +                        cmemRxData = NULL;
339 +                }
340 +#endif
341                 if (mData)
342                 {
343                         if (mSize) 
344 @@ -99,11 +135,20 @@
345  
346         void * get() const { return mData; }
347  
348 +#ifdef OPENCL_OFFLOAD                   
349 +        void * get_cmemTxData() const { return cmemTxData; }
350 +        void * get_cmemRxData() const { return cmemRxData; }
351 +#endif
353  private:
354         std::string mName;
355         int mFileDesc;
356         size_t mSize;
357         void * mData;
358 +#ifdef OPENCL_OFFLOAD
359 +       void * cmemTxData;
360 +        void * cmemRxData;
361 +#endif
362  };
363  
364  SharedMemory::SharedMemory() 
365 @@ -130,6 +175,17 @@
366  {
367         return mImpl->get();
368  }
370 +#ifdef OPENCL_OFFLOAD
371 +void * SharedMemory::get_cmemTxData() const
372 +{
373 +        return mImpl->get_cmemTxData();
374 +}
375 +void * SharedMemory::get_cmemRxData() const
376 +{
377 +        return mImpl->get_cmemRxData();
378 +}
379 +#endif
380  
381  std::string SharedMemory::getErrorMessage(int err)
382  {
383 --- /home/root/dep/example/audio/loopback/DanteLoopback.cpp
384 +++ /home/root/dep/example-dsp/audio/loopback/DanteLoopback.cpp
385 @@ -13,6 +13,16 @@
386  #include <sys/sdt.h>
387  #endif
388  
389 +#ifdef OPENCL_OFFLOAD
390 +#define __CL_ENABLE_EXCEPTIONS
391 +#include <cstdlib>
392 +#include <utility>
393 +#include <fstream>
394 +#include "ocl_util.h"
395 +#include <CL/cl.hpp>
396 +#include "DspKernel.dsp_h"
397 +#endif
399  static bool g_running = true;
400  
401  static void signal_handler(int sig)
402 @@ -25,28 +35,112 @@
403  class Loopback
404  {
405  public:
406 -       Loopback(Dante::Buffers & buffers, int txLatencySamples) 
407 +       Loopback(Dante::Buffers & buffers, int txLatencySamples)
408                 : mBuffers(buffers), mTxLatencySamples(txLatencySamples), mSamplesPerPeriod(), mSamplesPerChannel(),
409                   mDanteTxHeadSamples(), mDanteRxHeadSamples(), mDanteTxChannels(), mDanteRxChannels(), mNumLoopbackChannels()
410         {}
411         ~Loopback() {}
412  
413 -       // Unrolled RX and TX
414 -       void workUnwrappedDanteRxDanteTx(unsigned int numSamples)
415 +#ifdef OPENCL_OFFLOAD
416 +       void ocl_init(void)
417         {
418 -               assert(mDanteTxHeadSamples + numSamples <= mSamplesPerChannel);
419 -               assert(mDanteRxHeadSamples + numSamples <= mSamplesPerChannel);
420 -               //std::cerr << "Transferring " << numSamples << " from " << mReadHead << " to " << mWriteHead << std::endl;
421 -               unsigned int numBytes = numSamples * sizeof(int32_t);
422 -               for (unsigned int c = 0; c < mNumLoopbackChannels; c++)
423 -               {
424 -                       // We copy Dante RX -> Dante RX
425 -                       memcpy(mDanteTxChannels[c] + mDanteTxHeadSamples, mDanteRxChannels[c] + mDanteRxHeadSamples, numBytes);
426 -               }
427 -               mDanteTxHeadSamples += numSamples;
428 -               mDanteRxHeadSamples += numSamples;
429 +               uint64_t t0, t1;
430 +               cText = cl::Context(CL_DEVICE_TYPE_ACCELERATOR);
431 +               devices = cText.getInfo<CL_CONTEXT_DEVICES>();
432 +               cQue = cl::CommandQueue(cText, devices[0]);
433 +               dspBinary = cl::Program::Binaries(1, std::make_pair(DspKernel_dsp_bin,
434 +                                             sizeof(DspKernel_dsp_bin)));
436 +               Prog = cl::Program(cText, devices, dspBinary);
437 +               Prog.build(devices);
439 +               std::cerr << "OpenCL Dummy call...." << std::endl;
440 +                kTemp = cl::Kernel(Prog, "null");
441 +                cl::KernelFunctor null = kTemp.bind(cQue, cl::NDRange(1), cl::NDRange(1));
443 +                t0 = Dante::getMonotonicValue();                               
444 +                null().wait();
445 +                t1 = Dante::getMonotonicValue();
447 +               std::cerr << "OpenCL Dummy call Elapsed (with load) " << (t1-t0)  << std::endl;
448 +               
449 +               Knl = cl::Kernel(Prog, "ocl_cmemcpy");
450         }
451 +#endif
452  
453 +        // Unrolled RX and TX
454 +#ifdef OPENCL_OFFLOAD
455 +        void workUnwrappedDanteRxDanteTx(unsigned int numSamples)
456 +        {
458 +                try
459 +                {
460 +                       assert(mDanteTxHeadSamples + numSamples <= mSamplesPerChannel);
461 +                       assert(mDanteRxHeadSamples + numSamples <= mSamplesPerChannel);
462 +                       unsigned int numBytes = numSamples * sizeof(int32_t);
464 +                        if (numBytes == 0)
465 +                        {
466 +                                return;
467 +                        }
469 +                       for (unsigned int c = 0; c < mNumLoopbackChannels; c++)
470 +                       {
471 +                               // Copy Dante Rx -> CMEM Rx
472 +                                memcpy((void*)(((uint8_t*)get_cmem_rx()) + (numBytes * c)),
473 +                                         ((void *)(mDanteRxChannels[c] + mDanteRxHeadSamples)), numBytes);
474 +                       }
476 +                       cl::Buffer bufA(cText, CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR,
477 +                               (unsigned int) (mNumLoopbackChannels * numBytes),
478 +                               (void *) mDanteTxChannels_cmem);
479 +                       cl::Buffer bufB(cText, CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR,
480 +                               (unsigned int) (mNumLoopbackChannels * numBytes),
481 +                               (void *) mDanteRxChannels_cmem);
483 +                        // set up arguments
484 +                        Knl.setArg(0, bufA);
485 +                        Knl.setArg(1, bufB);
486 +                        Knl.setArg(2, numBytes);
488 +                       cl::Event E;
489 +                       cQue.enqueueNDRangeKernel(Knl, cl::NullRange, cl::NDRange(mNumLoopbackChannels), 
490 +                                                 cl::NDRange(1), NULL, &E);
491 +                       E.wait();
493 +                        for (unsigned int c = 0; c < mNumLoopbackChannels; c++)
494 +                        {
495 +                               memcpy(mDanteTxChannels[c] + mDanteTxHeadSamples,
496 +                                       (void *)(((uint8_t*)get_cmem_tx()) + (numBytes * c)) , numBytes);
497 +                        }
499 +                       mDanteTxHeadSamples += numSamples;
500 +                       mDanteRxHeadSamples += numSamples;
501 +                }
502 +                catch (cl::Error& err)
503 +                {
504 +                        std::cerr << "ERROR: " << err.what() << "(" << err.err() << ", "
505 +                                << ocl_decode_error(err.err()) << ")" << std::endl;
506 +                        exit(-1);
507 +                }
508 +       }
509 +#else // #ifdef OPENCL_OFFLOAD
510 +        void workUnwrappedDanteRxDanteTx(unsigned int numSamples)
511 +        {
512 +                assert(mDanteTxHeadSamples + numSamples <= mSamplesPerChannel);
513 +                assert(mDanteRxHeadSamples + numSamples <= mSamplesPerChannel);
514 +                //std::cerr << "Transferring " << numSamples << " from " << mReadHead << " to " << mWriteHead << std::
515 +                unsigned int numBytes = numSamples * sizeof(int32_t);
516 +                for (unsigned int c = 0; c < mNumLoopbackChannels; c++)
517 +                {
518 +                        // We copy Dante RX -> Dante RX
519 +                        memcpy(mDanteTxChannels[c] + mDanteTxHeadSamples, mDanteRxChannels[c] + mDanteRxHeadSamples, numBytes);               }
520 +                mDanteTxHeadSamples += numSamples;
521 +                mDanteRxHeadSamples += numSamples;
522 +        }
523 +#endif // #ifdef OPENCL_OFFLOAD else
526         void workUnwrappedDanteRx(unsigned int numSamples)
527         {
528                 // Unwrap the Dante TX loop
529 @@ -100,17 +194,28 @@
530                 auto metadata = mBuffers.getHeader();
531                 mSamplesPerPeriod = metadata->time.samples_per_period;
532                 mSamplesPerChannel = metadata->audio.samples_per_channel;
533 -               
534                 mDanteTxChannels.resize(mBuffers.getHeader()->audio.num_tx_channels);
535                 for (unsigned int i = 0; i < mDanteTxChannels.size(); i++)
536                 {
537                         mDanteTxChannels[i] = (int32_t *) mBuffers.getDanteTxChannel(i);
538                 }
540 +#ifdef OPENCL_OFFLOAD
541 +               mDanteTxChannels_cmem = (uint8_t *) mBuffers.getDanteTxChannel_cmem();
542 +                std::cerr << "Reset::mDanteTxChannels_cmem: "
543 +                        << get_cmem_tx() << std::endl;
544 +#endif
545                 mDanteRxChannels.resize(mBuffers.getHeader()->audio.num_rx_channels);
546                 for (unsigned int i = 0; i < mDanteRxChannels.size(); i++)
547                 {
548                         mDanteRxChannels[i] = (const int32_t *) mBuffers.getDanteRxChannel(i);
549                 }
551 +#ifdef OPENCL_OFFLOAD
552 +               mDanteRxChannels_cmem = (uint8_t *) mBuffers.getDanteRxChannel_cmem();
553 +                std::cerr << "Reset::mDanteRxChannels_cmem: "
554 +                        << get_cmem_rx() << std::endl;
555 +#endif
556                 if (mDanteTxChannels.size() < mDanteRxChannels.size())
557                 {
558                         mNumLoopbackChannels = (unsigned int) mDanteTxChannels.size();
559 @@ -124,6 +229,18 @@
560                 mDanteTxHeadSamples = (unsigned int) ((metadata->time.period_count*mSamplesPerPeriod + mTxLatencySamples) % mSamplesPerChannel);
561         }
562  
563 +#ifdef OPENCL_OFFLOAD
564 +       void *get_cmem_tx(void)
565 +       {
566 +               return mBuffers.getDanteTxChannel_cmem();
567 +       }
569 +       void *get_cmem_rx(void)
570 +       {
571 +               return mBuffers.getDanteRxChannel_cmem();
572 +       }
573 +#endif
575  private:
576         Dante::Buffers & mBuffers;
577         unsigned int mTxLatencySamples;
578 @@ -134,7 +251,19 @@
579         unsigned int mDanteRxHeadSamples;
580         std::vector<int32_t *> mDanteTxChannels;
581         std::vector<const int32_t *> mDanteRxChannels;
582 -       unsigned int mNumLoopbackChannels;
583 +        unsigned int mNumLoopbackChannels;
585 +#ifdef OPENCL_OFFLOAD
586 +        cl::Context                 cText;
587 +        std::vector<cl::Device>     devices;
588 +        cl::CommandQueue            cQue;
589 +       cl::Program::Binaries       dspBinary;
590 +        cl::Program                 Prog;
591 +        cl::Kernel                  Knl;
592 +        cl::Kernel                  kTemp;
593 +        uint8_t *                  mDanteTxChannels_cmem;
594 +        uint8_t *                  mDanteRxChannels_cmem;
595 +#endif
596  };
597  
598  #ifdef WIN32
599 @@ -182,15 +311,26 @@
600                         return err;
601                 }
602                 std::cerr << "POST: policy=" << policy << " priority=" << param.sched_priority << std::endl;
603 +#ifdef OPENCL_OFFLOAD
604 +               loopback.ocl_init();
605 +#endif
606         }
607  #endif
608         
609         signal(SIGINT, signal_handler);
610  
611 +#ifdef OPENCL_OFFLOAD
612 +        // Move buffers.connect() out of while loop so OpenCL can have a dummy call to
613 +        // force DSP kernel be compiled and loaded to DSP
614 +       std::cerr << "Connecting..." << std::endl;
615 +       int result = buffers.connect("DanteEP", false);
616 +#endif
617         while (g_running)
618         {
619 +#ifndef OPENCL_OFFLOAD
620                 std::cerr << "Connecting..." << std::endl;
621                 int result = buffers.connect("DanteEP", false);
622 +#endif
623                 if (result)
624                 {
625                         std::cerr << "Error connecting to shared memory: " << Dante::SharedMemory::getErrorMessage(result) << std::endl;
626 @@ -207,6 +347,10 @@
627                 auto _reset = [&loopback]() { loopback.reset(); };
628                 runner.run(g_running, _work, _reset);
629  
630 +#ifdef OPENCL_OFFLOAD
631 +                std::cerr << "Running.." << g_running << std::endl;
632 +#endif
634                 std::cerr << "Disconnecting..." << std::endl;
635                 buffers.disconnect();
636                 std::cerr << "Disconnected" << std::endl;
637 @@ -218,3 +362,4 @@
638  
639         return 0;
640  }
642 --- /dev/null
643 +++ /home/root/dep/example-dsp/audio/loopback/DspKernel.cl
644 @@ -0,0 +1,21 @@
645 +#define BYTES_PER_CHANNEL 192000
646 +#define CMEM_ALIGNMENT 0
648 +kernel void null() { }
650 +kernel void ocl_cmemcpy(global char *cmem_tx, global char *cmem_rx, unsigned int len)
651 +{
652 +        int chanNum = get_global_id(0);
653 +#if 0
655 +       printf ("id = %d\n", get_global_id(0));
656 +       printf ("size = %d\n", get_global_size(0));
658 +       printf ("cmem_tx = %p\n", cmem_tx + (chanNum * len));
659 +        printf ("cmem_rx = %p\n", cmem_rx + (chanNum * len));
661 +       printf ("len = %d\n", len);
662 +#endif
663 +       memcpy (cmem_tx + (chanNum * len), 
664 +               cmem_rx + (chanNum * len), len);
665 +}
666 --- /dev/null
667 +++ /home/root/dep/example-dsp/audio/loopback/Makefile.opcl
668 @@ -0,0 +1,46 @@
669 +CXX ?= g++
670 +CLOCL ?= /usr/bin/clocl
672 +# executable # 
673 +BIN_NAME = DanteLoopback
674 +DSP_H_NAME = DspKernel.dsp_h
676 +SOURCES = DanteLoopback.cpp
677 +OBJECTS = $(SOURCES:%.cpp=%.o)
678 +#DEPS = $(OBJECTS:.o=.d)
680 +# flags #
681 +COMPILE_FLAGS = -Wno-error=nonnull -fPIC -Wall -Werror -Wno-error=unknown-pragmas -Wno-error=unused-function -Wno-error=strict-aliasing -fno-aggressive-loop-optimizations  -O2 -g -DNDEBUG -fPIC   -std=gnu++11
682 +INCLUDES = -I ../lib/include
683 +LIBS = ../lib/libdep_audio.a -lrt -lpthread -lOpenCL -locl_util -DOPENCL_OFFLOAD
686 +default_target: all
688 +.PHONY: clean
689 +clean:
690 +       $(RM) *.d *.o *.out $(BIN_NAME) $(DSP_H_NAME)
692 +# checks the executable and symlinks to the output
693 +.PHONY: all
694 +all: export CXXFLAGS := $(CXXFLAGS) $(COMPILE_FLAGS) -DOPENCL_OFFLOAD
695 +all: $(DSP_H_NAME) $(BIN_NAME) 
697 +%.out: %.cl
698 +       @echo Compiling $<
699 +       $(CLOCL) $(CLOCLFLAGS) $^
701 +%.dsp_h: %.cl
702 +       echo Compiling $<
703 +       $(CLOCL) -t $(CLOCLFLAGS) $^
705 +# Creation of the executable
706 +$(BIN_NAME): $(OBJECTS)
707 +       $(CXX) -o $@ $(OBJECTS) $(LIBS)
709 +# Source file rules
710 +# After the first compilation they will be joined with the rules from the
711 +# dependency files to provide header dependencies
712 +%.o: %.cpp
713 +       @echo "Compiling: $< -> $@"
714 +       $(CXX) $(CXXFLAGS) $(INCLUDES) -MP -MMD -c $< -o $@