/****************************************************************************** * Copyright (c) 2015, Texas Instruments Incorporated - http://www.ti.com * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the name of Texas Instruments Incorporated nor the * names of its contributors may be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ //////////////////////////////////////////////////////////////////////////////// // This file shows an example of the ARM side code of an ARM+DSP library. //////////////////////////////////////////////////////////////////////////////// #include #include #include #include #include #include #define __CL_ENABLE_EXCEPTIONS #include using namespace std; using namespace cl; // Both cl and std namespace define size_t, so we must be explicit. #define size_t ::size_t #ifndef TEST_FAT_BINARY #include "ocl_util.h" #endif #include // Include the DSP header file generated by "clocl --text" utility #include "test_lib_ocl.dsp_h" Context* ocl_context = NULL; std::vector* ocl_devices = NULL; CommandQueue* ocl_Q = NULL; Program::Binaries* ocl_binary = NULL; Program* ocl_program = NULL; #define TEST_DEBUG_PRINT(...) {fprintf(stdout,"TEST DEBUG: ");fprintf(stdout,__VA_ARGS__);} #define TEST_MSMC_MEM_SIZE (4608*1024UL) //4.5M Bytes #define TEST_DDR_MEM_SIZE (10240*1024UL) //10M Bytes /*============================================================================== * This is the library API to be called by the application on the ARM side. * This function will offload processing task to DSP for acceleration. *============================================================================*/ void lib_kernel_arm(double *in1_ptr, double *in2_ptr, double *out_ptr, int data_size) { TEST_DEBUG_PRINT("Allocating memory to run DSP processing kernel.\n"); // Allocate scratch memory for DSP acceleration void *med_mem_ptr = __malloc_msmc(TEST_MSMC_MEM_SIZE); void *slow_mem_ptr = __malloc_ddr(TEST_DDR_MEM_SIZE); if(med_mem_ptr==NULL) { med_mem_ptr = __malloc_ddr(TEST_MSMC_MEM_SIZE); TEST_DEBUG_PRINT("MSMC Memory is NOT available on the device. Reallocate memory from DDR.\n"); } if(med_mem_ptr==NULL || slow_mem_ptr==NULL) { TEST_DEBUG_PRINT("Memory allocation error!\n"); exit(0); } else { TEST_DEBUG_PRINT("Memory allocated.\n"); TEST_DEBUG_PRINT("Medium memory allocated at: 0x%08x.\n", (unsigned int)med_mem_ptr); TEST_DEBUG_PRINT("Slow memory allocated at: 0x%08x.\n", (unsigned int)slow_mem_ptr); } // Prepare for OpenCL dispatching. cl_int err; // Create OpenCL Kernel from OpenCL Program. Kernel* ocl_kernel = new Kernel(*ocl_program, "lib_kernel_ocl", &err); if(err != CL_SUCCESS){ TEST_DEBUG_PRINT("Kernel \"lib_kernel_ocl\" creation failed!\n"); exit(0); } TEST_DEBUG_PRINT("Kernel \"lib_kernel_ocl\" created!\n"); // Assign kernel arguments Buffer mem_MSMC(*ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, TEST_MSMC_MEM_SIZE, med_mem_ptr); ocl_kernel->setArg(0, mem_MSMC); ocl_kernel->setArg(1, TEST_MSMC_MEM_SIZE); Buffer mem_DDR(*ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, TEST_DDR_MEM_SIZE, slow_mem_ptr); ocl_kernel->setArg(2, mem_DDR); ocl_kernel->setArg(3, TEST_DDR_MEM_SIZE); Buffer buf_input1(*ocl_context, CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, data_size*sizeof(double), in1_ptr); ocl_kernel->setArg(4, buf_input1); Buffer buf_input2(*ocl_context, CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, data_size*sizeof(double), in2_ptr); ocl_kernel->setArg(5, buf_input2); Buffer buf_output(*ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, data_size*sizeof(double), out_ptr); ocl_kernel->setArg(6, buf_output); ocl_kernel->setArg(7, data_size); // Dispatch the kernel to DSP TEST_DEBUG_PRINT("Enqueuing task and dispatching kernel \"lib_kernel_ocl\" to DSP.\n"); Event ocl_event; err = ocl_Q->enqueueTask(*ocl_kernel, 0, &ocl_event); if(err != CL_SUCCESS){ TEST_DEBUG_PRINT("Enqueuing task failed!\n"); exit(0); } // Wait fo the kernel to complete execution ocl_event.wait(); delete(ocl_kernel); ocl_kernel = NULL; TEST_DEBUG_PRINT("Finished executing kernel \"lib_kernel_ocl\".\n"); delete(ocl_Q); delete(ocl_program); delete(ocl_binary); delete(ocl_devices); delete(ocl_context); __free_msmc(med_mem_ptr); __free_ddr(slow_mem_ptr); } /* lib_kernel_arm */ /*============================================================================== * This function performs OpenCL initialization. *============================================================================*/ void lib_init_arm() { const unsigned char* bin; cl_int err; // Create the OpenCL program using the DSP binary #ifdef TEST_FAT_BINARY // DSP library object code converted to text in DSP header test_lib_ocl.dsp_h bin = (unsigned char *)test_lib_ocl_dsp_bin; const size_t bin_length = test_lib_ocl_dsp_bin_len; #else // DSP library object code archived in test_lib_ocl.out const char binary[] = "./test_lib_ocl.out"; unsigned int bin_length; bin_length = ocl_read_binary(binary, (char*&)bin); #endif /* FAT_BINARY */ // OpenCL initialization TEST_DEBUG_PRINT("Initializing OpenCL\n"); // Create an OpenCL context with the accelerator device ocl_context = new Context(CL_DEVICE_TYPE_ACCELERATOR); ocl_devices = new std::vector (ocl_context->getInfo()); ocl_binary = new Program::Binaries(1, std::make_pair(bin, bin_length)); ocl_program = new Program(*ocl_context, *ocl_devices, *ocl_binary); ocl_program->build(*ocl_devices); for (int d = 0; d < ocl_devices->size(); d++) { std::string str; ocl_devices[0][d].getInfo(CL_DEVICE_NAME, &str); cout << " DEVICE: " << str << endl; bool ti_dsp = (str.find("C66") != std::string::npos); cl_uint bignum; ocl_devices[0][d].getInfo(CL_DEVICE_MAX_CLOCK_FREQUENCY, &bignum); cout << " Frequency : " << (double) bignum / 1e3 << " GHz"<< endl; cl_ulong longnum; ocl_devices[0][d].getInfo(CL_DEVICE_GLOBAL_MEM_SIZE, &longnum); cout << " Glb Mem : " << setw(7) << longnum / 1024 << " KB" << endl; if (ti_dsp) { ocl_devices[0][d].getInfo(CL_DEVICE_MSMC_MEM_SIZE_TI, &longnum); cout << " Msmc Mem : " << setw(7) << longnum / 1024 << " KB" << endl; } ocl_devices[0][d].getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &longnum); cout << " Loc Mem : " << setw(7) << longnum / 1024 << " KB" << endl; } // Create an OpenCL command queue ocl_Q = new CommandQueue(*ocl_context, ocl_devices[0][0], CL_QUEUE_PROFILING_ENABLE, &err); if(err != CL_SUCCESS){ TEST_DEBUG_PRINT("Command queue creation failed!\n"); exit(0); } if(ocl_Q==NULL){ TEST_DEBUG_PRINT("Command queue creation failed!\n"); exit(0); } #ifndef TEST_FAT_BINARY delete [] bin; #endif /* FAT_BINARY */ TEST_DEBUG_PRINT("OpenCL initialized\n"); return; } /* lib_init_arm */ void lib_find_l2_start() { cl_int err; // Create OpenCL Kernel from OpenCL Program to print L2 SRAM start address. Kernel* ocl_kernel = new Kernel(*ocl_program, "ocl_find_l2_start", &err); if(err != CL_SUCCESS){ TEST_DEBUG_PRINT("OpencCL kernel \"ocl_find_l2_start\" creation failed!\n"); exit(0); } TEST_DEBUG_PRINT("OpenCL kernel \"ocl_find_l2_start\" created.\n"); // Assign kernel arguments ocl_kernel->setArg(0, __local(1024)); // L2 start address will be passed to OCL kernel // Dispatch the kernel to DSP TEST_DEBUG_PRINT("Enqueuing task and dispatching kernel \"ocl_find_l2_start\" to DSP.\n"); Event ocl_event; err = ocl_Q->enqueueTask(*ocl_kernel, 0, &ocl_event); if(err != CL_SUCCESS){ TEST_DEBUG_PRINT("Enqueuing task failed!\n"); exit(0); } // Wait fo the kernel to complete execution ocl_event.wait(); delete(ocl_kernel); ocl_kernel = NULL; TEST_DEBUG_PRINT("Finished kernel \"ocl_find_l2_start\".\n"); } /* Nothing past this point */