4ac5ddf2a8d118c52ac9a19e721d882c7f02024a
[dense-linear-algebra-libraries/linalg.git] / src / ti / linalg / blasblisacc / src / ti_cblas_initfini.c
1 /******************************************************************************
2 * Copyright (c) 2013-2015, Texas Instruments Incorporated - http://www.ti.com/
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Texas Instruments Incorporated nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *****************************************************************************/
28 #include "ti_cblas_acc.h"
29 #include "../../ticblas/ticblas.h"
30 #include <pthread.h>
32 #ifdef TI_CBLAS_FAT_BINARY
33 #include "ti_cblas_kernel.dsp_h"
34 #endif
36 /*==============================================================================
37 * This file contains functions of the ARM wrapper of ARM+DSP CBLAS library.
38 * It has the initialization and finalization routines.
39 *
40 * The standard CBLAS API for each BLAS function can be found in file
41 * ti_cblas_cblas_<func_name>.c, such as ti_cblas_cblas_dgemm.c for DGEMM.
42 *============================================================================*/
/* Status codes returned by ti_blis_init(), ti_blis_finalize() and
 * ti_cblas_finalize() in this file. */
44 #define TI_CBLAS_INITFINI_SUCCESS 0 /* no error */
45 #define TI_CBLAS_INITFINI_OCL_ERR 1 /* an OpenCL call threw an Error */
46 #define TI_CBLAS_INITFINI_BLI_ERR 2 /* offloaded BLIS kernel returned a code other than TICBLAS_SUCCESS */
48 /* Global variables */
/* OpenCL objects: allocated with new in ti_cblas_init() and deleted (then
 * reset to NULL) in ti_cblas_finalize(). */
49 Context* ti_cblas_ocl_context = NULL;
50 std::vector<Device>* ti_cblas_ocl_devices = NULL;
51 CommandQueue* ti_cblas_ocl_Q = NULL;
52 Program::Binaries* ti_cblas_ocl_binary = NULL;
53 Program* ti_cblas_ocl_program = NULL;
/* ti_cblas_init_done is set to 1 at the end of ti_cblas_init() and tested by
 * ti_cblas_finalize(); ti_cblas_disable_debug is set to 1 when the
 * TI_CBLAS_NO_DEBUG environment variable parses to a value > 0. */
55 int ti_cblas_init_done = 0; /* flag to check if init is complete */
56 int ti_cblas_disable_debug = 0; /* runtime toggle to disable debug */
/* ti_cblas_offload holds the value parsed from TI_CBLAS_OFFLOAD (or the build
 * time default); ti_cblas_init() splits it into one decimal digit per BLAS
 * level: hundreds -> L1, tens -> L2, units -> L3. */
57 int ti_cblas_offload = TI_CBLAS_OFFLOAD_SIZE;
58 int TI_CBLAS_L1_OFFLOAD = TI_CBLAS_OFFLOAD_NONE;
59 int TI_CBLAS_L2_OFFLOAD = TI_CBLAS_OFFLOAD_NONE;
60 int TI_CBLAS_L3_OFFLOAD = TI_CBLAS_OFFLOAD_NONE;
/* Condition variable and mutex used by ti_cblas_mem_alloc() and
 * ti_cblas_mem_free() to wait for / announce freed MSMC memory. */
62 pthread_cond_t CV;
63 pthread_mutex_t MUTEX;
65 /*============================================================================
66 * Function purpose: report error encoutered in ARM wrapper code.
67 *============================================================================*/
68 void ti_cblas_error(const char* msg, int code)
69 {
70 fprintf(stderr, "TI CBLAS wrapper ERROR: (%s,%d)\n", msg, code);
71 }
73 /*============================================================================
74 * Function purpose: initialize BLIS on both ARM and DSP
75 *============================================================================*/
76 int ti_blis_init(void)
77 {
78 int r_val = TI_CBLAS_INITFINI_SUCCESS;
80 /* Initialize BLIS on ARM */
81 TI_CBLAS_DEBUG_PRINT("Initializing BLIS on ARM...\n");
82 bli_init();
83 TI_CBLAS_DEBUG_PRINT("BLIS initialized on ARM.\n");
85 /* Initialize BLIS on DSP by offloading bli_init() on DSP */
86 TI_CBLAS_DEBUG_PRINT("Initializing BLIS on DSP...\n");
87 Event e;
88 Kernel* __K;
90 __K = ti_cblas_get_kernel("ocl_bli_init");
91 try
92 {
93 int err_code;
94 Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
95 __K->setArg(0, buf_err);
97 ti_cblas_ocl_Q->enqueueTask(*__K, 0, &e);
98 e.wait();
100 if(err_code != TICBLAS_SUCCESS) {
101 TI_CBLAS_DEBUG_PRINT("Error in offloaded ocl_bli_init with error code %d!\n", err_code);
102 r_val = TI_CBLAS_INITFINI_BLI_ERR;
103 }
105 ti_cblas_delete_kernel(__K);
106 TI_CBLAS_DEBUG_PRINT("BLIS DSP initialization finished.\n");
107 }
109 catch (Error err)
110 {
111 ti_cblas_delete_kernel(__K);
112 ti_cblas_error(err.what(),err.err());
113 r_val = TI_CBLAS_INITFINI_OCL_ERR;
114 }
116 return r_val;
117 } // ti_blis_init
120 /*============================================================================
121 * Function purpose: initialize and prepare for CBLAS calls:
122 * - parse the environment variables
123 * - initialize OpenCL
124 * - initialize BLIS
125 *
126 * Note: this function is invoked exactly once on startup, when any CBLAS function
127 * is called the first time.
128 *============================================================================*/
129 void ti_cblas_init(void)
130 {
131 #pragma omp critical (ti_cblas_init_critical)
132 {
133 /* Add code for interception */
134 #ifdef TI_CBLAS_DEBUG
135 char *no_debug_env = getenv("TI_CBLAS_NO_DEBUG");
136 if (no_debug_env) {
137 if (atoi(no_debug_env) > 0) {
138 ti_cblas_disable_debug = 1;
139 }
140 }
141 #endif
143 TI_CBLAS_DEBUG_PRINT("ti_cblas_init: Initializing OpenCL on first use..\n");
145 TI_CBLAS_PROFILE_START();
147 /* check environment variables */
148 const char *offload_env = getenv("TI_CBLAS_OFFLOAD");
149 if (!offload_env) {
150 TI_CBLAS_DEBUG_PRINT("Using build time default for offload: TI_CBLAS_OFFLOAD=%s\n", TI_CBLAS_OFFLOAD_DEF);
151 offload_env = TI_CBLAS_OFFLOAD_DEF;
152 }
153 else {
154 TI_CBLAS_DEBUG_PRINT("Using runtime override for offloads: TI_CBLAS_OFFLOAD=%s\n", offload_env);
155 }
156 if (offload_env) {
157 ti_cblas_offload = atoi(offload_env);
158 if (ti_cblas_offload == TI_CBLAS_OFFLOAD_NONE) {
159 TI_CBLAS_DEBUG_PRINT("Disabling all offloads\n");
160 }
161 }
163 /* 3-digit value: 012
164 * Left-most digit => L1 (0)
165 * Middle-digit => L2 (1)
166 * Right-most => L3 (2)
167 */
168 TI_CBLAS_L1_OFFLOAD = ti_cblas_offload / 100;
169 int tmp_offload = ti_cblas_offload % 100;
170 TI_CBLAS_L2_OFFLOAD = tmp_offload / 10;
171 TI_CBLAS_L3_OFFLOAD = tmp_offload % 10;
172 TI_CBLAS_DEBUG_PRINT("BLAS Offload values: L1=%d, L2=%d, L3=%d\n",
173 TI_CBLAS_L1_OFFLOAD, TI_CBLAS_L2_OFFLOAD, TI_CBLAS_L3_OFFLOAD);
174 if ((TI_CBLAS_L1_OFFLOAD == TI_CBLAS_OFFLOAD_SIZE)) {
175 TI_CBLAS_ERROR_EXIT("Size-based offload NOT supported for BLAS Level 1 yet.\n");
176 }
177 if ((TI_CBLAS_L2_OFFLOAD == TI_CBLAS_OFFLOAD_SIZE)) {
178 TI_CBLAS_ERROR_EXIT("Size-based offload NOT supported for BLAS Level 2 yet.\n");
179 }
181 /*------------------------------------------------------------------------
182 * Read the offline compiled kernel module
183 *-----------------------------------------------------------------------*/
184 TI_CBLAS_DEBUG_PRINT("Reading Kernels\n");
185 const unsigned char* bin;
186 #ifdef TI_CBLAS_FAT_BINARY
187 bin = (unsigned char *)ti_cblas_kernel_dsp_bin;
188 const size_t bin_length = ti_cblas_kernel_dsp_bin_len;
189 #else
190 const char binary[] = "./ti_cblas_kernel.out";
191 unsigned int bin_length;
192 bin_length = ocl_read_binary(binary, (char*&)bin);
193 #endif /* FAT_BINARY */
195 /* OpenCL init */
196 TI_CBLAS_DEBUG_PRINT("Initializing OpenCL\n");
197 ti_cblas_ocl_context = new Context(CL_DEVICE_TYPE_ACCELERATOR);
198 ti_cblas_ocl_devices = new std::vector<Device> (ti_cblas_ocl_context->getInfo<CL_CONTEXT_DEVICES>());
199 ti_cblas_ocl_binary = new Program::Binaries(1, std::make_pair(bin, bin_length));
200 ti_cblas_ocl_program = new Program(*ti_cblas_ocl_context, *ti_cblas_ocl_devices, *ti_cblas_ocl_binary);
201 ti_cblas_ocl_program->build(*ti_cblas_ocl_devices);
202 ti_cblas_ocl_Q = new CommandQueue(*ti_cblas_ocl_context, ti_cblas_ocl_devices[0][0], CL_QUEUE_PROFILING_ENABLE);
204 #ifndef TI_CBLAS_FAT_BINARY
205 delete [] bin;
206 #endif /* FAT_BINARY */
208 TI_CBLAS_DEBUG_PRINT("OpenCL initialized\n");
210 /* Initializing pthreads */
211 TI_CBLAS_DEBUG_PRINT("Initializing Pthreads\n");
212 pthread_cond_init (&CV, 0);
213 pthread_mutex_init(&MUTEX, 0);
214 TI_CBLAS_DEBUG_PRINT("Pthreads initialized\n");
215 TI_CBLAS_DEBUG_PRINT("Initializing BLIS\n");
216 if(ti_blis_init() == TI_CBLAS_INITFINI_SUCCESS) {
217 TI_CBLAS_DEBUG_PRINT("BLIS initialized\n");\
218 }
219 else {
220 TI_CBLAS_DEBUG_PRINT("BLIS NOT initialized!\n");\
221 }
223 /* Register auto finalization to be called when program exits */
224 atexit(ti_cblas_auto_finalize);
226 TI_CBLAS_PROFILE_REPORT("Initialization took %8.2f us\n", (float) clock_diff);
227 ti_cblas_init_done = 1;
228 TI_CBLAS_DEBUG_PRINT("ti_cblas_init: Finished initialization\n");
230 } // End of critical section
232 return;
233 } //ti_cblas_init
235 /*============================================================================
236 * Function purpose: finalize BLIS on both ARM and DSP
237 *============================================================================*/
238 int ti_blis_finalize(void)
239 {
240 int r_val = TI_CBLAS_INITFINI_SUCCESS;
242 /* Finalize BLIS on ARM */
243 TI_CBLAS_DEBUG_PRINT("Finalizing BLIS on ARM...\n");
244 bli_finalize();
245 TI_CBLAS_DEBUG_PRINT("BLIS finalized on ARM.\n");
247 /* Finalize BLIS on DSP */
248 Event e;
249 Kernel* __K;
251 __K = ti_cblas_get_kernel("ocl_bli_finalize");
253 TI_CBLAS_DEBUG_PRINT("Finalizing BLIS on DSP...\n");
254 int err_code;
255 Buffer buf_err(*ti_cblas_ocl_context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, sizeof(int), &err_code);
256 __K->setArg(0, buf_err);
258 try
259 {
260 ti_cblas_ocl_Q->enqueueTask(*__K, 0, &e);
261 e.wait();
263 if(err_code != TICBLAS_SUCCESS) {
264 TI_CBLAS_DEBUG_PRINT("Error in offloaded ocl_bli_finalize with error code %d!\n", err_code);
265 r_val = TI_CBLAS_INITFINI_BLI_ERR;
266 }
268 ti_cblas_delete_kernel(__K);
269 }
271 catch (Error err)
272 {
273 ti_cblas_error(err.what(),err.err());
274 r_val = TI_CBLAS_INITFINI_OCL_ERR;
275 }
277 return r_val;
278 } // ti_blis_finalize
280 /*============================================================================
281 * Function purpose: finalize after all CBLAS calls:
282 * - finalize BLIS
283 * - delete OpenCL context
284 *
285 * Note: this function is invoked exactly once on program exit.
286 *============================================================================*/
287 int ti_cblas_finalize(void)
288 {
289 /* If ti_cblas_init_done is equal to 0,
290 * then we know that ti_cblas_init was not called,
291 * and so we can return early.
292 */
293 if(ti_cblas_init_done == 0) {
294 return TI_CBLAS_INITFINI_SUCCESS;
295 }
297 int r_val = ti_blis_finalize();
298 /*Using same name as ti_cblas_init critical region. See notes in bli_init*/
299 #pragma omp critical (ti_cblas_init_critical)
300 {
301 // Destroy Pthread
302 pthread_mutex_destroy(&MUTEX);
303 pthread_cond_destroy (&CV);
305 //destroy Command queue, program, devices and context.
306 if(ti_cblas_ocl_Q != NULL)
307 {
308 delete(ti_cblas_ocl_Q);
309 ti_cblas_ocl_Q = NULL;
310 }
311 if(ti_cblas_ocl_program != NULL)
312 {
313 delete(ti_cblas_ocl_program);
314 ti_cblas_ocl_program = NULL;
315 }
316 if(ti_cblas_ocl_binary != NULL)
317 {
318 delete(ti_cblas_ocl_binary);
319 ti_cblas_ocl_binary = NULL;
320 }
321 if(ti_cblas_ocl_devices != NULL)
322 {
323 delete(ti_cblas_ocl_devices);
324 ti_cblas_ocl_devices = NULL;
325 }
326 if(ti_cblas_ocl_context != NULL)
327 {
328 delete(ti_cblas_ocl_context);
329 ti_cblas_ocl_context = NULL;
330 }
331 }
333 return r_val;
334 } // ti_cblas_finalize
337 /*============================================================================
338 * Function purpose: auto-finalize on program exit.
339 *============================================================================*/
340 void ti_cblas_auto_finalize(void)
341 {
342 int r_val;
344 r_val = ti_cblas_finalize();
345 if (r_val != TI_CBLAS_INITFINI_SUCCESS)
346 {
347 fprintf(stderr, "Error: ti_cblas_finalize failed with error code %d!\n", r_val);
348 }
349 } //ti_cblas_auto_finalize
352 /*============================================================================
353 * Function purpose: free previously allocated MSMC memory
354 *============================================================================*/
355 void ti_cblas_mem_free(void *ptr)
356 {
357 pthread_mutex_lock(&MUTEX);
358 __free_msmc(ptr);
359 pthread_cond_broadcast(&CV);
360 pthread_mutex_unlock(&MUTEX);
362 }
364 /*============================================================================
365 * Function purpose: allocate MSMC memory
366 *============================================================================*/
367 void *ti_cblas_mem_alloc(size_t size)
368 {
369 void *ptr;
370 pthread_mutex_lock(&MUTEX);
372 /*-------------------------------------------------------------------------
373 * Loop in case of false signal after broadcast.
374 *------------------------------------------------------------------------*/
375 while ((ptr = __malloc_msmc(size)) == 0)
376 {
377 pthread_cond_wait(&CV, &MUTEX);
378 }
380 pthread_mutex_unlock(&MUTEX);
382 return ptr;
383 } //ti_cblas_mem_alloc
386 /*============================================================================
387 * Function purpose: create an OpenCL kernel
388 *============================================================================*/
389 Kernel *ti_cblas_get_kernel(const char *fname)
390 {
391 Kernel* __K;
393 __K = new Kernel(*ti_cblas_ocl_program, fname);
395 return __K;
396 }
398 /*============================================================================
399 * Function purpose: delete an OpenCL kernel
400 *============================================================================*/
401 int ti_cblas_delete_kernel(Kernel* K)
402 {
403 if(K != NULL)
404 {
405 delete(K);
406 K=NULL;
407 }
409 return 0;
410 }
412 /* Nothing after this line */