1 /*
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
7 Copyright (C) 2014, The University of Texas at Austin
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name of The University of Texas at Austin nor the names
18 of its contributors may be used to endorse or promote products
19 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
36 #include "test_libblis.h"
39 // Static variables.
40 static char* op_str = "herk";
41 static char* o_types = "mm"; // a c
42 static char* p_types = "uh"; // uploc transa
43 static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s
44 { 1e-04, 1e-05 }, // warn, pass for c
45 { 1e-13, 1e-14 }, // warn, pass for d
46 { 1e-13, 1e-14 } }; // warn, pass for z
48 // Local prototypes.
49 void libblis_test_herk_deps( test_params_t* params,
50 test_op_t* op );
52 void libblis_test_herk_experiment( test_params_t* params,
53 test_op_t* op,
54 iface_t iface,
55 num_t datatype,
56 char* pc_str,
57 char* sc_str,
58 unsigned int p_cur,
59 perf_t* perf,
60 double* resid );
62 void libblis_test_herk_impl( iface_t iface,
63 obj_t* alpha,
64 obj_t* a,
65 obj_t* beta,
66 obj_t* c );
68 void libblis_test_herk_check( obj_t* alpha,
69 obj_t* a,
70 obj_t* beta,
71 obj_t* c,
72 obj_t* c_orig,
73 double* resid );
77 void libblis_test_herk_deps( test_params_t* params, test_op_t* op )
78 {
79 libblis_test_randv( params, &(op->ops->randv) );
80 libblis_test_randm( params, &(op->ops->randm) );
81 libblis_test_setv( params, &(op->ops->setv) );
82 libblis_test_normfv( params, &(op->ops->normfv) );
83 libblis_test_subv( params, &(op->ops->subv) );
84 libblis_test_scalv( params, &(op->ops->scalv) );
85 libblis_test_copym( params, &(op->ops->copym) );
86 libblis_test_scalm( params, &(op->ops->scalm) );
87 libblis_test_gemv( params, &(op->ops->gemv) );
88 libblis_test_hemv( params, &(op->ops->hemv) );
89 }
93 void libblis_test_herk( test_params_t* params, test_op_t* op )
94 {
96 // Return early if this test has already been done.
97 if ( op->test_done == TRUE ) return;
99 // Return early if operation is disabled.
100 if ( op->op_switch == DISABLE_ALL ||
101 op->ops->l3_over == DISABLE_ALL ) return;
103 // Call dependencies first.
104 if ( TRUE ) libblis_test_herk_deps( params, op );
106 // Execute the test driver for each implementation requested.
107 if ( op->front_seq == ENABLE )
108 {
109 libblis_test_op_driver( params,
110 op,
111 BLIS_TEST_SEQ_FRONT_END,
112 op_str,
113 p_types,
114 o_types,
115 thresh,
116 libblis_test_herk_experiment );
117 }
118 }
122 void libblis_test_herk_experiment( test_params_t* params,
123 test_op_t* op,
124 iface_t iface,
125 num_t datatype,
126 char* pc_str,
127 char* sc_str,
128 unsigned int p_cur,
129 perf_t* perf,
130 double* resid )
131 {
132 unsigned int n_repeats = params->n_repeats;
133 unsigned int i,j;
135 double time_min = 1e9;
136 double time;
138 dim_t m, k;
140 uplo_t uploc;
141 trans_t transa;
143 obj_t kappa;
144 obj_t alpha, a, beta;
145 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
146 dim_t test_way = bli_read_nway_from_env( "BLIS_LB_NT" );
147 obj_t c[test_way];
148 obj_t c_save[test_way];
149 double resid_local[test_way];
150 #else
151 obj_t c, c_save;
152 #endif
155 // Map the dimension specifier to actual dimensions.
156 m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
157 k = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );
159 // Map parameter characters to BLIS constants.
160 bli_param_map_char_to_blis_uplo( pc_str[0], &uploc );
161 bli_param_map_char_to_blis_trans( pc_str[1], &transa );
163 // Create test scalars.
164 bli_obj_scalar_init_detached( datatype, &kappa );
165 bli_obj_scalar_init_detached( datatype, &alpha );
166 bli_obj_scalar_init_detached( datatype, &beta );
168 // Create test operands (vectors and/or matrices).
169 libblis_test_mobj_create( params, datatype, transa,
170 sc_str[0], m, k, &a );
171 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
172 for(i = 0; i < test_way; i++)
173 {
174 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
175 sc_str[1], m, m, &c[i] );
176 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
177 sc_str[1], m, m, &c_save[i] );
178 }
179 #else
180 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
181 sc_str[1], m, m, &c );
182 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
183 sc_str[1], m, m, &c_save );
184 #endif
187 // Set alpha and beta.
188 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
189 if ( bli_obj_is_real( c[0] ) )
190 #else
191 if ( bli_obj_is_real( c ) )
192 #endif
193 {
194 bli_setsc( 1.2, 0.0, &alpha );
195 bli_setsc( -1.0, 0.0, &beta );
196 }
197 else
198 {
199 // For herk, alpha and beta must both be real-valued, even in the
200 // complex case (in order to preserve the Hermitian structure of C).
201 bli_setsc( 1.2, 0.0, &alpha );
202 bli_setsc( -1.0, 0.0, &beta );
203 }
205 // Randomize A.
206 bli_randm( &a );
208 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
209 for(i = 0; i < test_way; i++)
210 {
211 // Set the structure and uplo properties of C.
212 bli_obj_set_struc( BLIS_HERMITIAN, c[i] );
213 bli_obj_set_uplo( uploc, c[i] );
215 // Randomize A, make it densely Hermitian, and zero the unstored triangle
216 // to ensure the implementation is reads only from the stored region.
217 bli_randm( &c[i] );
218 bli_mkherm( &c[i] );
219 bli_mktrim( &c[i] );
221 // Save C and set its structure and uplo properties.
222 bli_obj_set_struc( BLIS_HERMITIAN, c_save[i] );
223 bli_obj_set_uplo( uploc, c_save[i] );
224 bli_copym( &c[i], &c_save[i] );
225 }
226 #else
227 // Set the structure and uplo properties of C.
228 bli_obj_set_struc( BLIS_HERMITIAN, c );
229 bli_obj_set_uplo( uploc, c );
231 // Randomize A, make it densely Hermitian, and zero the unstored triangle
232 // to ensure the implementation is reads only from the stored region.
233 bli_randm( &c );
234 bli_mkherm( &c );
235 bli_mktrim( &c );
237 // Save C and set its structure and uplo properties.
238 bli_obj_set_struc( BLIS_HERMITIAN, c_save );
239 bli_obj_set_uplo( uploc, c_save );
240 bli_copym( &c, &c_save );
242 #endif
244 // Normalize by k.
245 bli_setsc( 1.0/( double )k, 0.0, &kappa );
246 bli_scalm( &kappa, &a );
248 // Apply the remaining parameters.
249 bli_obj_set_conjtrans( transa, a );
251 // Repeat the experiment n_repeats times and record results.
252 for ( i = 0; i < n_repeats; ++i )
253 {
255 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
256 // Need only one call to initialize the CBLAS OpenCL kernel
257 bli_copym( &c_save[0], &c[0] );
259 libblis_test_herk_impl( iface, &alpha, &a, &beta, &c[0] );
261 //but need to re-initialize C for each of iteration of n_repeats
262 for(j = 0; j < test_way; j++)
263 {
264 bli_copym( &c_save[j], &c[j] );
265 }
266 #else
267 bli_copym( &c_save, &c );
269 libblis_test_herk_impl( iface, &alpha, &a, &beta, &c );
271 bli_copym( &c_save, &c );
272 #endif
274 time = bli_clock();
276 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
277 #pragma omp parallel num_threads(test_way)
278 {
279 #pragma omp for
280 for(j = 0; j < test_way; j++)
281 {
282 libblis_test_herk_impl( iface, &alpha, &a, &beta, &c[j] );
283 }
284 }
285 #else
286 libblis_test_herk_impl( iface, &alpha, &a, &beta, &c );
287 #endif
288 time_min = bli_clock_min_diff( time_min, time );
289 }
291 // Estimate the performance of the best experiment repeat.
292 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
293 perf->gflops = ( 1.0 * m * m * k ) * test_way/ time_min / FLOPS_PER_UNIT_PERF;
294 if ( bli_obj_is_complex( c[0] ) ) perf->gflops *= 4.0;
295 #else
296 perf->gflops = ( 1.0 * m * m * k ) / time_min / FLOPS_PER_UNIT_PERF;
297 if ( bli_obj_is_complex( c ) ) perf->gflops *= 4.0;
298 #endif
300 perf->time = time_min;
302 // Perform checks.
303 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
304 // Check output of each thread, and send max residue to main
305 for(i = 0; i < test_way; i++)
306 {
307 libblis_test_herk_check( &alpha, &a, &beta, &c[i], &c_save[i], &resid_local[i] );
308 libblis_test_check_empty_problem( &c[i], perf, &resid_local[i] );
310 if(i == 0)
311 {
312 *resid = resid_local[i];
313 }
314 else if (resid_local[i] > *resid)
315 {
316 *resid = resid_local[i];
317 }
318 }
320 #else
321 libblis_test_herk_check( &alpha, &a, &beta, &c, &c_save, resid );
323 // Zero out performance and residual if output matrix is empty.
324 libblis_test_check_empty_problem( &c, perf, resid );
325 #endif
328 // Free the test objects.
329 bli_obj_free( &a );
330 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
331 for(i = 0; i < test_way; i++)
332 {
333 bli_obj_free( &c[i] );
334 bli_obj_free( &c_save[i] );
335 }
336 #else
337 bli_obj_free( &c );
338 bli_obj_free( &c_save );
339 #endif
340 }
344 void libblis_test_herk_impl( iface_t iface,
345 obj_t* alpha,
346 obj_t* a,
347 obj_t* beta,
348 obj_t* c )
349 {
350 switch ( iface )
351 {
352 case BLIS_TEST_SEQ_FRONT_END:
353 ;
354 #ifdef CBLAS
355 enum CBLAS_ORDER cblas_order;
356 enum CBLAS_UPLO cblas_uplo;
357 enum CBLAS_TRANSPOSE cblas_trans;
358 int n, k;
359 int lda, ldc;
361 void *cblas_a, *cblas_c;
363 cblas_a = bli_obj_buffer( *a );
364 cblas_c = bli_obj_buffer( *c );
366 n = bli_obj_length( *c );
367 k = bli_obj_width_after_trans( *a );
370 if(bli_obj_is_upper( *c ))
371 {
372 cblas_uplo = CblasUpper;
373 }
374 else if (bli_obj_is_lower( *c ))
375 {
376 cblas_uplo = CblasLower;
377 }
378 else
379 {
380 bli_check_error_code( BLIS_INVALID_UPLO );
381 return;
382 }
384 if(bli_obj_has_notrans( *a ) && bli_obj_has_noconj( *a ))
385 {
386 cblas_trans = CblasNoTrans;
387 }
388 else if(bli_obj_has_trans( *a ) && bli_obj_has_conj( *a ))
389 {
390 cblas_trans = CblasConjTrans;
391 }
392 else
393 {
394 bli_check_error_code( BLIS_INVALID_TRANS );
395 return;
396 }
398 if(bli_obj_is_col_stored( *a ))
399 {
400 cblas_order = CblasColMajor;
401 lda = bli_obj_col_stride(*a);
402 ldc = bli_obj_col_stride(*c);
403 }
404 else if(bli_obj_is_row_stored( *a ))
405 {
406 cblas_order = CblasRowMajor;
407 lda = bli_obj_row_stride(*a);
408 ldc = bli_obj_row_stride(*c);
409 }
410 else
411 {
412 bli_check_error_code( BLIS_INVALID_DIM_STRIDE_COMBINATION );
413 return;
414 }
416 if (bli_obj_is_scomplex( *a ))
417 {
418 float *cblas_alpha, *cblas_beta;
419 cblas_alpha = bli_obj_buffer( *alpha );
420 cblas_beta = bli_obj_buffer( *beta );
422 cblas_cherk(cblas_order, cblas_uplo,
423 cblas_trans, n, k,
424 *cblas_alpha, cblas_a, lda,
425 *cblas_beta, cblas_c, ldc);
426 }
427 else if (bli_obj_is_dcomplex( *a ))
428 {
429 double *cblas_alpha, *cblas_beta;
430 cblas_alpha = bli_obj_buffer( *alpha );
431 cblas_beta = bli_obj_buffer( *beta );
432 cblas_zherk(cblas_order, cblas_uplo,
433 cblas_trans, n, k,
434 *cblas_alpha, cblas_a, lda,
435 *cblas_beta, cblas_c, ldc);
436 }
437 else
438 {
439 printf("CBLAS not supported\n");
440 }
442 #else
443 bli_herk( alpha, a, beta, c );
444 //bli_herk4m( alpha, a, beta, c );
445 //bli_herk3m( alpha, a, beta, c );
446 #endif
447 break;
449 default:
450 libblis_test_printf_error( "Invalid interface type.\n" );
451 }
452 }
456 void libblis_test_herk_check( obj_t* alpha,
457 obj_t* a,
458 obj_t* beta,
459 obj_t* c,
460 obj_t* c_orig,
461 double* resid )
462 {
463 num_t dt = bli_obj_datatype( *c );
464 num_t dt_real = bli_obj_datatype_proj_to_real( *c );
466 dim_t m = bli_obj_length( *c );
467 dim_t k = bli_obj_width_after_trans( *a );
469 obj_t ah;
470 obj_t kappa, norm;
471 obj_t t, v, w, z;
473 double junk;
475 //
476 // Pre-conditions:
477 // - a is randomized.
478 // - c_orig is randomized and Hermitian.
479 // Note:
480 // - alpha and beta must be real-valued.
481 //
482 // Under these conditions, we assume that the implementation for
483 //
484 // C := beta * C_orig + alpha * transa(A) * transa(A)^H
485 //
486 // is functioning correctly if
487 //
488 // normf( v - z )
489 //
490 // is negligible, where
491 //
492 // v = C * t
493 // z = ( beta * C_orig + alpha * transa(A) * transa(A)^H ) * t
494 // = beta * C_orig * t + alpha * transa(A) * transa(A)^H * t
495 // = beta * C_orig * t + alpha * transa(A) * w
496 // = beta * C_orig * t + z
497 //
499 bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *a, ah );
501 bli_obj_scalar_init_detached( dt, &kappa );
502 bli_obj_scalar_init_detached( dt_real, &norm );
504 bli_obj_create( dt, m, 1, 0, 0, &t );
505 bli_obj_create( dt, m, 1, 0, 0, &v );
506 bli_obj_create( dt, k, 1, 0, 0, &w );
507 bli_obj_create( dt, m, 1, 0, 0, &z );
509 bli_randv( &t );
510 bli_setsc( 1.0/( double )m, 0.0, &kappa );
511 bli_scalv( &kappa, &t );
513 bli_hemv( &BLIS_ONE, c, &t, &BLIS_ZERO, &v );
515 bli_gemv( &BLIS_ONE, &ah, &t, &BLIS_ZERO, &w );
516 bli_gemv( alpha, a, &w, &BLIS_ZERO, &z );
517 bli_hemv( beta, c_orig, &t, &BLIS_ONE, &z );
519 bli_subv( &z, &v );
520 bli_normfv( &v, &norm );
521 bli_getsc( &norm, resid, &junk );
523 bli_obj_free( &t );
524 bli_obj_free( &v );
525 bli_obj_free( &w );
526 bli_obj_free( &z );
527 }