d93853331d7162ae40eb35f906171c1aa0cc9fba
1 /*
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
7 Copyright (C) 2014, The University of Texas at Austin
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name of The University of Texas at Austin nor the names
18 of its contributors may be used to endorse or promote products
19 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
36 #include "test_libblis.h"
39 // Static variables.
40 static char* op_str = "ger";
41 static char* o_types = "vvm"; // x y a
42 static char* p_types = "cc"; // transa conjx
43 static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s
44 { 1e-04, 1e-05 }, // warn, pass for c
45 { 1e-13, 1e-14 }, // warn, pass for d
46 { 1e-13, 1e-14 } }; // warn, pass for z
48 // Local prototypes.
49 void libblis_test_ger_deps( test_params_t* params,
50 test_op_t* op );
52 void libblis_test_ger_experiment( test_params_t* params,
53 test_op_t* op,
54 iface_t iface,
55 num_t datatype,
56 char* pc_str,
57 char* sc_str,
58 unsigned int p_cur,
59 double* perf,
60 double* resid );
62 void libblis_test_ger_impl( iface_t iface,
63 obj_t* alpha,
64 obj_t* x,
65 obj_t* y,
66 obj_t* a );
68 void libblis_test_ger_check( obj_t* alpha,
69 obj_t* x,
70 obj_t* y,
71 obj_t* a,
72 obj_t* a_orig,
73 double* resid );
77 void libblis_test_ger_deps( test_params_t* params, test_op_t* op )
78 {
79 libblis_test_randv( params, &(op->ops->randv) );
80 libblis_test_normfv( params, &(op->ops->normfv) );
81 libblis_test_subv( params, &(op->ops->subv) );
82 libblis_test_scal2v( params, &(op->ops->scal2v) );
83 libblis_test_dotv( params, &(op->ops->dotv) );
84 libblis_test_gemv( params, &(op->ops->gemv) );
85 }
89 void libblis_test_ger( test_params_t* params, test_op_t* op )
90 {
92 // Return early if this test has already been done.
93 if ( op->test_done == TRUE ) return;
95 // Return early if operation is disabled.
96 if ( op->op_switch == DISABLE_ALL ||
97 op->ops->l2_over == DISABLE_ALL ) return;
99 // Call dependencies first.
100 if ( TRUE ) libblis_test_ger_deps( params, op );
102 // Execute the test driver for each implementation requested.
103 if ( op->front_seq == ENABLE )
104 {
105 libblis_test_op_driver( params,
106 op,
107 BLIS_TEST_SEQ_FRONT_END,
108 op_str,
109 p_types,
110 o_types,
111 thresh,
112 libblis_test_ger_experiment );
113 }
114 }
118 void libblis_test_ger_experiment( test_params_t* params,
119 test_op_t* op,
120 iface_t iface,
121 num_t datatype,
122 char* pc_str,
123 char* sc_str,
124 unsigned int p_cur,
125 double* perf,
126 double* resid )
127 {
128 unsigned int n_repeats = params->n_repeats;
129 unsigned int i;
131 double time_min = 1e9;
132 double time;
134 dim_t m, n;
136 conj_t conjx, conjy;
138 obj_t alpha, x, y;
139 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
140 dim_t test_way = bli_read_nway_from_env( "BLIS_LB_NT" );
141 obj_t a[test_way];
142 obj_t a_save[test_way];
143 double resid_local[test_way];
144 unsigned int j;
145 #else
146 obj_t a, a_save;
147 #endif
150 // Map the dimension specifier to actual dimensions.
151 m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
152 n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur );
154 // Map parameter characters to BLIS constants.
155 bli_param_map_char_to_blis_conj( pc_str[0], &conjx );
156 bli_param_map_char_to_blis_conj( pc_str[1], &conjy );
158 // Create test scalars.
159 bli_obj_scalar_init_detached( datatype, &alpha );
161 // Create test operands (vectors and/or matrices).
162 libblis_test_vobj_create( params, datatype,
163 sc_str[0], m, &x );
164 libblis_test_vobj_create( params, datatype,
165 sc_str[1], n, &y );
166 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
167 for(i = 0; i < test_way; i++)
168 {
169 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
170 sc_str[2], m, n, &a[i] );
171 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
172 sc_str[2], m, n, &a_save[i] );
173 }
174 #else
175 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
176 sc_str[2], m, n, &a );
177 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
178 sc_str[2], m, n, &a_save );
179 #endif
182 // Set alpha.
183 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
184 if ( bli_obj_is_real( a[0] ) )
185 #else
186 if ( bli_obj_is_real( a ) )
187 #endif
188 {
189 bli_setsc( -1.0, 1.0, &alpha );
190 }
191 else
192 {
193 bli_setsc( -1.0, 1.0, &alpha );
194 }
196 // Randomize x and y.
197 bli_randv( &x );
198 bli_randv( &y );
200 // Initialize A to identity and save.
201 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
202 for(i = 0; i < test_way; i++)
203 {
204 bli_setm( &BLIS_ZERO, &a[i] );
205 bli_setd( &BLIS_ONE, &a[i] );
206 bli_copym( &a[i], &a_save[i] );
207 }
208 #else
209 bli_setm( &BLIS_ZERO, &a );
210 bli_setd( &BLIS_ONE, &a );
211 bli_copym( &a, &a_save );
212 #endif
215 // Apply the parameters.
216 bli_obj_set_conj( conjx, x );
217 bli_obj_set_conj( conjy, y );
219 // Repeat the experiment n_repeats times and record results.
220 for ( i = 0; i < n_repeats; ++i )
221 {
222 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
223 // Need only one call to initialize the CBLAS OpenCL kernel
224 bli_copym( &a_save[0], &a[0] );
226 libblis_test_ger_impl( iface, &alpha, &x, &y, &a[0] );
228 //but need to re-initialize C for each of iteration of n_repeats
229 for(j = 0; j < test_way; j++)
230 {
231 bli_copym( &a_save[i], &a[i] );
232 }
233 #else
234 bli_copym( &a_save, &a );
235 libblis_test_ger_impl( iface, &alpha, &x, &y, &a );
236 bli_copym( &a_save, &a );
237 #endif
239 time = bli_clock();
241 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
242 #pragma omp parallel num_threads(test_way)
243 {
244 #pragma omp for
245 for(j = 0; j < test_way; j++)
246 {
247 libblis_test_ger_impl( iface, &alpha, &x, &y, &a[j] );
248 }
249 }
250 #else
251 libblis_test_ger_impl( iface, &alpha, &x, &y, &a );
252 #endif
253 time_min = bli_clock_min_diff( time_min, time );
254 }
255 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
256 // Estimate the performance of the best experiment repeat.
257 *perf = ( 2.0 * m * n ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
258 if ( bli_obj_is_complex( a[0] ) ) *perf *= 4.0;
259 #else
260 // Estimate the performance of the best experiment repeat.
261 *perf = ( 2.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF;
262 if ( bli_obj_is_complex( a ) ) *perf *= 4.0;
263 #endif
265 // Perform checks.
266 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
267 // Check output of each thread, and send max residue to main
268 for(i = 0; i < test_way; i++)
269 {
270 // Perform checks.
271 libblis_test_ger_check( &alpha, &x, &y, &a[i], &a_save[i], &resid_local[i] );
273 // Zero out performance and residual if output matrix is empty.
274 libblis_test_check_empty_problem( &a[i], perf, &resid_local[i] );
276 if(i == 0)
277 {
278 *resid = resid_local[i];
279 }
280 else if (resid_local[i] > *resid)
281 {
282 *resid = resid_local[i];
283 }
284 }
286 #else
287 // Perform checks.
288 libblis_test_ger_check( &alpha, &x, &y, &a, &a_save, resid );
290 // Zero out performance and residual if output matrix is empty.
291 libblis_test_check_empty_problem( &a, perf, resid );
292 #endif
295 // Free the test objects.
296 bli_obj_free( &x );
297 bli_obj_free( &y );
298 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
299 for(i = 0; i < test_way; i++)
300 {
301 bli_obj_free( &a[i] );
302 bli_obj_free( &a_save[i] );
303 }
304 #else
305 bli_obj_free( &a );
306 bli_obj_free( &a_save );
307 #endif
308 }
312 void libblis_test_ger_impl( iface_t iface,
313 obj_t* alpha,
314 obj_t* x,
315 obj_t* y,
316 obj_t* a )
317 {
318 switch ( iface )
319 {
320 case BLIS_TEST_SEQ_FRONT_END:
321 ;
322 #ifdef CBLAS
323 enum CBLAS_ORDER order;
324 enum CBLAS_TRANSPOSE transY;
325 int m, n;
326 int lda, incx, incy;
328 m = bli_obj_length( *a );
329 n = bli_obj_width( *a );
331 if(bli_obj_is_col_stored( *a ))
332 {
333 order = CblasColMajor;
334 lda = bli_obj_col_stride(*a);
335 }
336 else if(bli_obj_is_row_stored( *a ))
337 {
338 order = CblasRowMajor;
339 lda = bli_obj_row_stride(*a);
340 }
341 else
342 {
343 bli_check_error_code( BLIS_INVALID_DIM_STRIDE_COMBINATION );
344 return;
345 }
347 incx = bli_obj_vector_inc(*x);
348 incy = bli_obj_vector_inc(*y);
350 if(bli_obj_has_notrans( *y ) && bli_obj_has_noconj( *y ))
351 {
352 transY = CblasNoTrans;
353 }
354 else if(bli_obj_has_conj( *y ) ) //&& bli_obj_has_noconj( *y ))
355 {
356 transY = CblasConjTrans;
357 }
358 else
359 {
360 bli_check_error_code( BLIS_INVALID_TRANS );
361 return;
362 }
365 if (bli_obj_is_float( *a ))
366 {
367 float *cblas_alpha;
368 float *cblas_a, *cblas_x, *cblas_y;
370 cblas_alpha = (float *) bli_obj_buffer( *alpha );
371 cblas_a = (float *) bli_obj_buffer( *a );
372 cblas_x = (float *) bli_obj_buffer( *x );
373 cblas_y = (float *) bli_obj_buffer( *y );
375 cblas_sger(order, m, n, *cblas_alpha, cblas_x, incx, cblas_y, incy, cblas_a, lda);
377 }
378 else if (bli_obj_is_double( *a ))
379 {
380 double *cblas_alpha;
381 double *cblas_a, *cblas_x, *cblas_y;
383 cblas_alpha = (double *) bli_obj_buffer( *alpha );
384 cblas_a = (double *) bli_obj_buffer( *a );
385 cblas_x = (double *) bli_obj_buffer( *x );
386 cblas_y = (double *) bli_obj_buffer( *y );
388 cblas_dger(order, m, n, *cblas_alpha, cblas_x, incx, cblas_y, incy, cblas_a, lda);
390 }
391 else if (bli_obj_is_scomplex( *a ))
392 {
393 void *cblas_alpha;
394 void *cblas_a, *cblas_x, *cblas_y;
396 cblas_alpha = bli_obj_buffer( *alpha );
397 cblas_a = bli_obj_buffer( *a );
398 cblas_x = bli_obj_buffer( *x );
399 cblas_y = bli_obj_buffer( *y );
401 if(transY == CblasNoTrans)
402 cblas_cgeru(order, m, n, cblas_alpha, cblas_x, incx, cblas_y, incy, cblas_a, lda);
403 else if (transY==CblasConjTrans)
404 cblas_cgerc(order, m, n, cblas_alpha, cblas_x, incx, cblas_y, incy, cblas_a, lda);
405 }
406 else if (bli_obj_is_dcomplex( *a ))
407 {
408 void *cblas_alpha;
409 void *cblas_a, *cblas_x, *cblas_y;
411 cblas_alpha = bli_obj_buffer( *alpha );
412 cblas_a = bli_obj_buffer( *a );
413 cblas_x = bli_obj_buffer( *x );
414 cblas_y = bli_obj_buffer( *y );
416 if(transY == CblasNoTrans)
417 cblas_zgeru(order, m, n, cblas_alpha, cblas_x, incx, cblas_y, incy, cblas_a, lda);
418 else if (transY==CblasConjTrans)
419 cblas_zgerc(order, m, n, cblas_alpha, cblas_x, incx, cblas_y, incy, cblas_a, lda);
420 }
421 #else
422 bli_ger( alpha, x, y, a );
423 #endif
424 break;
426 default:
427 libblis_test_printf_error( "Invalid interface type.\n" );
428 }
429 }
433 void libblis_test_ger_check( obj_t* alpha,
434 obj_t* x,
435 obj_t* y,
436 obj_t* a,
437 obj_t* a_orig,
438 double* resid )
439 {
440 num_t dt = bli_obj_datatype( *a );
441 num_t dt_real = bli_obj_datatype_proj_to_real( *a );
443 dim_t m_a = bli_obj_length( *a );
444 dim_t n_a = bli_obj_width( *a );
446 obj_t t, v, w;
447 obj_t tau, rho, norm;
449 double junk;
451 //
452 // Pre-conditions:
453 // - x is randomized.
454 // - y is randomized.
455 // - a is identity.
456 // Note:
457 // - alpha should have a non-zero imaginary component in the
458 // complex cases in order to more fully exercise the implementation.
459 //
460 // Under these conditions, we assume that the implementation for
461 //
462 // A := A_orig + alpha * conjx(x) * conjy(y)
463 //
464 // is functioning correctly if
465 //
466 // normf( v - w )
467 //
468 // is negligible, where
469 //
470 // v = A * t
471 // w = ( A_orig + alpha * conjx(x) * conjy(y)^T ) * t
472 // = A_orig * t + alpha * conjx(x) * conjy(y)^T * t
473 // = A_orig * t + alpha * conjx(x) * rho
474 // = A_orig * t + w
475 //
477 bli_obj_scalar_init_detached( dt, &tau );
478 bli_obj_scalar_init_detached( dt, &rho );
479 bli_obj_scalar_init_detached( dt_real, &norm );
481 bli_obj_create( dt, n_a, 1, 0, 0, &t );
482 bli_obj_create( dt, m_a, 1, 0, 0, &v );
483 bli_obj_create( dt, m_a, 1, 0, 0, &w );
485 bli_setsc( 1.0/( double )n_a, -1.0/( double )n_a, &tau );
486 bli_setv( &tau, &t );
488 bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v );
490 bli_dotv( y, &t, &rho );
491 bli_mulsc( alpha, &rho );
492 bli_scal2v( &rho, x, &w );
493 bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w );
495 bli_subv( &w, &v );
496 bli_normfv( &v, &norm );
497 bli_getsc( &norm, resid, &junk );
499 bli_obj_free( &t );
500 bli_obj_free( &v );
501 bli_obj_free( &w );
502 }