[dense-linear-algebra-libraries/linalg.git] / src / ti / linalg / blis / testsuite / src / test_syr2.c
1 /*
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
7 Copyright (C) 2014, The University of Texas at Austin
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name of The University of Texas at Austin nor the names
18 of its contributors may be used to endorse or promote products
19 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
36 #include "test_libblis.h"
39 // Static variables.
40 static char* op_str = "syr2";
41 static char* o_types = "vvm"; // x y a
42 static char* p_types = "ucc"; // uploa conjx conjy
43 static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s
44 { 1e-04, 1e-05 }, // warn, pass for c
45 { 1e-13, 1e-14 }, // warn, pass for d
46 { 1e-13, 1e-14 } }; // warn, pass for z
48 // Local prototypes.
49 void libblis_test_syr2_deps( test_params_t* params,
50 test_op_t* op );
52 void libblis_test_syr2_experiment( test_params_t* params,
53 test_op_t* op,
54 iface_t iface,
55 num_t datatype,
56 char* pc_str,
57 char* sc_str,
58 unsigned int p_cur,
59 perf_t* perf,
60 double* resid );
62 void libblis_test_syr2_impl( iface_t iface,
63 obj_t* alpha,
64 obj_t* x,
65 obj_t* y,
66 obj_t* a );
68 void libblis_test_syr2_check( obj_t* alpha,
69 obj_t* x,
70 obj_t* y,
71 obj_t* a,
72 obj_t* a_orig,
73 double* resid );
77 void libblis_test_syr2_deps( test_params_t* params, test_op_t* op )
78 {
79 libblis_test_randv( params, &(op->ops->randv) );
80 libblis_test_randm( params, &(op->ops->randm) );
81 libblis_test_normfv( params, &(op->ops->normfv) );
82 libblis_test_subv( params, &(op->ops->subv) );
83 libblis_test_copym( params, &(op->ops->copym) );
84 libblis_test_scal2v( params, &(op->ops->scal2v) );
85 libblis_test_dotv( params, &(op->ops->dotv) );
86 libblis_test_gemv( params, &(op->ops->gemv) );
87 }
91 void libblis_test_syr2( test_params_t* params, test_op_t* op )
92 {
94 // Return early if this test has already been done.
95 if ( op->test_done == TRUE ) return;
97 // Return early if operation is disabled.
98 if ( op->op_switch == DISABLE_ALL ||
99 op->ops->l2_over == DISABLE_ALL ) return;
101 // Call dependencies first.
102 if ( TRUE ) libblis_test_syr2_deps( params, op );
104 // Execute the test driver for each implementation requested.
105 if ( op->front_seq == ENABLE )
106 {
107 libblis_test_op_driver( params,
108 op,
109 BLIS_TEST_SEQ_FRONT_END,
110 op_str,
111 p_types,
112 o_types,
113 thresh,
114 libblis_test_syr2_experiment );
115 }
116 }
120 void libblis_test_syr2_experiment( test_params_t* params,
121 test_op_t* op,
122 iface_t iface,
123 num_t datatype,
124 char* pc_str,
125 char* sc_str,
126 unsigned int p_cur,
127 perf_t* perf,
128 double* resid )
129 {
130 unsigned int n_repeats = params->n_repeats;
131 unsigned int i;
133 double time_min = 1e9;
134 double time;
136 dim_t m;
138 uplo_t uploa;
139 conj_t conjx, conjy;
141 obj_t alpha, x, y;
142 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
143 dim_t test_way = bli_read_nway_from_env( "BLIS_LB_NT" );
144 obj_t a[test_way];
145 obj_t a_save[test_way];
146 double resid_local[test_way];
147 unsigned int j;
148 #else
149 obj_t a, a_save;
150 #endif
153 // Map the dimension specifier to an actual dimension.
154 m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
156 // Map parameter characters to BLIS constants.
157 bli_param_map_char_to_blis_uplo( pc_str[0], &uploa );
158 bli_param_map_char_to_blis_conj( pc_str[1], &conjx );
159 bli_param_map_char_to_blis_conj( pc_str[2], &conjy );
161 // Create test scalars.
162 bli_obj_scalar_init_detached( datatype, &alpha );
164 // Create test operands (vectors and/or matrices).
165 libblis_test_vobj_create( params, datatype,
166 sc_str[0], m, &x );
167 libblis_test_vobj_create( params, datatype,
168 sc_str[1], m, &y );
170 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
171 for(i = 0; i < test_way; i++)
172 {
173 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
174 sc_str[2], m, m, &a[i] );
175 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
176 sc_str[2], m, m, &a_save[i] );
177 }
178 #else
179 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
180 sc_str[2], m, m, &a );
181 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
182 sc_str[2], m, m, &a_save );
183 #endif
185 // Set alpha.
186 //bli_copysc( &BLIS_MINUS_ONE, &alpha );
187 bli_setsc( -1.0, 1.0, &alpha );
189 // Randomize x and y.
190 bli_randv( &x );
191 bli_randv( &y );
193 // Set the structure and uplo properties of A.
194 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
195 for(i = 0; i < test_way; i++)
196 {
197 bli_obj_set_struc( BLIS_SYMMETRIC, a[i] );
198 bli_obj_set_uplo( uploa, a[i] );
200 // Randomize A, make it densely symmetric, and zero the unstored triangle
201 // to ensure the implementation is reads only from the stored region.
202 bli_randm( &a[i] );
203 bli_mksymm( &a[i] );
204 bli_mktrim( &a[i] );
206 bli_obj_set_struc( BLIS_SYMMETRIC, a_save[i] );
207 bli_obj_set_uplo( uploa, a_save[i] );
208 bli_copym( &a[i], &a_save[i] );
210 }
211 #else
212 bli_obj_set_struc( BLIS_SYMMETRIC, a );
213 bli_obj_set_uplo( uploa, a );
215 // Randomize A, make it densely symmetric, and zero the unstored triangle
216 // to ensure the implementation is reads only from the stored region.
217 bli_randm( &a );
218 bli_mksymm( &a );
219 bli_mktrim( &a );
221 bli_obj_set_struc( BLIS_SYMMETRIC, a_save );
222 bli_obj_set_uplo( uploa, a_save );
223 bli_copym( &a, &a_save );
225 #endif
227 // Apply the remaining parameters.
228 bli_obj_set_conj( conjx, x );
229 bli_obj_set_conj( conjy, y );
231 // Repeat the experiment n_repeats times and record results.
232 for ( i = 0; i < n_repeats; ++i )
233 {
234 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
235 // Need only one call to initialize the CBLAS OpenCL kernel
236 bli_copym( &a_save[0], &a[0] );
237 libblis_test_syr2_impl( iface, &alpha, &x, &y, &a[0] );
239 //but need to re-initialize C for each of iteration of n_repeats
240 for(j = 0; j < test_way; j++)
241 {
242 bli_copym( &a_save[j], &a[j] );
243 }
244 #else
245 bli_copym( &a_save, &a );
246 libblis_test_syr2_impl( iface, &alpha, &x, &y, &a );
247 bli_copym( &a_save, &a );
248 #endif
250 time = bli_clock();
252 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
253 #pragma omp parallel num_threads(test_way)
254 {
255 #pragma omp for
256 for(j = 0; j < test_way; j++)
257 {
258 libblis_test_syr2_impl( iface, &alpha, &x, &y, &a[j] );
259 }
260 }
261 #else
262 libblis_test_syr2_impl( iface, &alpha, &x, &y, &a );
263 #endif
265 time_min = bli_clock_min_diff( time_min, time );
266 }
267 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
268 // Estimate the performance of the best experiment repeat.
269 perf->gflops = ( 2.0 * m * m ) * test_way / time_min / FLOPS_PER_UNIT_PERF;
270 if ( bli_obj_is_complex( a[0] ) ) perf->gflops *= 4.0;
271 #else
272 // Estimate the performance of the best experiment repeat.
273 perf->gflops = ( 2.0 * m * m ) / time_min / FLOPS_PER_UNIT_PERF;
274 if ( bli_obj_is_complex( a ) ) perf->gflops *= 4.0;
275 #endif
276 perf->time = time_min;
278 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
279 // Check output of each thread, and send max residue to main
280 for(i = 0; i < test_way; i++)
281 {
282 // Perform checks.
283 libblis_test_syr2_check( &alpha, &x, &y, &a[i], &a_save[i], &resid_local[i] );
285 // Zero out performance and residual if output matrix is empty.
286 libblis_test_check_empty_problem( &a[i], perf, &resid_local[i] );
288 if(i == 0)
289 {
290 *resid = resid_local[i];
291 }
292 else if (resid_local[i] > *resid)
293 {
294 *resid = resid_local[i];
295 }
296 }
298 #else
299 // Perform checks.
300 libblis_test_syr2_check( &alpha, &x, &y, &a, &a_save, resid );
302 // Zero out performance and residual if output matrix is empty.
303 libblis_test_check_empty_problem( &a, perf, resid );
304 #endif
307 // Free the test objects.
308 bli_obj_free( &x );
309 bli_obj_free( &y );
310 #ifdef BLIS_ENABLE_MULTITHREAD_TEST
311 for(i = 0; i < test_way; i++)
312 {
313 bli_obj_free( &a[i] );
314 bli_obj_free( &a_save[i] );
315 }
316 #else
317 bli_obj_free( &a );
318 bli_obj_free( &a_save );
319 #endif
320 }
324 void libblis_test_syr2_impl( iface_t iface,
325 obj_t* alpha,
326 obj_t* x,
327 obj_t* y,
328 obj_t* a )
329 {
330 switch ( iface )
331 {
332 case BLIS_TEST_SEQ_FRONT_END:
333 ;
334 #ifdef CBLAS
335 enum CBLAS_ORDER cblas_order;
336 enum CBLAS_UPLO cblas_uplo;
337 int n;
338 int lda, incx, incy;
340 //m = bli_obj_length( *a );
341 n = bli_obj_width( *a );
343 if(bli_obj_is_col_stored( *a ))
344 {
345 cblas_order = CblasColMajor;
346 lda = bli_obj_col_stride(*a);
347 }
348 else if(bli_obj_is_row_stored( *a ))
349 {
350 cblas_order = CblasRowMajor;
351 lda = bli_obj_row_stride(*a);
352 }
353 else
354 {
355 bli_check_error_code( BLIS_INVALID_DIM_STRIDE_COMBINATION );
356 return;
357 }
359 incx = bli_obj_vector_inc(*x);
360 incy = bli_obj_vector_inc(*y);
362 if(bli_obj_is_upper( *a ))
363 {
364 cblas_uplo = CblasUpper;
365 }
366 else if (bli_obj_is_lower( *a ))
367 {
368 cblas_uplo = CblasLower;
369 }
370 else
371 {
372 bli_check_error_code( BLIS_INVALID_UPLO );
373 return;
374 }
376 if (bli_obj_is_float( *a ))
377 {
378 float *cblas_alpha;
379 float *cblas_a, *cblas_x, *cblas_y;
381 cblas_alpha = (float *) bli_obj_buffer( *alpha );
382 cblas_a = (float *) bli_obj_buffer( *a );
383 cblas_x = (float *) bli_obj_buffer( *x );
384 cblas_y = (float *) bli_obj_buffer( *y );
386 cblas_ssyr2(cblas_order, cblas_uplo, n, *cblas_alpha, cblas_x, incx, cblas_y, incy, cblas_a, lda);
387 }
388 else if (bli_obj_is_double( *a ))
390 {
391 double *cblas_alpha;
392 double *cblas_a, *cblas_x, *cblas_y;
394 cblas_alpha = (double *) bli_obj_buffer( *alpha );
395 cblas_a = (double *) bli_obj_buffer( *a );
396 cblas_x = (double *) bli_obj_buffer( *x );
397 cblas_y = (double *) bli_obj_buffer( *y );
399 cblas_dsyr2(cblas_order, cblas_uplo, n, *cblas_alpha, cblas_x, incx, cblas_y, incy, cblas_a, lda);
400 }
401 else
402 {
403 printf("CBLAS not supported\n");
404 }
406 #else
407 bli_syr2( alpha, x, y, a );
408 #endif
409 break;
411 default:
412 libblis_test_printf_error( "Invalid interface type.\n" );
413 }
414 }
418 void libblis_test_syr2_check( obj_t* alpha,
419 obj_t* x,
420 obj_t* y,
421 obj_t* a,
422 obj_t* a_orig,
423 double* resid )
424 {
425 num_t dt = bli_obj_datatype( *a );
426 num_t dt_real = bli_obj_datatype_proj_to_real( *a );
428 dim_t m_a = bli_obj_length( *a );
430 obj_t xt, yt;
431 obj_t t, v, w1, w2;
432 obj_t tau, rho, norm;
434 double junk;
436 //
437 // Pre-conditions:
438 // - x is randomized.
439 // - y is randomized.
440 // - a is randomized and symmetric.
441 // Note:
442 // - alpha should have a non-zero imaginary component in the
443 // complex cases in order to more fully exercise the implementation.
444 //
445 // Under these conditions, we assume that the implementation for
446 //
447 // A := A_orig + alpha * conjx(x) * conjy(y)^T + alpha * conjy(y) * conjx(x)^T
448 //
449 // is functioning correctly if
450 //
451 // normf( v - w )
452 //
453 // is negligible, where
454 //
455 // v = A * t
456 // w = ( A_orig + alpha * conjx(x) * conjy(y)^T + alpha * conjy(y) * conjx(x)^T ) * t
457 // = A_orig * t + alpha * conjx(x) * conjy(y)^T * t + alpha * conjy(y) * conjx(x)^T * t
458 // = A_orig * t + alpha * conjx(x) * conjy(y)^T * t + alpha * conjy(y) * rho
459 // = A_orig * t + alpha * conjx(x) * conjy(y)^T * t + w1
460 // = A_orig * t + alpha * conjx(x) * rho + w1
461 // = A_orig * t + w2 + w1
462 //
464 bli_mksymm( a );
465 bli_mksymm( a_orig );
466 bli_obj_set_struc( BLIS_GENERAL, *a );
467 bli_obj_set_struc( BLIS_GENERAL, *a_orig );
468 bli_obj_set_uplo( BLIS_DENSE, *a );
469 bli_obj_set_uplo( BLIS_DENSE, *a_orig );
471 bli_obj_scalar_init_detached( dt, &tau );
472 bli_obj_scalar_init_detached( dt, &rho );
473 bli_obj_scalar_init_detached( dt_real, &norm );
475 bli_obj_create( dt, m_a, 1, 0, 0, &t );
476 bli_obj_create( dt, m_a, 1, 0, 0, &v );
477 bli_obj_create( dt, m_a, 1, 0, 0, &w1 );
478 bli_obj_create( dt, m_a, 1, 0, 0, &w2 );
480 bli_obj_alias_to( *x, xt );
481 bli_obj_alias_to( *y, yt );
483 bli_setsc( 1.0/( double )m_a, -1.0/( double )m_a, &tau );
484 bli_setv( &tau, &t );
486 bli_gemv( &BLIS_ONE, a, &t, &BLIS_ZERO, &v );
488 bli_dotv( &xt, &t, &rho );
489 bli_mulsc( alpha, &rho );
490 bli_scal2v( &rho, y, &w1 );
492 bli_dotv( &yt, &t, &rho );
493 bli_mulsc( alpha, &rho );
494 bli_scal2v( &rho, x, &w2 );
496 bli_addv( &w2, &w1 );
498 bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w1 );
500 bli_subv( &w1, &v );
501 bli_normfv( &v, &norm );
502 bli_getsc( &norm, resid, &junk );
504 bli_obj_free( &t );
505 bli_obj_free( &v );
506 bli_obj_free( &w1 );
507 bli_obj_free( &w2 );
508 }