b4361470ad26ae1f0c62f65b518eac2a4b215cfc
1 /*
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
7 Copyright (C) 2014, The University of Texas at Austin
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name of The University of Texas at Austin nor the names
18 of its contributors may be used to endorse or promote products
19 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
36 #include "test_libblis.h"
39 // Static variables.
40 static char* op_str = "dotxaxpyf";
41 static char* o_types = "mvvvv"; // A w x y z
42 static char* p_types = "cccc"; // conjat conja conjw conjx
43 static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s
44 { 1e-04, 1e-05 }, // warn, pass for c
45 { 1e-13, 1e-14 }, // warn, pass for d
46 { 1e-13, 1e-14 } }; // warn, pass for z
48 // Local prototypes.
49 void libblis_test_dotxaxpyf_deps( test_params_t* params,
50 test_op_t* op );
52 void libblis_test_dotxaxpyf_experiment( test_params_t* params,
53 test_op_t* op,
54 iface_t iface,
55 num_t datatype,
56 char* pc_str,
57 char* sc_str,
58 unsigned int p_cur,
59 double* perf,
60 double* resid );
62 void libblis_test_dotxaxpyf_impl( iface_t iface,
63 obj_t* alpha,
64 obj_t* at,
65 obj_t* a,
66 obj_t* w,
67 obj_t* x,
68 obj_t* beta,
69 obj_t* y,
70 obj_t* z );
72 void libblis_test_dotxaxpyf_check( obj_t* alpha,
73 obj_t* at,
74 obj_t* a,
75 obj_t* w,
76 obj_t* x,
77 obj_t* beta,
78 obj_t* y,
79 obj_t* z,
80 obj_t* y_orig,
81 obj_t* z_orig,
82 double* resid );
86 void libblis_test_dotxaxpyf_deps( test_params_t* params, test_op_t* op )
87 {
88 libblis_test_randv( params, &(op->ops->randv) );
89 libblis_test_randm( params, &(op->ops->randm) );
90 libblis_test_normfv( params, &(op->ops->normfv) );
91 libblis_test_subv( params, &(op->ops->subv) );
92 libblis_test_copyv( params, &(op->ops->copyv) );
93 libblis_test_axpyv( params, &(op->ops->axpyv) );
94 libblis_test_dotxv( params, &(op->ops->dotxv) );
95 }
99 void libblis_test_dotxaxpyf( test_params_t* params, test_op_t* op )
100 {
102 // Return early if this test has already been done.
103 if ( op->test_done == TRUE ) return;
105 // Return early if operation is disabled.
106 if ( op->op_switch == DISABLE_ALL ||
107 op->ops->l1f_over == DISABLE_ALL ) return;
109 // Call dependencies first.
110 if ( TRUE ) libblis_test_dotxaxpyf_deps( params, op );
112 // Execute the test driver for each implementation requested.
113 if ( op->front_seq == ENABLE )
114 {
115 libblis_test_op_driver( params,
116 op,
117 BLIS_TEST_SEQ_FRONT_END,
118 op_str,
119 p_types,
120 o_types,
121 thresh,
122 libblis_test_dotxaxpyf_experiment );
123 }
124 }
128 void libblis_test_dotxaxpyf_experiment( test_params_t* params,
129 test_op_t* op,
130 iface_t iface,
131 num_t datatype,
132 char* pc_str,
133 char* sc_str,
134 unsigned int p_cur,
135 double* perf,
136 double* resid )
137 {
138 unsigned int n_repeats = params->n_repeats;
139 unsigned int i;
141 double time_min = 1e9;
142 double time;
144 dim_t m, b_n;
146 conj_t conjat, conja, conjw, conjx;
148 obj_t alpha, at, a, w, x, beta, y, z;
149 obj_t y_save, z_save;
152 // Map the dimension specifier to an actual dimension.
153 m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
155 // Query the operation's fusing factor for the current datatype.
156 b_n = bli_dotxaxpyf_fusefac( datatype );
158 // Store the fusing factor so that the driver can retrieve the value
159 // later when printing results.
160 op->dim_aux[0] = b_n;
162 // Map parameter characters to BLIS constants.
163 bli_param_map_char_to_blis_conj( pc_str[0], &conjat );
164 bli_param_map_char_to_blis_conj( pc_str[1], &conja );
165 bli_param_map_char_to_blis_conj( pc_str[2], &conjw );
166 bli_param_map_char_to_blis_conj( pc_str[3], &conjx );
168 // Create test scalars.
169 bli_obj_scalar_init_detached( datatype, &alpha );
170 bli_obj_scalar_init_detached( datatype, &beta );
172 // Create test operands (vectors and/or matrices).
173 libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE,
174 sc_str[0], m, b_n, &a );
175 libblis_test_vobj_create( params, datatype, sc_str[1], m, &w );
176 libblis_test_vobj_create( params, datatype, sc_str[2], b_n, &x );
177 libblis_test_vobj_create( params, datatype, sc_str[3], b_n, &y );
178 libblis_test_vobj_create( params, datatype, sc_str[3], b_n, &y_save );
179 libblis_test_vobj_create( params, datatype, sc_str[4], m, &z );
180 libblis_test_vobj_create( params, datatype, sc_str[4], m, &z_save );
182 // Set alpha.
183 if ( bli_obj_is_real( y ) )
184 {
185 bli_setsc( 1.2, 0.0, &alpha );
186 bli_setsc( -1.0, 0.0, &beta );
187 }
188 else
189 {
190 bli_setsc( 1.2, 0.1, &alpha );
191 bli_setsc( -1.0, -0.1, &beta );
192 }
194 // Randomize A, w, x, y, and z, and save y and z.
195 bli_randm( &a );
196 bli_randv( &w );
197 bli_randv( &x );
198 bli_randv( &y );
199 bli_randv( &z );
200 bli_copyv( &y, &y_save );
201 bli_copyv( &z, &z_save );
203 // Create an alias to a for at. (Note that it should NOT actually be
204 // marked for transposition since the transposition is part of the dotxf
205 // subproblem.)
206 bli_obj_alias_to( a, at );
208 // Apply the parameters.
209 bli_obj_set_conj( conjat, at );
210 bli_obj_set_conj( conja, a );
211 bli_obj_set_conj( conjw, w );
212 bli_obj_set_conj( conjx, x );
214 // Repeat the experiment n_repeats times and record results.
215 for ( i = 0; i < n_repeats; ++i )
216 {
217 bli_copyv( &y_save, &y );
218 bli_copyv( &z_save, &z );
220 time = bli_clock();
222 libblis_test_dotxaxpyf_impl( iface, &alpha, &at, &a, &w, &x, &beta, &y, &z );
224 time_min = bli_clock_min_diff( time_min, time );
225 }
227 // Estimate the performance of the best experiment repeat.
228 *perf = ( 2.0 * m * b_n + 2.0 * m * b_n ) / time_min / FLOPS_PER_UNIT_PERF;
229 if ( bli_obj_is_complex( y ) ) *perf *= 4.0;
231 // Perform checks.
232 libblis_test_dotxaxpyf_check( &alpha, &at, &a, &w, &x, &beta, &y, &z, &y_save, &z_save, resid );
234 // Zero out performance and residual if either output vector is empty.
235 libblis_test_check_empty_problem( &y, perf, resid );
236 libblis_test_check_empty_problem( &z, perf, resid );
238 // Free the test objects.
239 bli_obj_free( &a );
240 bli_obj_free( &w );
241 bli_obj_free( &x );
242 bli_obj_free( &y );
243 bli_obj_free( &z );
244 bli_obj_free( &y_save );
245 bli_obj_free( &z_save );
246 }
250 void libblis_test_dotxaxpyf_impl( iface_t iface,
251 obj_t* alpha,
252 obj_t* at,
253 obj_t* a,
254 obj_t* w,
255 obj_t* x,
256 obj_t* beta,
257 obj_t* y,
258 obj_t* z )
259 {
260 switch ( iface )
261 {
262 case BLIS_TEST_SEQ_FRONT_END:
263 bli_dotxaxpyf_kernel( alpha, at, a, w, x, beta, y, z );
264 break;
266 default:
267 libblis_test_printf_error( "Invalid interface type.\n" );
268 }
269 }
273 void libblis_test_dotxaxpyf_check( obj_t* alpha,
274 obj_t* at,
275 obj_t* a,
276 obj_t* w,
277 obj_t* x,
278 obj_t* beta,
279 obj_t* y,
280 obj_t* z,
281 obj_t* y_orig,
282 obj_t* z_orig,
283 double* resid )
284 {
285 num_t dt = bli_obj_datatype( *y );
286 num_t dt_real = bli_obj_datatype_proj_to_real( *y );
288 dim_t m = bli_obj_vector_dim( *z );
289 dim_t b_n = bli_obj_vector_dim( *y );
291 dim_t i;
293 obj_t a1, chi1, psi1, v, q;
294 obj_t alpha_chi1;
295 obj_t norm;
297 double resid1, resid2;
298 double junk;
300 //
301 // Pre-conditions:
302 // - a is randomized.
303 // - w is randomized.
304 // - x is randomized.
305 // - y is randomized.
306 // - z is randomized.
307 // - at is an alias to a.
308 // Note:
309 // - alpha and beta should have a non-zero imaginary component in the
310 // complex cases in order to more fully exercise the implementation.
311 //
312 // Under these conditions, we assume that the implementation for
313 //
314 // y := beta * y_orig + alpha * conjat(A^T) * conjw(w)
315 // z := z_orig + alpha * conja(A) * conjx(x)
316 //
317 // is functioning correctly if
318 //
319 // normf( y - v )
320 //
321 // and
322 //
323 // normf( z - q )
324 //
325 // are negligible, where v and q contain y and z as computed by repeated
326 // calls to dotxv and axpyv, respectively.
327 //
329 bli_obj_scalar_init_detached( dt_real, &norm );
330 bli_obj_scalar_init_detached( dt, &alpha_chi1 );
332 bli_obj_create( dt, b_n, 1, 0, 0, &v );
333 bli_obj_create( dt, m, 1, 0, 0, &q );
335 bli_copyv( y_orig, &v );
336 bli_copyv( z_orig, &q );
338 // v := beta * v + alpha * conjat(at) * conjw(w)
339 for ( i = 0; i < b_n; ++i )
340 {
341 bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, at, &a1 );
342 bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, &v, &psi1 );
344 bli_dotxv( alpha, &a1, w, beta, &psi1 );
345 }
347 // q := q + alpha * conja(a) * conjx(x)
348 for ( i = 0; i < b_n; ++i )
349 {
350 bli_acquire_mpart_l2r( BLIS_SUBPART1, i, 1, a, &a1 );
351 bli_acquire_vpart_f2b( BLIS_SUBPART1, i, 1, x, &chi1 );
353 bli_copysc( &chi1, &alpha_chi1 );
354 bli_mulsc( alpha, &alpha_chi1 );
356 bli_axpyv( &alpha_chi1, &a1, &q );
357 }
360 bli_subv( y, &v );
361 bli_normfv( &v, &norm );
362 bli_getsc( &norm, &resid1, &junk );
364 bli_subv( z, &q );
365 bli_normfv( &q, &norm );
366 bli_getsc( &norm, &resid2, &junk );
369 *resid = bli_fmaxabs( resid1, resid2 );
371 bli_obj_free( &v );
372 bli_obj_free( &q );
373 }