[dense-linear-algebra-libraries/linalg.git] / blis / frame / 3 / trmm / other / bli_trmm_lu_blk_var1.c
1 /*
3 BLIS
4 An object-based framework for developing high-performance BLAS-like
5 libraries.
7 Copyright (C) 2014, The University of Texas at Austin
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 - Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 - Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
17 - Neither the name of The University of Texas at Austin nor the names
18 of its contributors may be used to endorse or promote products
19 derived from this software without specific prior written permission.
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
37 void bli_trmm_lu_blk_var1( obj_t* alpha,
38 obj_t* a,
39 obj_t* b,
40 obj_t* beta,
41 obj_t* c,
42 trmm_t* cntl )
43 {
44 obj_t a1, a1_pack;
45 obj_t b_pack;
46 obj_t c1, c1_pack;
48 dim_t i;
49 dim_t b_alg;
50 dim_t mT_trans;
52 // Initialize all pack objects that are passed into packm_init().
53 bli_obj_init_pack( &a1_pack );
54 bli_obj_init_pack( &b_pack );
55 bli_obj_init_pack( &c1_pack );
57 // If A is [upper] triangular, use the diagonal offset of A to determine
58 // the length of the non-zero region.
59 if ( bli_obj_is_triangular( *a ) )
60 mT_trans = bli_abs( bli_obj_diag_offset_after_trans( *a ) ) +
61 bli_obj_width_after_trans( *a );
62 else // if ( bli_obj_is_general( *a )
63 mT_trans = bli_obj_length_after_trans( *a );
65 // Scale C by beta (if instructed).
66 bli_scalm_int( beta,
67 c,
68 cntl_sub_scalm( cntl ) );
70 // Initialize object for packing B.
71 bli_packm_init( b, &b_pack,
72 cntl_sub_packm_b( cntl ) );
74 // Pack B and scale by alpha (if instructed).
75 bli_packm_int( alpha,
76 b, &b_pack,
77 cntl_sub_packm_b( cntl ) );
79 // Partition along the m dimension.
80 for ( i = 0; i < mT_trans; i += b_alg )
81 {
82 // Determine the current algorithmic blocksize.
83 b_alg = bli_determine_blocksize_f( i, mT_trans, a,
84 cntl_blocksize( cntl ) );
86 // Acquire partitions for A1 and C1.
87 bli_acquire_mpart_t2b( BLIS_SUBPART1,
88 i, b_alg, a, &a1 );
89 bli_acquire_mpart_t2b( BLIS_SUBPART1,
90 i, b_alg, c, &c1 );
92 // Initialize objects for packing A1 and C1.
93 bli_packm_init( &a1, &a1_pack,
94 cntl_sub_packm_a( cntl ) );
95 bli_packm_init( &c1, &c1_pack,
96 cntl_sub_packm_c( cntl ) );
98 // Pack A1 and scale by alpha (if instructed).
99 bli_packm_int( alpha,
100 &a1, &a1_pack,
101 cntl_sub_packm_a( cntl ) );
103 // Pack C1 and scale by beta (if instructed).
104 bli_packm_int( beta,
105 &c1, &c1_pack,
106 cntl_sub_packm_c( cntl ) );
108 // Perform trmm subproblem.
109 bli_trmm_int( BLIS_LEFT,
110 alpha,
111 &a1_pack,
112 &b_pack,
113 beta,
114 &c1_pack,
115 cntl_sub_gemm( cntl ) );
117 // Unpack C1 (if C1 was packed).
118 bli_unpackm_int( &c1_pack, &c1,
119 cntl_sub_unpackm_c( cntl ) );
120 }
122 // If any packing buffers were acquired within packm, release them back
123 // to the memory manager.
124 bli_obj_release_pack( &a1_pack );
125 bli_obj_release_pack( &b_pack );
126 bli_obj_release_pack( &c1_pack );
127 }