]> Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - dense-linear-algebra-libraries/linalg.git/blob - blis/frame/2/trmv/bli_trmv_cntl.c
TI Linear Algebra Library (LINALG) Release 1.0.0
[dense-linear-algebra-libraries/linalg.git] / blis / frame / 2 / trmv / bli_trmv_cntl.c
1 /*
3    BLIS    
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
7    Copyright (C) 2014, The University of Texas at Austin
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name of The University of Texas at Austin nor the names
18       of its contributors may be used to endorse or promote products
19       derived from this software without specific prior written permission.
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
37 extern packm_t*   packm_cntl;
38 extern packv_t*   packv_cntl;
39 extern unpackv_t* unpackv_cntl;
41 extern gemv_t*    gemv_cntl_rp_bs_dot;
42 extern gemv_t*    gemv_cntl_rp_bs_axpy;
43 extern gemv_t*    gemv_cntl_cp_bs_dot;
44 extern gemv_t*    gemv_cntl_cp_bs_axpy;
46 extern blksz_t*   gemv_mc;
48 trmv_t*           trmv_cntl_bs_ke_nrow_tcol;
49 trmv_t*           trmv_cntl_bs_ke_ncol_trow;
50 trmv_t*           trmv_cntl_ge_nrow_tcol;
51 trmv_t*           trmv_cntl_ge_ncol_trow;
54 void bli_trmv_cntl_init()
55 {
56         // Create control trees for the lowest-level kernels. These trees induce
57         // operations on (presumably) relatively small block-subvector problems.
58         trmv_cntl_bs_ke_nrow_tcol
59         =
60         bli_trmv_cntl_obj_create( BLIS_UNB_FUSED,
61                                   BLIS_VARIANT1,
62                                   NULL, NULL, NULL,
63                                   NULL, NULL, NULL,
64                                   NULL );
66         trmv_cntl_bs_ke_ncol_trow
67         =
68         bli_trmv_cntl_obj_create( BLIS_UNB_FUSED,
69                                   BLIS_VARIANT2,
70                                   NULL, NULL, NULL,
71                                   NULL, NULL, NULL,
72                                   NULL );
75         // Create control trees for generally large problems. Here we choose a
76         // variant that prioritizes keeping a subvector of x in cache.
77         trmv_cntl_ge_nrow_tcol
78         =
79         bli_trmv_cntl_obj_create( BLIS_BLOCKED,
80                                   BLIS_VARIANT1,         // use var1 to maximize x1 usage
81                                   gemv_mc,
82                                   packm_cntl,            // pack A11 (if needed)
83                                   packv_cntl,            // pack x1 (if needed)
84                                   gemv_cntl_rp_bs_dot,   // gemv_rp needed by var1
85                                   NULL,                  // gemv_cp not needed by var1
86                                   trmv_cntl_bs_ke_nrow_tcol,
87                                   unpackv_cntl );        // unpack x1 (if packed)
88         trmv_cntl_ge_ncol_trow
89         =
90         bli_trmv_cntl_obj_create( BLIS_BLOCKED,
91                                   BLIS_VARIANT1,        // use var1 to maximize x1 usage
92                                   gemv_mc,
93                                   packm_cntl,           // pack A11 (if needed)
94                                   packv_cntl,           // pack x1 (if needed)
95                                   gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1
96                                   NULL,                 // gemv_cp not needed by var1
97                                   trmv_cntl_bs_ke_ncol_trow,
98                                   unpackv_cntl );       // unpack x1 (if packed)
99 }
101 void bli_trmv_cntl_finalize()
103         bli_cntl_obj_free( trmv_cntl_bs_ke_nrow_tcol );
104         bli_cntl_obj_free( trmv_cntl_bs_ke_ncol_trow );
105         bli_cntl_obj_free( trmv_cntl_ge_nrow_tcol );
106         bli_cntl_obj_free( trmv_cntl_ge_ncol_trow );
110 trmv_t* bli_trmv_cntl_obj_create( impl_t     impl_type,
111                                   varnum_t   var_num,
112                                   blksz_t*   b,
113                                   packm_t*   sub_packm_a11,
114                                   packv_t*   sub_packv_x1,
115                                   gemv_t*    sub_gemv_rp,
116                                   gemv_t*    sub_gemv_cp,
117                                   trmv_t*    sub_trmv,
118                                   unpackv_t* sub_unpackv_x1 )
120         trmv_t* cntl;
122         cntl = ( trmv_t* ) bli_malloc( sizeof(trmv_t) );        
124         cntl->impl_type      = impl_type;
125         cntl->var_num        = var_num;
126         cntl->b              = b;
127         cntl->sub_packm_a11  = sub_packm_a11;
128         cntl->sub_packv_x1   = sub_packv_x1;
129         cntl->sub_gemv_rp    = sub_gemv_rp;
130         cntl->sub_gemv_cp    = sub_gemv_cp;
131         cntl->sub_trmv       = sub_trmv;
132         cntl->sub_unpackv_x1 = sub_unpackv_x1;
134         return cntl;
137 void bli_trmv_cntl_obj_init( trmv_t*    cntl,
138                              impl_t     impl_type,
139                              varnum_t   var_num,
140                              blksz_t*   b,
141                              packm_t*   sub_packm_a11,
142                              packv_t*   sub_packv_x1,
143                              gemv_t*    sub_gemv_rp,
144                              gemv_t*    sub_gemv_cp,
145                              trmv_t*    sub_trmv,
146                              unpackv_t* sub_unpackv_x1 )
148         cntl->impl_type      = impl_type;
149         cntl->var_num        = var_num;
150         cntl->b              = b;
151         cntl->sub_packm_a11  = sub_packm_a11;
152         cntl->sub_packv_x1   = sub_packv_x1;
153         cntl->sub_gemv_rp    = sub_gemv_rp;
154         cntl->sub_gemv_cp    = sub_gemv_cp;
155         cntl->sub_trmv       = sub_trmv;
156         cntl->sub_unpackv_x1 = sub_unpackv_x1;