Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - dense-linear-algebra-libraries/linalg.git/blob - blis/frame/3/her2k/attic/bli_her2k_cntl.c
TI Linear Algebra Library (LINALG) Release 1.0.0
[dense-linear-algebra-libraries/linalg.git] / blis / frame / 3 / her2k / attic / bli_her2k_cntl.c
1 /*
3    BLIS    
4    An object-based framework for developing high-performance BLAS-like
5    libraries.
7    Copyright (C) 2014, The University of Texas at Austin
9    Redistribution and use in source and binary forms, with or without
10    modification, are permitted provided that the following conditions are
11    met:
12     - Redistributions of source code must retain the above copyright
13       notice, this list of conditions and the following disclaimer.
14     - Redistributions in binary form must reproduce the above copyright
15       notice, this list of conditions and the following disclaimer in the
16       documentation and/or other materials provided with the distribution.
17     - Neither the name of The University of Texas at Austin nor the names
18       of its contributors may be used to endorse or promote products
19       derived from this software without specific prior written permission.
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 */
35 #include "blis.h"
37 extern scalm_t*   scalm_cntl;
39 extern blksz_t*   gemm_mc;
40 extern blksz_t*   gemm_nc;
41 extern blksz_t*   gemm_kc;
42 extern blksz_t*   gemm_mr;
43 extern blksz_t*   gemm_nr;
44 extern blksz_t*   gemm_kr;
46 extern func_t*    gemm_ukrs;
48 extern herk_t*    herk_cntl_bp_ke;
50 packm_t*          her2k_packa_cntl;
51 packm_t*          her2k_packb_cntl;
53 her2k_t*          her2k_cntl_bp_ke;
54 her2k_t*          her2k_cntl_op_bp;
55 her2k_t*          her2k_cntl_mm_op;
56 her2k_t*          her2k_cntl_vl_mm;
58 her2k_t*          her2k_cntl;
61 void bli_her2k_cntl_init()
62 {
64         // Create control tree objects for packm operations.
65         her2k_packa_cntl
66         =
67         bli_packm_cntl_obj_create( BLIS_BLOCKED,
68                                    BLIS_VARIANT2,
69                                    her2k_mr,
70                                    her2k_kr,
71                                    FALSE, // do NOT invert diagonal
72                                    FALSE, // reverse iteration if upper?
73                                    FALSE, // reverse iteration if lower?
74                                    BLIS_PACKED_ROW_PANELS,
75                                    BLIS_BUFFER_FOR_A_BLOCK );
77         her2k_packb_cntl
78         =
79         bli_packm_cntl_obj_create( BLIS_BLOCKED,
80                                    BLIS_VARIANT2,
81                                    her2k_kr,
82                                    her2k_nr,
83                                    FALSE, // do NOT invert diagonal
84                                    FALSE, // reverse iteration if upper?
85                                    FALSE, // reverse iteration if lower?
86                                    BLIS_PACKED_COL_PANELS,
87                                    BLIS_BUFFER_FOR_B_PANEL );
90         // Create control tree object for lowest-level block-panel kernel.
91         her2k_cntl_bp_ke
92         =
93         bli_her2k_cntl_obj_create( BLIS_UNB_OPT,
94                                    BLIS_VARIANT2,
95                                    NULL,
96                                    gemm_ukrs,
97                                    NULL, NULL, NULL, NULL,
98                                    NULL, NULL, NULL );
100         // Create control tree object for outer panel (to block-panel)
101         // problem.
102         her2k_cntl_op_bp
103         =
104         bli_her2k_cntl_obj_create( BLIS_BLOCKED,
105                                    BLIS_VARIANT1,
106                                    her2k_mc,
107                                    NULL,
108                                    NULL,
109                                    her2k_packa_cntl,
110                                    her2k_packb_cntl,
111                                    NULL,
112                                    her2k_cntl_bp_ke,
113                                    herk_cntl_bp_ke,
114                                    NULL );
116         // Create control tree object for general problem via multiple
117         // rank-k (outer panel) updates.
118         her2k_cntl_mm_op
119         =
120         bli_her2k_cntl_obj_create( BLIS_BLOCKED,
121                                    BLIS_VARIANT3,
122                                    her2k_kc,
123                                    NULL,
124                                    NULL,
125                                    NULL, 
126                                    NULL,
127                                    NULL,
128                                    her2k_cntl_op_bp,
129                                    NULL,
130                                    NULL );
132         // Create control tree object for very large problem via multiple
133         // general problems.
134         her2k_cntl_vl_mm
135         =
136         bli_her2k_cntl_obj_create( BLIS_BLOCKED,
137                                    BLIS_VARIANT2,
138                                    her2k_nc,
139                                    NULL,
140                                    NULL,
141                                    NULL, 
142                                    NULL,
143                                    NULL,
144                                    her2k_cntl_mm_op,
145                                    NULL,
146                                    NULL );
148         // Alias the "master" her2k control tree to a shorter name.
149         her2k_cntl = her2k_cntl_vl_mm;
152 void bli_her2k_cntl_finalize()
154         bli_cntl_obj_free( her2k_packa_cntl );
155         bli_cntl_obj_free( her2k_packb_cntl );
157         bli_cntl_obj_free( her2k_cntl_bp_ke );
158         bli_cntl_obj_free( her2k_cntl_op_bp );
159         bli_cntl_obj_free( her2k_cntl_mm_op );
160         bli_cntl_obj_free( her2k_cntl_vl_mm );
163 her2k_t* bli_her2k_cntl_obj_create( impl_t     impl_type,
164                                     varnum_t   var_num,
165                                     blksz_t*   b,
166                                     func_t*    gemm_ukrs_,
167                                     scalm_t*   sub_scalm,
168                                     packm_t*   sub_packm_a,
169                                     packm_t*   sub_packm_b,
170                                     packm_t*   sub_packm_c,
171                                     her2k_t*   sub_her2k,
172                                     herk_t*    sub_herk,
173                                     unpackm_t* sub_unpackm_c )
175         her2k_t* cntl;
177         cntl = ( her2k_t* ) bli_malloc( sizeof(her2k_t) );
179         cntl->impl_type     = impl_type;
180         cntl->var_num       = var_num;
181         cntl->b             = b;
182         cntl->gemm_ukrs     = gemm_ukrs_; // avoid name conflict with global symbol
183         cntl->sub_scalm     = sub_scalm;
184         cntl->sub_packm_a   = sub_packm_a;
185         cntl->sub_packm_b   = sub_packm_b;
186         cntl->sub_packm_c   = sub_packm_c;
187         cntl->sub_her2k     = sub_her2k;
188         cntl->sub_herk      = sub_herk;
189         cntl->sub_unpackm_c = sub_unpackm_c;
191         return cntl;