blis/frame/1/unpackv/bli_unpackv_int.c

   1 /*
   2
   3    BLIS
   4    An object-based framework for developing high-performance BLAS-like
   5    libraries.
   6
   7    Copyright (C) 2014, The University of Texas at Austin
   8
   9    Redistribution and use in source and binary forms, with or without
  10    modification, are permitted provided that the following conditions are
  11    met:
  12     - Redistributions of source code must retain the above copyright
  13       notice, this list of conditions and the following disclaimer.
  14     - Redistributions in binary form must reproduce the above copyright
  15       notice, this list of conditions and the following disclaimer in the
  16       documentation and/or other materials provided with the distribution.
  17     - Neither the name of The University of Texas at Austin nor the names
  18       of its contributors may be used to endorse or promote products
  19       derived from this software without specific prior written permission.
  20
  21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  24    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  25    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  26    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  27    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  28    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  29    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  30    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  31    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  32
  33 */
  34
  35 #include "blis.h"
  36
  37 #define FUNCPTR_T unpackv_fp
  38
  39 typedef void (*FUNCPTR_T)( obj_t*     p,
  40                            obj_t*     a,
  41                            unpackv_t* cntl );
  42
  43 static FUNCPTR_T vars[1][3] =
  44 {
  45         // unblocked            optimized unblocked    blocked
  46         { bli_unpackv_unb_var1, NULL,                  NULL }
  47 };
  48
  49 void bli_unpackv_int( obj_t*     p,
  50                       obj_t*     a,
  51                       unpackv_t* cntl )
  52 {
  53         // The unpackv operation consists of an optional casting post-process.
  54         // (This post-process is analogous to the cast pre-process in packv.)
  55         // Here are the following possible ways unpackv can execute:
  56         //  1. unpack and cast: Unpack to a temporary vector c and then cast
  57         //     c to a.
  58         //  2. unpack only: Unpack directly to vector a since typecasting is
  59         //     not needed.
  60         //  3. cast only: Not yet supported / not used.
  61         //  4. no-op: The control tree directs us to skip the unpack operation
  62         //     entirely. No action is taken.
  63
  64         obj_t     c;
  65
  66         varnum_t  n;
  67         impl_t    i;
  68         FUNCPTR_T f;
  69
  70         // Check parameters.
  71         if ( bli_error_checking_is_enabled() )
  72                 bli_unpackv_check( p, a, cntl );
  73
  74         // Sanity check; A should never have a zero dimension. If we must support
  75         // it, then we should fold it into the next alias-and-early-exit block.
  76         if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
  77
  78         // First check if we are to skip this operation because the control tree
  79         // is NULL, and if so, simply return.
  80         if ( cntl_is_noop( cntl ) )
  81         {
  82                 return;
  83         }
  84
  85         // If p was aliased to a during the pack stage (because it was already
  86         // in an acceptable packed/contiguous format), then no unpack is actually
  87         // necessary, so we return.
  88         if ( bli_obj_is_alias_of( *p, *a ) )
  89         {
  90                 return;
  91         }
  92
  93         // Now, if we are not skipping the unpack operation, then the only
  94         // question left is whether we are to typecast vector a after unpacking.
  95         if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
  96                 bli_abort();
  97 /*
  98         if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
  99         {
 100                 // Initialize an object c for the intermediate typecast vector.
 101                 bli_unpackv_init_cast( p,
 102                                        a,
 103                                        &c );
 104         }
 105         else
 106 */
 107         {
 108                 // If no cast is needed, then aliasing object c to the original
 109                 // vector serves as a minor optimization. This causes the unpackv
 110                 // implementation to unpack directly into vector a.
 111                 bli_obj_alias_to( *a, c );
 112         }
 113
 114         // Now we are ready to proceed with the unpacking.
 115
 116         // Extract the variant number and implementation type.
 117         n = cntl_var_num( cntl );
 118         i = cntl_impl_type( cntl );
 119
 120         // Index into the variant array to extract the correct function pointer.
 121         f = vars[n][i];
 122
 123         // Invoke the variant.
 124         f( p,
 125            &c,
 126            cntl );
 127
 128         // Now, if necessary, we cast the contents of c to vector a. If casting
 129         // was not necessary, then we are done because the call to the unpackv
 130         // implementation would have unpacked directly to vector a.
 131 /*
 132         if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
 133         {
 134                 // Copy/typecast vector c to vector a.
 135                 // NOTE: Here, we use copynzv instead of copym because, in the cases
 136                 // where we are unpacking/typecasting a real vector c to a complex
 137                 // vector a, we want to touch only the real components of a, rather
 138                 // than also set the imaginary components to zero. This comes about
 139                 // because of the fact that, if we are unpacking real-to-complex,
 140                 // then it is because all of the computation occurred in the real
 141                 // domain, and so we would want to leave whatever imaginary values
 142                 // there are in vector a untouched. Notice that for unpackings that
 143                 // entail complex-to-complex data movements, the copynzv operation
 144                 // behaves exactly as copym, so no use cases are lost (at least none
 145                 // that I can think of).
 146                 bli_copynzv( &c,
 147                              a );
 148
 149                 // NOTE: The above code/comment is outdated. What should happen is
 150                 // as follows:
 151                 // - If dt(a) is complex and dt(p) is real, then create an alias of
 152                 //   a and then tweak it so that it looks like a real domain object.
 153                 //   This will involve:
 154                 //   - projecting the datatype to real domain
 155                 //   - scaling both the row and column strides by 2
 156                 //   ALL OF THIS should be done in the front-end, NOT here, as
 157                 //   unpackv() won't even be needed in that case.
 158         }
 159 */
 160 }
 161
 162 /*
 163 void bli_unpackv_init_cast( obj_t*  p,
 164                             obj_t*  a,
 165                             obj_t*  c )
 166 {
 167         // The idea here is that we want to create an object c that is identical
 168         // to object a, except that:
 169         //  (1) the storage datatype of c is equal to the target datatype of a,
 170         //      with the element size of c adjusted accordingly,
 171         //  (2) object c is marked as being stored in a standard, contiguous
 172         //      format (ie: a column vector),
 173         //  (3) the view offset of c is reset to (0,0), and
 174         //  (4) object c's main buffer is set to a new memory region acquired
 175         //      from the memory manager, or extracted from p if a mem entry is
  176         //      already available. (After acquiring a mem entry from the memory
 177         //      manager, it is cached within p for quick access later on.)
 178
 179         num_t dt_targ_a    = bli_obj_target_datatype( *a );
 180         dim_t dim_a        = bli_obj_vector_dim( *a );
 181         siz_t elem_size_c  = bli_datatype_size( dt_targ_a );
 182
 183         // We begin by copying the basic fields of a.
 184         bli_obj_alias_to( *a, *c );
 185
 186         // Update datatype and element size fields.
 187         bli_obj_set_datatype( dt_targ_a, *c );
 188         bli_obj_set_elem_size( elem_size_c, *c );
 189
 190         // Update the strides and dimensions. We set the increments to reflect a
 191         // column-stored vector. Note that the column stride is set to dim(a),
 192         // though it should never be used because there is no second column to
 193         // index into (and therefore it also does not need to be aligned).
 194         bli_obj_set_dims( dim_a, 1, *c );
 195         bli_obj_set_incs( 1, dim_a, *c );
 196
 197         // Reset the view offsets to (0,0).
 198         bli_obj_set_offs( 0, 0, *c );
 199
 200         // Check the mem_t entry of p associated with the cast buffer. If it is
 201         // NULL, then acquire memory sufficient to hold the object data and cache
 202         // it to p. (Otherwise, if it is non-NULL, then memory has already been
 203         // acquired from the memory manager and cached.) We then set the main
 204         // buffer of c to the cached address of the cast memory.
 205         bli_obj_set_buffer_with_cached_cast_mem( *p, *c );
 206 }
 207 */