]> Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - processor-sdk/kaldi.git/blob - src/nnet3/nnet-convolutional-component.cc
[src,scripts,egs] Attention modeling, with example scripts (#1731)
[processor-sdk/kaldi.git] / src / nnet3 / nnet-convolutional-component.cc
1 // nnet3/nnet-convolutional-component.cc
3 // Copyright      2017  Johns Hopkins University (author: Daniel Povey)
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 //  http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
20 #include <iterator>
21 #include <sstream>
22 #include <iomanip>
23 #include "nnet3/nnet-convolutional-component.h"
24 #include "nnet3/nnet-computation-graph.h"
25 #include "nnet3/nnet-parse.h"
27 namespace kaldi {
28 namespace nnet3 {
31 TimeHeightConvolutionComponent::TimeHeightConvolutionComponent():
32     use_natural_gradient_(true),
33     num_minibatches_history_(4.0) { }
35 TimeHeightConvolutionComponent::TimeHeightConvolutionComponent(
36     const TimeHeightConvolutionComponent &other):
37     UpdatableComponent(other),  // initialize base-class
38     model_(other.model_),
39     all_time_offsets_(other.all_time_offsets_),
40     time_offset_required_(other.time_offset_required_),
41     linear_params_(other.linear_params_),
42     bias_params_(other.bias_params_),
43     max_memory_mb_(other.max_memory_mb_),
44     use_natural_gradient_(other.use_natural_gradient_),
45     num_minibatches_history_(other.num_minibatches_history_),
46     preconditioner_in_(other.preconditioner_in_),
47     preconditioner_out_(other.preconditioner_out_) {
48   Check();
49 }
52 void TimeHeightConvolutionComponent::Check() const {
53   model_.Check();
54   KALDI_ASSERT(bias_params_.Dim() == model_.num_filters_out &&
55                linear_params_.NumRows() == model_.ParamRows() &&
56                linear_params_.NumCols() == model_.ParamCols());
57 }
59 int32 TimeHeightConvolutionComponent::InputDim() const {
60   return model_.InputDim();
61 }
63 int32 TimeHeightConvolutionComponent::OutputDim() const {
64   return model_.OutputDim();
65 }
67 std::string TimeHeightConvolutionComponent::Info() const {
68   std::ostringstream stream;
69   // The output of model_.Info() has been designed to be suitable
70   // as a component-level info string, it has
71   // {num-filters,height}-{in-out}, offsets=[...], required-time-offsets=[...],
72   // {input,output}-dim.
73   stream << UpdatableComponent::Info() << ' ' << model_.Info();
74   PrintParameterStats(stream, "filter-params", linear_params_);
75   PrintParameterStats(stream, "bias-params", bias_params_, true);
76   stream << ", num-params=" << NumParameters()
77          << ", max-memory-mb=" << max_memory_mb_
78          << ", use-natural-gradient=" << use_natural_gradient_;
79   if (use_natural_gradient_) {
80     stream << ", num-minibatches-history=" << num_minibatches_history_
81            << ", rank-in=" << preconditioner_in_.GetRank()
82            << ", rank-out=" << preconditioner_out_.GetRank()
83            << ", alpha-in=" << preconditioner_in_.GetAlpha()
84            << ", alpha-out=" << preconditioner_out_.GetAlpha();
85   }
86   return stream.str();
87 }
90 void TimeHeightConvolutionComponent::InitUnit() {
91   if (model_.num_filters_in != model_.num_filters_out) {
92     KALDI_ERR << "You cannot specify init-unit if the num-filters-in "
93               << "and num-filters-out differ.";
94   }
95   size_t i;
96   int32 zero_offset = 0;
97   for (i = 0; i < model_.offsets.size(); i++) {
98     if (model_.offsets[i].time_offset == 0 &&
99         model_.offsets[i].height_offset == 0) {
100       zero_offset = i;
101       break;
102     }
103   }
104   if (i == model_.offsets.size())  // did not break.
105     KALDI_ERR << "You cannot specify init-unit if the model does "
106               << "not have the offset (0, 0).";
108   CuSubMatrix<BaseFloat> zero_offset_block(
109       linear_params_, 0, linear_params_.NumRows(),
110       zero_offset * model_.num_filters_in, model_.num_filters_in);
112   KALDI_ASSERT(zero_offset_block.NumRows() == zero_offset_block.NumCols());
113   zero_offset_block.AddToDiag(1.0);  // set this block to the unit matrix.
116 void TimeHeightConvolutionComponent::InitFromConfig(ConfigLine *cfl) {
117   // 1. Config values inherited from UpdatableComponent.
118   InitLearningRatesFromConfig(cfl);
120   // 2. convolution-related config values.
121   model_.height_subsample_out = 1;  // default.
122   max_memory_mb_ = 200.0;
123   std::string height_offsets, time_offsets, required_time_offsets = "undef";
125   bool ok = cfl->GetValue("num-filters-in", &model_.num_filters_in) &&
126       cfl->GetValue("num-filters-out", &model_.num_filters_out) &&
127       cfl->GetValue("height-in", &model_.height_in) &&
128       cfl->GetValue("height-out", &model_.height_out) &&
129       cfl->GetValue("height-offsets", &height_offsets) &&
130       cfl->GetValue("time-offsets", &time_offsets);
131   if (!ok) {
132     KALDI_ERR << "Bad initializer: expected all the values "
133         "num-filters-in, num-filters-out, height-in, height-out, "
134         "height-offsets, time-offsets to be defined: "
135               << cfl->WholeLine();
136   }
137   // some optional structural configs.
138   cfl->GetValue("required-time-offsets", &required_time_offsets);
139   cfl->GetValue("height-subsample-out", &model_.height_subsample_out);
140   cfl->GetValue("max-memory-mb", &max_memory_mb_);
141   KALDI_ASSERT(max_memory_mb_ > 0.0);
142   {  // This block attempts to parse height_offsets, time_offsets
143      // and required_time_offsets.
144     std::vector<int32> height_offsets_vec,
145         time_offsets_vec, required_time_offsets_vec;
146     if (!SplitStringToIntegers(height_offsets, ",", false,
147                                &height_offsets_vec) ||
148         !SplitStringToIntegers(time_offsets, ",", false,
149                                &time_offsets_vec)) {
150       KALDI_ERR << "Formatting problem in time-offsets or height-offsets: "
151                 << cfl->WholeLine();
152     }
153     if (height_offsets_vec.empty() || !IsSortedAndUniq(height_offsets_vec) ||
154         time_offsets_vec.empty() || !IsSortedAndUniq(time_offsets_vec)) {
155       KALDI_ERR << "Options time-offsets and height-offsets must be nonempty, "
156           "sorted and unique.";
157     }
158     if (required_time_offsets == "undef") {
159       required_time_offsets_vec = time_offsets_vec;
160     } else {
161       if (!SplitStringToIntegers(required_time_offsets, ",", false,
162                                  &required_time_offsets_vec) ||
163           required_time_offsets_vec.empty() ||
164           !IsSortedAndUniq(required_time_offsets_vec)) {
165       KALDI_ERR << "Formatting problem in required-time-offsets: "
166                 << cfl->WholeLine();
167       }
168     }
169     model_.offsets.clear();
170     for (size_t i = 0; i < time_offsets_vec.size(); i++) {
171       for (size_t j = 0; j < height_offsets_vec.size(); j++) {
172         time_height_convolution::ConvolutionModel::Offset offset;
173         offset.time_offset = time_offsets_vec[i];
174         offset.height_offset = height_offsets_vec[j];
175         model_.offsets.push_back(offset);
176       }
177     }
178     model_.required_time_offsets.clear();
179     model_.required_time_offsets.insert(
180         required_time_offsets_vec.begin(),
181         required_time_offsets_vec.end());
182   }
184   model_.ComputeDerived();
185   if (!model_.Check(false, true)) {
186     KALDI_ERR << "Parameters used to initialize TimeHeightConvolutionComponent "
187               << "do not make sense,  line was: " << cfl->WholeLine();
188   }
189   if (!model_.Check(true, true)) {
190     KALDI_WARN << "There are input heights unused in "
191         "TimeHeightConvolutionComponent; consider increasing output "
192         "height or decreasing height of preceding layer."
193                << cfl->WholeLine();
194   }
196   // 3. Parameter-initialization configs.
197   BaseFloat param_stddev = -1, bias_stddev = 0.0;
198   bool init_unit = false;
199   cfl->GetValue("param-stddev", &param_stddev);
200   cfl->GetValue("bias-stddev", &bias_stddev);
201   cfl->GetValue("init-unit", &init_unit);
202   if (param_stddev < 0.0) {
203     param_stddev = 1.0 / sqrt(model_.num_filters_in *
204                               model_.offsets.size());
205   }
206   // initialize the parameters.
207   linear_params_.Resize(model_.ParamRows(), model_.ParamCols());
208   if (!init_unit) {
209     linear_params_.SetRandn();
210     linear_params_.Scale(param_stddev);
211   } else {
212     InitUnit();
213   }
214   bias_params_.Resize(model_.num_filters_out);
215   bias_params_.SetRandn();
216   bias_params_.Scale(bias_stddev);
219   // 4. Natural-gradient related configs.
220   use_natural_gradient_ = true;
221   num_minibatches_history_ = 4.0;
222   int32 rank_out = -1, rank_in = -1;
223   BaseFloat alpha_out = 4.0, alpha_in = 4.0;
224   cfl->GetValue("use-natural-gradient", &use_natural_gradient_);
225   cfl->GetValue("rank-in", &rank_in);
226   cfl->GetValue("rank-out", &rank_out);
227   cfl->GetValue("alpha-in", &alpha_in);
228   cfl->GetValue("alpha-out", &alpha_out);
229   cfl->GetValue("num-minibatches-history", &num_minibatches_history_);
231   preconditioner_in_.SetAlpha(alpha_in);
232   preconditioner_out_.SetAlpha(alpha_out);
233   int32 dim_in = linear_params_.NumCols() + 1,
234       dim_out = linear_params_.NumRows();
235   if (rank_in < 0) {
236     rank_in = std::min<int32>(80, (dim_in + 1) / 2);
237     preconditioner_in_.SetRank(rank_in);
238   }
239   if (rank_out < 0) {
240     rank_out = std::min<int32>(80, (dim_out + 1) / 2);
241     preconditioner_out_.SetRank(rank_out);
242   }
243   // the swapping of in and out in the lines below is intentional.  the num-rows
244   // of the matrix that we give to preconditioner_in_ to precondition is
245   // dim-out, and the num-rows of the matrix we give to preconditioner_out_ to
246   // preconditioner is dim-in.  the preconditioner objects treat these rows
247   // as separate samples, e.g. separate frames, even though they actually
248   // correspond to a different dimension in the parameter space.
249   preconditioner_in_.SetNumSamplesHistory(dim_out * num_minibatches_history_);
250   preconditioner_out_.SetNumSamplesHistory(dim_in * num_minibatches_history_);
252   preconditioner_in_.SetAlpha(alpha_in);
253   preconditioner_out_.SetAlpha(alpha_out);
255   ComputeDerived();
258 void* TimeHeightConvolutionComponent::Propagate(
259     const ComponentPrecomputedIndexes *indexes_in,
260     const CuMatrixBase<BaseFloat> &in,
261     CuMatrixBase<BaseFloat> *out) const {
262   const PrecomputedIndexes *indexes =
263       dynamic_cast<const PrecomputedIndexes*>(indexes_in);
264   KALDI_ASSERT(indexes != NULL);
265   { // this block handles the bias term.
266     KALDI_ASSERT(out->Stride() == out->NumCols() &&
267                  out->NumCols() == model_.height_out * model_.num_filters_out);
268     CuSubMatrix<BaseFloat> out_reshaped(
269         out->Data(), out->NumRows() * model_.height_out,
270         model_.num_filters_out, model_.num_filters_out);
271     out_reshaped.CopyRowsFromVec(bias_params_);
272   }
273   ConvolveForward(indexes->computation, in, linear_params_, out);
274   return NULL;
277 void TimeHeightConvolutionComponent::Backprop(
278     const std::string &debug_info,
279     const ComponentPrecomputedIndexes *indexes_in,
280     const CuMatrixBase<BaseFloat> &in_value,
281     const CuMatrixBase<BaseFloat> &, // out_value
282     const CuMatrixBase<BaseFloat> &out_deriv,
283     void*, // memo
284     Component *to_update_in,
285     CuMatrixBase<BaseFloat> *in_deriv) const {
286   const PrecomputedIndexes *indexes =
287       dynamic_cast<const PrecomputedIndexes*>(indexes_in);
288   KALDI_ASSERT(indexes != NULL);
290   if (in_deriv != NULL) {
291     ConvolveBackwardData(indexes->computation, linear_params_,
292                          out_deriv, in_deriv);
293   }
294   if (to_update_in != NULL) {
295     TimeHeightConvolutionComponent *to_update =
296         dynamic_cast<TimeHeightConvolutionComponent*>(to_update_in);
297     KALDI_ASSERT(to_update != NULL);
299     if (to_update->learning_rate_ == 0.0)
300       return;
302     if (to_update->is_gradient_ || !to_update->use_natural_gradient_)
303       to_update->UpdateSimple(*indexes, in_value, out_deriv);
304     else
305       to_update->UpdateNaturalGradient(*indexes, in_value, out_deriv);
306   }
309 void TimeHeightConvolutionComponent::UpdateSimple(
310     const PrecomputedIndexes &indexes,
311     const CuMatrixBase<BaseFloat> &in_value,
312     const CuMatrixBase<BaseFloat> &out_deriv) {
314   { // this block handles the bias term.
315     KALDI_ASSERT(out_deriv.Stride() == out_deriv.NumCols() &&
316                  out_deriv.NumCols() ==
317                  model_.height_out * model_.num_filters_out);
318     CuSubMatrix<BaseFloat> out_deriv_reshaped(
319         out_deriv.Data(), out_deriv.NumRows() * model_.height_out,
320         model_.num_filters_out, model_.num_filters_out);
321     bias_params_.AddRowSumMat(learning_rate_, out_deriv_reshaped);
322   }
324   ConvolveBackwardParams(indexes.computation, in_value, out_deriv,
325                          learning_rate_, &linear_params_);
329 void TimeHeightConvolutionComponent::UpdateNaturalGradient(
330     const PrecomputedIndexes &indexes,
331     const CuMatrixBase<BaseFloat> &in_value,
332     const CuMatrixBase<BaseFloat> &out_deriv) {
334   CuVector<BaseFloat> bias_temp(bias_params_.Dim());
336   { // this block computes 'bias_temp', the derivative w.r.t. the bias.
337     KALDI_ASSERT(out_deriv.Stride() == out_deriv.NumCols() &&
338                  out_deriv.NumCols() ==
339                  model_.height_out * model_.num_filters_out);
340     CuSubMatrix<BaseFloat> out_deriv_reshaped(
341         out_deriv.Data(), out_deriv.NumRows() * model_.height_out,
342         model_.num_filters_out, model_.num_filters_out);
343     bias_temp.AddRowSumMat(1.0, out_deriv_reshaped);
344   }
346   CuMatrix<BaseFloat> params_temp(linear_params_.NumRows(),
347                                   linear_params_.NumCols() + 1);
348   params_temp.CopyColFromVec(bias_temp, linear_params_.NumCols());
351   CuSubMatrix<BaseFloat> linear_params_temp(
352       params_temp, 0, linear_params_.NumRows(),
353       0, linear_params_.NumCols());
355   ConvolveBackwardParams(indexes.computation, in_value, out_deriv,
356                          1.0, &linear_params_temp);
358   // the precondition-directions code outputs a scalar that
359   // must be multiplied by its output (this saves one
360   // CUDA operation internally).
361   // We don't bother applying this scale before doing the other
362   // dimenson of natural gradient, because although it's not
363   // invariant to scalar multiplication of the input if the
364   // scalars are different across iterations, the scalars
365   // will be pretty similar on different iterations
366   BaseFloat scale1, scale2;
367   preconditioner_in_.PreconditionDirections(&params_temp, NULL,
368                                             &scale1);
371   CuMatrix<BaseFloat> params_temp_transpose(params_temp, kTrans);
372   preconditioner_out_.PreconditionDirections(&params_temp_transpose,
373                                              NULL, &scale2);
376   linear_params_.AddMat(
377       learning_rate_ * scale1 * scale2,
378       params_temp_transpose.RowRange(0, linear_params_.NumCols()),
379       kTrans);
381   bias_params_.AddVec(learning_rate_ * scale1 * scale2,
382                       params_temp_transpose.Row(linear_params_.NumCols()));
386 void TimeHeightConvolutionComponent::ReorderIndexes(
387     std::vector<Index> *input_indexes,
388     std::vector<Index> *output_indexes) const {
389   using namespace time_height_convolution;
390   ConvolutionComputationOptions opts;
391   opts.max_memory_mb = max_memory_mb_;
392   ConvolutionComputation computation_temp;
393   std::vector<Index> input_indexes_modified,
394       output_indexes_modified;
395   CompileConvolutionComputation(
396       model_, *input_indexes, *output_indexes, opts,
397       &computation_temp, &input_indexes_modified, &output_indexes_modified);
398   input_indexes->swap(input_indexes_modified);
399   output_indexes->swap(output_indexes_modified);
402 void TimeHeightConvolutionComponent::Write(std::ostream &os, bool binary) const {
403   WriteUpdatableCommon(os, binary);  // Write opening tag and learning rate.
404   WriteToken(os, binary, "<Model>");
405   model_.Write(os, binary);
406   WriteToken(os, binary, "<LinearParams>");
407   linear_params_.Write(os, binary);
408   WriteToken(os, binary, "<BiasParams>");
409   bias_params_.Write(os, binary);
410   WriteToken(os, binary, "<MaxMemoryMb>");
411   WriteBasicType(os, binary, max_memory_mb_);
412   WriteToken(os, binary, "<UseNaturalGradient>");
413   WriteBasicType(os, binary, use_natural_gradient_);
414   WriteToken(os, binary, "<NumMinibatchesHistory>");
415   WriteBasicType(os, binary, num_minibatches_history_);
416   int32 rank_in = preconditioner_in_.GetRank(),
417       rank_out = preconditioner_out_.GetRank();
418   BaseFloat alpha_in = preconditioner_in_.GetAlpha(),
419       alpha_out = preconditioner_out_.GetAlpha();
420   WriteToken(os, binary, "<AlphaInOut>");
421   WriteBasicType(os, binary, alpha_in);
422   WriteBasicType(os, binary, alpha_out);
423   WriteToken(os, binary, "<RankInOut>");
424   WriteBasicType(os, binary, rank_in);
425   WriteBasicType(os, binary, rank_out);
426   WriteToken(os, binary, "</TimeHeightConvolutionComponent>");
429 void TimeHeightConvolutionComponent::Read(std::istream &is, bool binary) {
430   std::string token = ReadUpdatableCommon(is, binary);
431   // the next few lines are only for back compatibility.
432   if (token != "") {
433     KALDI_ASSERT(token == "<Model>");
434   } else {
435     ExpectToken(is, binary, "<Model>");
436   }
437   model_.Read(is, binary);
438   ExpectToken(is, binary, "<LinearParams>");
439   linear_params_.Read(is, binary);
440   ExpectToken(is, binary, "<BiasParams>");
441   bias_params_.Read(is, binary);
442   ExpectToken(is, binary, "<MaxMemoryMb>");
443   ReadBasicType(is, binary, &max_memory_mb_);
444   ExpectToken(is, binary, "<UseNaturalGradient>");
445   ReadBasicType(is, binary, &use_natural_gradient_);
446   ExpectToken(is, binary, "<NumMinibatchesHistory>");
447   ReadBasicType(is, binary, &num_minibatches_history_);
448   int32 rank_in,  rank_out;
449   BaseFloat alpha_in, alpha_out;
450   ExpectToken(is, binary, "<AlphaInOut>");
451   ReadBasicType(is, binary, &alpha_in);
452   ReadBasicType(is, binary, &alpha_out);
453   preconditioner_in_.SetAlpha(alpha_in);
454   preconditioner_out_.SetAlpha(alpha_out);
455   ExpectToken(is, binary, "<RankInOut>");
456   ReadBasicType(is, binary, &rank_in);
457   ReadBasicType(is, binary, &rank_out);
458   preconditioner_in_.SetRank(rank_in);
459   preconditioner_out_.SetRank(rank_out);
460   int32 dim_in = linear_params_.NumCols() + 1,
461       dim_out = linear_params_.NumRows();
462   // the following lines mirror similar lines in InitFromConfig().
463   // the swapping of in and out is intentional; see comment in InitFromConfig(),
464   // by similar lines.
465   preconditioner_in_.SetNumSamplesHistory(dim_out * num_minibatches_history_);
466   preconditioner_out_.SetNumSamplesHistory(dim_in * num_minibatches_history_);
467   ExpectToken(is, binary, "</TimeHeightConvolutionComponent>");
468   ComputeDerived();
469   Check();
472 void TimeHeightConvolutionComponent::ComputeDerived() {
473   all_time_offsets_.clear();
474   all_time_offsets_.insert(
475       all_time_offsets_.end(),
476       model_.all_time_offsets.begin(),
477       model_.all_time_offsets.end());
478   time_offset_required_.resize(all_time_offsets_.size());
479   for (size_t i = 0; i < all_time_offsets_.size(); i++) {
480     time_offset_required_[i] =
481         (model_.required_time_offsets.count(all_time_offsets_[i]) > 0);
482   }
485 void TimeHeightConvolutionComponent::GetInputIndexes(
486     const MiscComputationInfo &misc_info,
487     const Index &output_index,
488     std::vector<Index> *desired_indexes) const {
489   KALDI_ASSERT(output_index.t != kNoTime);
490   size_t size = all_time_offsets_.size();
491   desired_indexes->resize(size);
492   for (size_t i = 0; i < size; i++) {
493     (*desired_indexes)[i].n = output_index.n;
494     (*desired_indexes)[i].t = output_index.t + all_time_offsets_[i];
495     (*desired_indexes)[i].x = output_index.x;
496   }
500 bool TimeHeightConvolutionComponent::IsComputable(
501     const MiscComputationInfo &misc_info,
502     const Index &output_index,
503     const IndexSet &input_index_set,
504     std::vector<Index> *used_inputs) const {
505   KALDI_ASSERT(output_index.t != kNoTime);
506   size_t size = all_time_offsets_.size();
507   Index index(output_index);
508   if (used_inputs != NULL) {
509     used_inputs->clear();
510     used_inputs->reserve(size);
511     for (size_t i = 0; i < size; i++) {
512       index.t = output_index.t + all_time_offsets_[i];
513       if (input_index_set(index)) {
514         // This input index is available.
515         used_inputs->push_back(index);
516       } else {
517         // This input index is not available.
518         if (time_offset_required_[i]) {
519           // A required offset was not present -> this output index is not
520           // computable.
521           used_inputs->clear();
522           return false;
523         }
524       }
525     }
526     // All required time-offsets of the output were computable. -> return true.
527     return true;
528   } else {
529     for (size_t i = 0; i < size; i++) {
530       if (time_offset_required_[i]) {
531         index.t = output_index.t + all_time_offsets_[i];
532         if (!input_index_set(index))
533           return false;
534       }
535     }
536     return true;
537   }
541 ComponentPrecomputedIndexes* TimeHeightConvolutionComponent::PrecomputeIndexes(
542       const MiscComputationInfo &misc_info,
543       const std::vector<Index> &input_indexes,
544       const std::vector<Index> &output_indexes,
545       bool need_backprop) const {
546   using namespace time_height_convolution;
547   ConvolutionComputationOptions opts;
548   opts.max_memory_mb = max_memory_mb_;
549   PrecomputedIndexes *ans = new PrecomputedIndexes();
550   std::vector<Index> input_indexes_modified,
551       output_indexes_modified;
552   CompileConvolutionComputation(
553       model_, input_indexes, output_indexes, opts,
554       &(ans->computation), &input_indexes_modified, &output_indexes_modified);
555   if (input_indexes_modified != input_indexes ||
556       output_indexes_modified != output_indexes) {
557     KALDI_ERR << "Problem precomputing indexes";
558   }
559   return ans;
562 void TimeHeightConvolutionComponent::Scale(BaseFloat scale) {
563   if (scale == 0.0) {
564     linear_params_.SetZero();
565     bias_params_.SetZero();
566   } else {
567     linear_params_.Scale(scale);
568     bias_params_.Scale(scale);
569   }
572 void TimeHeightConvolutionComponent::Add(BaseFloat alpha,
573                                          const Component &other_in) {
574   const TimeHeightConvolutionComponent *other =
575       dynamic_cast<const TimeHeightConvolutionComponent*>(&other_in);
576   KALDI_ASSERT(other != NULL);
577   linear_params_.AddMat(alpha, other->linear_params_);
578   bias_params_.AddVec(alpha, other->bias_params_);
581 void TimeHeightConvolutionComponent::PerturbParams(BaseFloat stddev) {
582   CuMatrix<BaseFloat> temp_mat(linear_params_.NumRows(),
583                                linear_params_.NumCols(), kUndefined);
584   temp_mat.SetRandn();
585   linear_params_.AddMat(stddev, temp_mat);
586   CuVector<BaseFloat> temp_vec(bias_params_.Dim(), kUndefined);
587   temp_vec.SetRandn();
588   bias_params_.AddVec(stddev, temp_vec);
591 BaseFloat TimeHeightConvolutionComponent::DotProduct(
592     const UpdatableComponent &other_in) const {
593   const TimeHeightConvolutionComponent *other =
594       dynamic_cast<const TimeHeightConvolutionComponent*>(&other_in);
595   KALDI_ASSERT(other != NULL);
596   return TraceMatMat(linear_params_, other->linear_params_, kTrans) +
597       VecVec(bias_params_, other->bias_params_);
600 int32 TimeHeightConvolutionComponent::NumParameters() const {
601   return linear_params_.NumRows() * linear_params_.NumCols() +
602       bias_params_.Dim();
605 void TimeHeightConvolutionComponent::Vectorize(
606     VectorBase<BaseFloat> *params) const {
607   KALDI_ASSERT(params->Dim() == NumParameters());
608   int32 linear_size = linear_params_.NumRows() * linear_params_.NumCols(),
609       bias_size = bias_params_.Dim();
610   params->Range(0, linear_size).CopyRowsFromMat(linear_params_);
611   params->Range(linear_size, bias_size).CopyFromVec(bias_params_);
614 void TimeHeightConvolutionComponent::UnVectorize(
615     const VectorBase<BaseFloat> &params) {
616   KALDI_ASSERT(params.Dim() == NumParameters());
617   int32 linear_size = linear_params_.NumRows() * linear_params_.NumCols(),
618       bias_size = bias_params_.Dim();
619   linear_params_.CopyRowsFromVec(params.Range(0, linear_size));
620   bias_params_.CopyFromVec(params.Range(linear_size, bias_size));
623 void TimeHeightConvolutionComponent::FreezeNaturalGradient(bool freeze) {
624   preconditioner_in_.Freeze(freeze);
625   preconditioner_out_.Freeze(freeze);
628 TimeHeightConvolutionComponent::PrecomputedIndexes*
629 TimeHeightConvolutionComponent::PrecomputedIndexes::Copy() const {
630   return new PrecomputedIndexes(*this);
633 void TimeHeightConvolutionComponent::PrecomputedIndexes::Write(
634     std::ostream &os, bool binary) const {
635   WriteToken(os, binary, "<TimeHeightConvolutionComponentPrecomputedIndexes>");
636   WriteToken(os, binary, "<Computation>");
637   computation.Write(os, binary);
638   WriteToken(os, binary, "</TimeHeightConvolutionComponentPrecomputedIndexes>");
641 void TimeHeightConvolutionComponent::PrecomputedIndexes::Read(
642     std::istream &is, bool binary) {
643   ExpectOneOrTwoTokens(is, binary,
644                        "<TimeHeightConvolutionComponentPrecomputedIndexes>",
645                        "<Computation>");
646   computation.Read(is, binary);
647   ExpectToken(is, binary, "</TimeHeightConvolutionComponentPrecomputedIndexes>");
651 } // namespace nnet3
652 } // namespace kaldi