1 // nnet3/nnet-convolutional-component.cc
3 // Copyright 2017 Johns Hopkins University (author: Daniel Povey)
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
#include <iomanip>
#include <iterator>
#include <memory>
#include <sstream>
#include "nnet3/nnet-convolutional-component.h"
#include "nnet3/nnet-computation-graph.h"
#include "nnet3/nnet-parse.h"
27 namespace kaldi {
28 namespace nnet3 {
// Default constructor: leaves the model and parameter matrices empty; the
// component is expected to be configured later via InitFromConfig() or Read().
// Only the natural-gradient-related scalars get defaults here.
TimeHeightConvolutionComponent::TimeHeightConvolutionComponent():
    use_natural_gradient_(true),
    num_minibatches_history_(4.0) { }
// Copy constructor: copies the convolution model, the derived index members,
// the parameters and the natural-gradient preconditioner state from 'other',
// then sanity-checks the result.
TimeHeightConvolutionComponent::TimeHeightConvolutionComponent(
    const TimeHeightConvolutionComponent &other):
    UpdatableComponent(other),  // initialize base-class
    model_(other.model_),
    all_time_offsets_(other.all_time_offsets_),
    time_offset_required_(other.time_offset_required_),
    linear_params_(other.linear_params_),
    bias_params_(other.bias_params_),
    max_memory_mb_(other.max_memory_mb_),
    use_natural_gradient_(other.use_natural_gradient_),
    num_minibatches_history_(other.num_minibatches_history_),
    preconditioner_in_(other.preconditioner_in_),
    preconditioner_out_(other.preconditioner_out_) {
  Check();
}
52 void TimeHeightConvolutionComponent::Check() const {
53 model_.Check();
54 KALDI_ASSERT(bias_params_.Dim() == model_.num_filters_out &&
55 linear_params_.NumRows() == model_.ParamRows() &&
56 linear_params_.NumCols() == model_.ParamCols());
57 }
// The input dimension (per frame) is determined entirely by the model.
int32 TimeHeightConvolutionComponent::InputDim() const {
  return model_.InputDim();
}
// The output dimension (per frame) is determined entirely by the model.
int32 TimeHeightConvolutionComponent::OutputDim() const {
  return model_.OutputDim();
}
67 std::string TimeHeightConvolutionComponent::Info() const {
68 std::ostringstream stream;
69 // The output of model_.Info() has been designed to be suitable
70 // as a component-level info string, it has
71 // {num-filters,height}-{in-out}, offsets=[...], required-time-offsets=[...],
72 // {input,output}-dim.
73 stream << UpdatableComponent::Info() << ' ' << model_.Info();
74 PrintParameterStats(stream, "filter-params", linear_params_);
75 PrintParameterStats(stream, "bias-params", bias_params_, true);
76 stream << ", num-params=" << NumParameters()
77 << ", max-memory-mb=" << max_memory_mb_
78 << ", use-natural-gradient=" << use_natural_gradient_;
79 if (use_natural_gradient_) {
80 stream << ", num-minibatches-history=" << num_minibatches_history_
81 << ", rank-in=" << preconditioner_in_.GetRank()
82 << ", rank-out=" << preconditioner_out_.GetRank()
83 << ", alpha-in=" << preconditioner_in_.GetAlpha()
84 << ", alpha-out=" << preconditioner_out_.GetAlpha();
85 }
86 return stream.str();
87 }
90 void TimeHeightConvolutionComponent::InitUnit() {
91 if (model_.num_filters_in != model_.num_filters_out) {
92 KALDI_ERR << "You cannot specify init-unit if the num-filters-in "
93 << "and num-filters-out differ.";
94 }
95 size_t i;
96 int32 zero_offset = 0;
97 for (i = 0; i < model_.offsets.size(); i++) {
98 if (model_.offsets[i].time_offset == 0 &&
99 model_.offsets[i].height_offset == 0) {
100 zero_offset = i;
101 break;
102 }
103 }
104 if (i == model_.offsets.size()) // did not break.
105 KALDI_ERR << "You cannot specify init-unit if the model does "
106 << "not have the offset (0, 0).";
108 CuSubMatrix<BaseFloat> zero_offset_block(
109 linear_params_, 0, linear_params_.NumRows(),
110 zero_offset * model_.num_filters_in, model_.num_filters_in);
112 KALDI_ASSERT(zero_offset_block.NumRows() == zero_offset_block.NumCols());
113 zero_offset_block.AddToDiag(1.0); // set this block to the unit matrix.
114 }
116 void TimeHeightConvolutionComponent::InitFromConfig(ConfigLine *cfl) {
117 // 1. Config values inherited from UpdatableComponent.
118 InitLearningRatesFromConfig(cfl);
120 // 2. convolution-related config values.
121 model_.height_subsample_out = 1; // default.
122 max_memory_mb_ = 200.0;
123 std::string height_offsets, time_offsets, required_time_offsets = "undef";
125 bool ok = cfl->GetValue("num-filters-in", &model_.num_filters_in) &&
126 cfl->GetValue("num-filters-out", &model_.num_filters_out) &&
127 cfl->GetValue("height-in", &model_.height_in) &&
128 cfl->GetValue("height-out", &model_.height_out) &&
129 cfl->GetValue("height-offsets", &height_offsets) &&
130 cfl->GetValue("time-offsets", &time_offsets);
131 if (!ok) {
132 KALDI_ERR << "Bad initializer: expected all the values "
133 "num-filters-in, num-filters-out, height-in, height-out, "
134 "height-offsets, time-offsets to be defined: "
135 << cfl->WholeLine();
136 }
137 // some optional structural configs.
138 cfl->GetValue("required-time-offsets", &required_time_offsets);
139 cfl->GetValue("height-subsample-out", &model_.height_subsample_out);
140 cfl->GetValue("max-memory-mb", &max_memory_mb_);
141 KALDI_ASSERT(max_memory_mb_ > 0.0);
142 { // This block attempts to parse height_offsets, time_offsets
143 // and required_time_offsets.
144 std::vector<int32> height_offsets_vec,
145 time_offsets_vec, required_time_offsets_vec;
146 if (!SplitStringToIntegers(height_offsets, ",", false,
147 &height_offsets_vec) ||
148 !SplitStringToIntegers(time_offsets, ",", false,
149 &time_offsets_vec)) {
150 KALDI_ERR << "Formatting problem in time-offsets or height-offsets: "
151 << cfl->WholeLine();
152 }
153 if (height_offsets_vec.empty() || !IsSortedAndUniq(height_offsets_vec) ||
154 time_offsets_vec.empty() || !IsSortedAndUniq(time_offsets_vec)) {
155 KALDI_ERR << "Options time-offsets and height-offsets must be nonempty, "
156 "sorted and unique.";
157 }
158 if (required_time_offsets == "undef") {
159 required_time_offsets_vec = time_offsets_vec;
160 } else {
161 if (!SplitStringToIntegers(required_time_offsets, ",", false,
162 &required_time_offsets_vec) ||
163 required_time_offsets_vec.empty() ||
164 !IsSortedAndUniq(required_time_offsets_vec)) {
165 KALDI_ERR << "Formatting problem in required-time-offsets: "
166 << cfl->WholeLine();
167 }
168 }
169 model_.offsets.clear();
170 for (size_t i = 0; i < time_offsets_vec.size(); i++) {
171 for (size_t j = 0; j < height_offsets_vec.size(); j++) {
172 time_height_convolution::ConvolutionModel::Offset offset;
173 offset.time_offset = time_offsets_vec[i];
174 offset.height_offset = height_offsets_vec[j];
175 model_.offsets.push_back(offset);
176 }
177 }
178 model_.required_time_offsets.clear();
179 model_.required_time_offsets.insert(
180 required_time_offsets_vec.begin(),
181 required_time_offsets_vec.end());
182 }
184 model_.ComputeDerived();
185 if (!model_.Check(false, true)) {
186 KALDI_ERR << "Parameters used to initialize TimeHeightConvolutionComponent "
187 << "do not make sense, line was: " << cfl->WholeLine();
188 }
189 if (!model_.Check(true, true)) {
190 KALDI_WARN << "There are input heights unused in "
191 "TimeHeightConvolutionComponent; consider increasing output "
192 "height or decreasing height of preceding layer."
193 << cfl->WholeLine();
194 }
196 // 3. Parameter-initialization configs.
197 BaseFloat param_stddev = -1, bias_stddev = 0.0;
198 bool init_unit = false;
199 cfl->GetValue("param-stddev", ¶m_stddev);
200 cfl->GetValue("bias-stddev", &bias_stddev);
201 cfl->GetValue("init-unit", &init_unit);
202 if (param_stddev < 0.0) {
203 param_stddev = 1.0 / sqrt(model_.num_filters_in *
204 model_.offsets.size());
205 }
206 // initialize the parameters.
207 linear_params_.Resize(model_.ParamRows(), model_.ParamCols());
208 if (!init_unit) {
209 linear_params_.SetRandn();
210 linear_params_.Scale(param_stddev);
211 } else {
212 InitUnit();
213 }
214 bias_params_.Resize(model_.num_filters_out);
215 bias_params_.SetRandn();
216 bias_params_.Scale(bias_stddev);
219 // 4. Natural-gradient related configs.
220 use_natural_gradient_ = true;
221 num_minibatches_history_ = 4.0;
222 int32 rank_out = -1, rank_in = -1;
223 BaseFloat alpha_out = 4.0, alpha_in = 4.0;
224 cfl->GetValue("use-natural-gradient", &use_natural_gradient_);
225 cfl->GetValue("rank-in", &rank_in);
226 cfl->GetValue("rank-out", &rank_out);
227 cfl->GetValue("alpha-in", &alpha_in);
228 cfl->GetValue("alpha-out", &alpha_out);
229 cfl->GetValue("num-minibatches-history", &num_minibatches_history_);
231 preconditioner_in_.SetAlpha(alpha_in);
232 preconditioner_out_.SetAlpha(alpha_out);
233 int32 dim_in = linear_params_.NumCols() + 1,
234 dim_out = linear_params_.NumRows();
235 if (rank_in < 0) {
236 rank_in = std::min<int32>(80, (dim_in + 1) / 2);
237 preconditioner_in_.SetRank(rank_in);
238 }
239 if (rank_out < 0) {
240 rank_out = std::min<int32>(80, (dim_out + 1) / 2);
241 preconditioner_out_.SetRank(rank_out);
242 }
243 // the swapping of in and out in the lines below is intentional. the num-rows
244 // of the matrix that we give to preconditioner_in_ to precondition is
245 // dim-out, and the num-rows of the matrix we give to preconditioner_out_ to
246 // preconditioner is dim-in. the preconditioner objects treat these rows
247 // as separate samples, e.g. separate frames, even though they actually
248 // correspond to a different dimension in the parameter space.
249 preconditioner_in_.SetNumSamplesHistory(dim_out * num_minibatches_history_);
250 preconditioner_out_.SetNumSamplesHistory(dim_in * num_minibatches_history_);
252 preconditioner_in_.SetAlpha(alpha_in);
253 preconditioner_out_.SetAlpha(alpha_out);
255 ComputeDerived();
256 }
// Forward pass: writes the per-filter bias into every (row, height) position
// of 'out', then runs the compiled convolution computation on top of it.
// 'indexes_in' must be the PrecomputedIndexes created by PrecomputeIndexes()
// for this component.  Returns NULL (no memo).
void* TimeHeightConvolutionComponent::Propagate(
    const ComponentPrecomputedIndexes *indexes_in,
    const CuMatrixBase<BaseFloat> &in,
    CuMatrixBase<BaseFloat> *out) const {
  const PrecomputedIndexes *indexes =
      dynamic_cast<const PrecomputedIndexes*>(indexes_in);
  KALDI_ASSERT(indexes != NULL);
  { // this block handles the bias term.
    // Reinterpret 'out' (num-rows x (height_out * num_filters_out)) as a
    // matrix with one row per (row, height) pair and num_filters_out columns.
    // This view is only valid if there is no gap between rows, hence the
    // stride check.
    KALDI_ASSERT(out->Stride() == out->NumCols() &&
                 out->NumCols() == model_.height_out * model_.num_filters_out);
    CuSubMatrix<BaseFloat> out_reshaped(
        out->Data(), out->NumRows() * model_.height_out,
        model_.num_filters_out, model_.num_filters_out);
    // Every (row, height) position starts out as the bias vector.
    out_reshaped.CopyRowsFromVec(bias_params_);
  }
  // The convolution result accumulates on top of the bias just written
  // (writing the bias first would otherwise be pointless).
  ConvolveForward(indexes->computation, in, linear_params_, out);
  return NULL;
}
// Backward pass: optionally propagates the output derivative back to the
// input (if in_deriv != NULL), and optionally updates the parameters of
// 'to_update_in' (if non-NULL), using either the plain or natural-gradient
// update depending on that component's settings.
void TimeHeightConvolutionComponent::Backprop(
    const std::string &debug_info,
    const ComponentPrecomputedIndexes *indexes_in,
    const CuMatrixBase<BaseFloat> &in_value,
    const CuMatrixBase<BaseFloat> &, // out_value
    const CuMatrixBase<BaseFloat> &out_deriv,
    void*, // memo
    Component *to_update_in,
    CuMatrixBase<BaseFloat> *in_deriv) const {
  const PrecomputedIndexes *indexes =
      dynamic_cast<const PrecomputedIndexes*>(indexes_in);
  KALDI_ASSERT(indexes != NULL);

  if (in_deriv != NULL) {
    // Derivative w.r.t. the input of this component.
    ConvolveBackwardData(indexes->computation, linear_params_,
                         out_deriv, in_deriv);
  }
  if (to_update_in != NULL) {
    TimeHeightConvolutionComponent *to_update =
        dynamic_cast<TimeHeightConvolutionComponent*>(to_update_in);
    KALDI_ASSERT(to_update != NULL);

    if (to_update->learning_rate_ == 0.0)
      return;  // zero learning rate: nothing to update.

    // Use the simple update when accumulating a raw gradient or when
    // natural gradient is disabled on the component being updated.
    if (to_update->is_gradient_ || !to_update->use_natural_gradient_)
      to_update->UpdateSimple(*indexes, in_value, out_deriv);
    else
      to_update->UpdateNaturalGradient(*indexes, in_value, out_deriv);
  }
}
// Plain gradient update: adds learning_rate_ times the parameter derivative
// to bias_params_ and linear_params_.
void TimeHeightConvolutionComponent::UpdateSimple(
    const PrecomputedIndexes &indexes,
    const CuMatrixBase<BaseFloat> &in_value,
    const CuMatrixBase<BaseFloat> &out_deriv) {

  { // this block handles the bias term.
    // View out_deriv as one row per (row, height) position with
    // num_filters_out columns; the row-sum of that view is the derivative
    // w.r.t. the bias.  Requires a gap-free matrix (stride == num-cols).
    KALDI_ASSERT(out_deriv.Stride() == out_deriv.NumCols() &&
                 out_deriv.NumCols() ==
                 model_.height_out * model_.num_filters_out);
    CuSubMatrix<BaseFloat> out_deriv_reshaped(
        out_deriv.Data(), out_deriv.NumRows() * model_.height_out,
        model_.num_filters_out, model_.num_filters_out);
    bias_params_.AddRowSumMat(learning_rate_, out_deriv_reshaped);
  }

  // Accumulate learning_rate_ times the derivative w.r.t. the linear params.
  ConvolveBackwardParams(indexes.computation, in_value, out_deriv,
                         learning_rate_, &linear_params_);
}
// Natural-gradient update: computes the same parameter derivative as
// UpdateSimple(), but preconditions it on both the input dimension
// (preconditioner_in_) and the output dimension (preconditioner_out_)
// before adding it, scaled by the learning rate, to the parameters.
// The bias derivative is appended to the linear-parameter derivative as an
// extra column so both are preconditioned together.
void TimeHeightConvolutionComponent::UpdateNaturalGradient(
    const PrecomputedIndexes &indexes,
    const CuMatrixBase<BaseFloat> &in_value,
    const CuMatrixBase<BaseFloat> &out_deriv) {

  CuVector<BaseFloat> bias_temp(bias_params_.Dim());

  { // this block computes 'bias_temp', the derivative w.r.t. the bias.
    // Same reshape-and-row-sum trick as in UpdateSimple().
    KALDI_ASSERT(out_deriv.Stride() == out_deriv.NumCols() &&
                 out_deriv.NumCols() ==
                 model_.height_out * model_.num_filters_out);
    CuSubMatrix<BaseFloat> out_deriv_reshaped(
        out_deriv.Data(), out_deriv.NumRows() * model_.height_out,
        model_.num_filters_out, model_.num_filters_out);
    bias_temp.AddRowSumMat(1.0, out_deriv_reshaped);
  }

  // 'params_temp' holds the derivative w.r.t. the linear parameters, with
  // the bias derivative as the final extra column.
  CuMatrix<BaseFloat> params_temp(linear_params_.NumRows(),
                                  linear_params_.NumCols() + 1);
  params_temp.CopyColFromVec(bias_temp, linear_params_.NumCols());

  // Sub-matrix view of the linear-parameter part (all but the last column).
  CuSubMatrix<BaseFloat> linear_params_temp(
      params_temp, 0, linear_params_.NumRows(),
      0, linear_params_.NumCols());

  ConvolveBackwardParams(indexes.computation, in_value, out_deriv,
                         1.0, &linear_params_temp);
  // the precondition-directions code outputs a scalar that
  // must be multiplied by its output (this saves one
  // CUDA operation internally).
  // We don't bother applying this scale before doing the other
  // dimension of natural gradient, because although it's not
  // invariant to scalar multiplication of the input if the
  // scalars are different across iterations, the scalars
  // will be pretty similar on different iterations.
  BaseFloat scale1, scale2;
  preconditioner_in_.PreconditionDirections(&params_temp, NULL,
                                            &scale1);

  // Transpose so the other dimension can be preconditioned as 'rows'.
  CuMatrix<BaseFloat> params_temp_transpose(params_temp, kTrans);
  preconditioner_out_.PreconditionDirections(&params_temp_transpose,
                                             NULL, &scale2);

  // Rows 0 .. NumCols()-1 of the transpose are the linear-parameter part
  // (transposed back with kTrans below); the final row is the bias part.
  linear_params_.AddMat(
      learning_rate_ * scale1 * scale2,
      params_temp_transpose.RowRange(0, linear_params_.NumCols()),
      kTrans);

  bias_params_.AddVec(learning_rate_ * scale1 * scale2,
                      params_temp_transpose.Row(linear_params_.NumCols()));
}
386 void TimeHeightConvolutionComponent::ReorderIndexes(
387 std::vector<Index> *input_indexes,
388 std::vector<Index> *output_indexes) const {
389 using namespace time_height_convolution;
390 ConvolutionComputationOptions opts;
391 opts.max_memory_mb = max_memory_mb_;
392 ConvolutionComputation computation_temp;
393 std::vector<Index> input_indexes_modified,
394 output_indexes_modified;
395 CompileConvolutionComputation(
396 model_, *input_indexes, *output_indexes, opts,
397 &computation_temp, &input_indexes_modified, &output_indexes_modified);
398 input_indexes->swap(input_indexes_modified);
399 output_indexes->swap(output_indexes_modified);
400 }
// Serializes the component.  The tokens and their order form the on-disk
// format and must stay in sync with Read().
void TimeHeightConvolutionComponent::Write(std::ostream &os, bool binary) const {
  WriteUpdatableCommon(os, binary);  // Write opening tag and learning rate.
  WriteToken(os, binary, "<Model>");
  model_.Write(os, binary);
  WriteToken(os, binary, "<LinearParams>");
  linear_params_.Write(os, binary);
  WriteToken(os, binary, "<BiasParams>");
  bias_params_.Write(os, binary);
  WriteToken(os, binary, "<MaxMemoryMb>");
  WriteBasicType(os, binary, max_memory_mb_);
  WriteToken(os, binary, "<UseNaturalGradient>");
  WriteBasicType(os, binary, use_natural_gradient_);
  WriteToken(os, binary, "<NumMinibatchesHistory>");
  WriteBasicType(os, binary, num_minibatches_history_);
  // Only the preconditioners' configuration (alpha, rank) is written, not
  // their accumulated state; Read() restores the configuration.
  int32 rank_in = preconditioner_in_.GetRank(),
      rank_out = preconditioner_out_.GetRank();
  BaseFloat alpha_in = preconditioner_in_.GetAlpha(),
      alpha_out = preconditioner_out_.GetAlpha();
  WriteToken(os, binary, "<AlphaInOut>");
  WriteBasicType(os, binary, alpha_in);
  WriteBasicType(os, binary, alpha_out);
  WriteToken(os, binary, "<RankInOut>");
  WriteBasicType(os, binary, rank_in);
  WriteBasicType(os, binary, rank_out);
  WriteToken(os, binary, "</TimeHeightConvolutionComponent>");
}
// Deserializes the component; the expected token sequence mirrors Write().
// After reading, reconstructs the preconditioner configuration and the
// derived members, then sanity-checks the result.
void TimeHeightConvolutionComponent::Read(std::istream &is, bool binary) {
  std::string token = ReadUpdatableCommon(is, binary);
  // the next few lines are only for back compatibility: ReadUpdatableCommon()
  // may or may not have already consumed the "<Model>" token.
  if (token != "") {
    KALDI_ASSERT(token == "<Model>");
  } else {
    ExpectToken(is, binary, "<Model>");
  }
  model_.Read(is, binary);
  ExpectToken(is, binary, "<LinearParams>");
  linear_params_.Read(is, binary);
  ExpectToken(is, binary, "<BiasParams>");
  bias_params_.Read(is, binary);
  ExpectToken(is, binary, "<MaxMemoryMb>");
  ReadBasicType(is, binary, &max_memory_mb_);
  ExpectToken(is, binary, "<UseNaturalGradient>");
  ReadBasicType(is, binary, &use_natural_gradient_);
  ExpectToken(is, binary, "<NumMinibatchesHistory>");
  ReadBasicType(is, binary, &num_minibatches_history_);
  // The preconditioners are reconstructed from their stored configuration
  // (alpha and rank); their accumulated state is not serialized.
  int32 rank_in, rank_out;
  BaseFloat alpha_in, alpha_out;
  ExpectToken(is, binary, "<AlphaInOut>");
  ReadBasicType(is, binary, &alpha_in);
  ReadBasicType(is, binary, &alpha_out);
  preconditioner_in_.SetAlpha(alpha_in);
  preconditioner_out_.SetAlpha(alpha_out);
  ExpectToken(is, binary, "<RankInOut>");
  ReadBasicType(is, binary, &rank_in);
  ReadBasicType(is, binary, &rank_out);
  preconditioner_in_.SetRank(rank_in);
  preconditioner_out_.SetRank(rank_out);
  int32 dim_in = linear_params_.NumCols() + 1,
      dim_out = linear_params_.NumRows();
  // the following lines mirror similar lines in InitFromConfig().
  // the swapping of in and out is intentional; see comment in InitFromConfig(),
  // by similar lines.
  preconditioner_in_.SetNumSamplesHistory(dim_out * num_minibatches_history_);
  preconditioner_out_.SetNumSamplesHistory(dim_in * num_minibatches_history_);
  ExpectToken(is, binary, "</TimeHeightConvolutionComponent>");
  ComputeDerived();
  Check();
}
472 void TimeHeightConvolutionComponent::ComputeDerived() {
473 all_time_offsets_.clear();
474 all_time_offsets_.insert(
475 all_time_offsets_.end(),
476 model_.all_time_offsets.begin(),
477 model_.all_time_offsets.end());
478 time_offset_required_.resize(all_time_offsets_.size());
479 for (size_t i = 0; i < all_time_offsets_.size(); i++) {
480 time_offset_required_[i] =
481 (model_.required_time_offsets.count(all_time_offsets_[i]) > 0);
482 }
483 }
485 void TimeHeightConvolutionComponent::GetInputIndexes(
486 const MiscComputationInfo &misc_info,
487 const Index &output_index,
488 std::vector<Index> *desired_indexes) const {
489 KALDI_ASSERT(output_index.t != kNoTime);
490 size_t size = all_time_offsets_.size();
491 desired_indexes->resize(size);
492 for (size_t i = 0; i < size; i++) {
493 (*desired_indexes)[i].n = output_index.n;
494 (*desired_indexes)[i].t = output_index.t + all_time_offsets_[i];
495 (*desired_indexes)[i].x = output_index.x;
496 }
497 }
500 bool TimeHeightConvolutionComponent::IsComputable(
501 const MiscComputationInfo &misc_info,
502 const Index &output_index,
503 const IndexSet &input_index_set,
504 std::vector<Index> *used_inputs) const {
505 KALDI_ASSERT(output_index.t != kNoTime);
506 size_t size = all_time_offsets_.size();
507 Index index(output_index);
508 if (used_inputs != NULL) {
509 used_inputs->clear();
510 used_inputs->reserve(size);
511 for (size_t i = 0; i < size; i++) {
512 index.t = output_index.t + all_time_offsets_[i];
513 if (input_index_set(index)) {
514 // This input index is available.
515 used_inputs->push_back(index);
516 } else {
517 // This input index is not available.
518 if (time_offset_required_[i]) {
519 // A required offset was not present -> this output index is not
520 // computable.
521 used_inputs->clear();
522 return false;
523 }
524 }
525 }
526 // All required time-offsets of the output were computable. -> return true.
527 return true;
528 } else {
529 for (size_t i = 0; i < size; i++) {
530 if (time_offset_required_[i]) {
531 index.t = output_index.t + all_time_offsets_[i];
532 if (!input_index_set(index))
533 return false;
534 }
535 }
536 return true;
537 }
538 }
541 ComponentPrecomputedIndexes* TimeHeightConvolutionComponent::PrecomputeIndexes(
542 const MiscComputationInfo &misc_info,
543 const std::vector<Index> &input_indexes,
544 const std::vector<Index> &output_indexes,
545 bool need_backprop) const {
546 using namespace time_height_convolution;
547 ConvolutionComputationOptions opts;
548 opts.max_memory_mb = max_memory_mb_;
549 PrecomputedIndexes *ans = new PrecomputedIndexes();
550 std::vector<Index> input_indexes_modified,
551 output_indexes_modified;
552 CompileConvolutionComputation(
553 model_, input_indexes, output_indexes, opts,
554 &(ans->computation), &input_indexes_modified, &output_indexes_modified);
555 if (input_indexes_modified != input_indexes ||
556 output_indexes_modified != output_indexes) {
557 KALDI_ERR << "Problem precomputing indexes";
558 }
559 return ans;
560 }
562 void TimeHeightConvolutionComponent::Scale(BaseFloat scale) {
563 if (scale == 0.0) {
564 linear_params_.SetZero();
565 bias_params_.SetZero();
566 } else {
567 linear_params_.Scale(scale);
568 bias_params_.Scale(scale);
569 }
570 }
572 void TimeHeightConvolutionComponent::Add(BaseFloat alpha,
573 const Component &other_in) {
574 const TimeHeightConvolutionComponent *other =
575 dynamic_cast<const TimeHeightConvolutionComponent*>(&other_in);
576 KALDI_ASSERT(other != NULL);
577 linear_params_.AddMat(alpha, other->linear_params_);
578 bias_params_.AddVec(alpha, other->bias_params_);
579 }
581 void TimeHeightConvolutionComponent::PerturbParams(BaseFloat stddev) {
582 CuMatrix<BaseFloat> temp_mat(linear_params_.NumRows(),
583 linear_params_.NumCols(), kUndefined);
584 temp_mat.SetRandn();
585 linear_params_.AddMat(stddev, temp_mat);
586 CuVector<BaseFloat> temp_vec(bias_params_.Dim(), kUndefined);
587 temp_vec.SetRandn();
588 bias_params_.AddVec(stddev, temp_vec);
589 }
591 BaseFloat TimeHeightConvolutionComponent::DotProduct(
592 const UpdatableComponent &other_in) const {
593 const TimeHeightConvolutionComponent *other =
594 dynamic_cast<const TimeHeightConvolutionComponent*>(&other_in);
595 KALDI_ASSERT(other != NULL);
596 return TraceMatMat(linear_params_, other->linear_params_, kTrans) +
597 VecVec(bias_params_, other->bias_params_);
598 }
600 int32 TimeHeightConvolutionComponent::NumParameters() const {
601 return linear_params_.NumRows() * linear_params_.NumCols() +
602 bias_params_.Dim();
603 }
605 void TimeHeightConvolutionComponent::Vectorize(
606 VectorBase<BaseFloat> *params) const {
607 KALDI_ASSERT(params->Dim() == NumParameters());
608 int32 linear_size = linear_params_.NumRows() * linear_params_.NumCols(),
609 bias_size = bias_params_.Dim();
610 params->Range(0, linear_size).CopyRowsFromMat(linear_params_);
611 params->Range(linear_size, bias_size).CopyFromVec(bias_params_);
612 }
614 void TimeHeightConvolutionComponent::UnVectorize(
615 const VectorBase<BaseFloat> ¶ms) {
616 KALDI_ASSERT(params.Dim() == NumParameters());
617 int32 linear_size = linear_params_.NumRows() * linear_params_.NumCols(),
618 bias_size = bias_params_.Dim();
619 linear_params_.CopyRowsFromVec(params.Range(0, linear_size));
620 bias_params_.CopyFromVec(params.Range(linear_size, bias_size));
621 }
// Forwards the freeze request to both natural-gradient preconditioners
// (see OnlineNaturalGradient::Freeze() for the semantics).
void TimeHeightConvolutionComponent::FreezeNaturalGradient(bool freeze) {
  preconditioner_in_.Freeze(freeze);
  preconditioner_out_.Freeze(freeze);
}
// Returns a newly allocated deep copy of this object; the caller takes
// ownership.
TimeHeightConvolutionComponent::PrecomputedIndexes*
TimeHeightConvolutionComponent::PrecomputedIndexes::Copy() const {
  return new PrecomputedIndexes(*this);
}
// Serializes the precomputed indexes; the token sequence must stay in sync
// with PrecomputedIndexes::Read().
void TimeHeightConvolutionComponent::PrecomputedIndexes::Write(
    std::ostream &os, bool binary) const {
  WriteToken(os, binary, "<TimeHeightConvolutionComponentPrecomputedIndexes>");
  WriteToken(os, binary, "<Computation>");
  computation.Write(os, binary);
  WriteToken(os, binary, "</TimeHeightConvolutionComponentPrecomputedIndexes>");
}
// Deserializes the precomputed indexes, mirroring PrecomputedIndexes::Write().
// ExpectOneOrTwoTokens tolerates the first (class-name) token being absent,
// presumably for back compatibility with older written forms.
void TimeHeightConvolutionComponent::PrecomputedIndexes::Read(
    std::istream &is, bool binary) {
  ExpectOneOrTwoTokens(is, binary,
                       "<TimeHeightConvolutionComponentPrecomputedIndexes>",
                       "<Computation>");
  computation.Read(is, binary);
  ExpectToken(is, binary, "</TimeHeightConvolutionComponentPrecomputedIndexes>");
}
651 } // namespace nnet3
652 } // namespace kaldi