1 // nnet3/nnet-simple-component.h
3 // Copyright 2011-2013 Karel Vesely
4 // 2012-2015 Johns Hopkins University (author: Daniel Povey)
5 // 2013 Xiaohui Zhang
6 // 2014-2015 Vijayaditya Peddinti
7 // 2014-2015 Guoguo Chen
8 // 2015 Daniel Galvez
9 // 2015 Tom Ko
11 // See ../../COPYING for clarification regarding multiple authors
12 //
13 // Licensed under the Apache License, Version 2.0 (the "License");
14 // you may not use this file except in compliance with the License.
15 // You may obtain a copy of the License at
16 //
17 // http://www.apache.org/licenses/LICENSE-2.0
18 //
19 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
21 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
22 // MERCHANTABLITY OR NON-INFRINGEMENT.
23 // See the Apache 2 License for the specific language governing permissions and
24 // limitations under the License.
26 #ifndef KALDI_NNET3_NNET_SIMPLE_COMPONENT_H_
27 #define KALDI_NNET3_NNET_SIMPLE_COMPONENT_H_
29 #include "nnet3/nnet-common.h"
30 #include "nnet3/nnet-component-itf.h"
31 #include "nnet3/natural-gradient-online.h"
32 #include <iostream>
34 namespace kaldi {
35 namespace nnet3 {
37 /// @file This file contains declarations of components that are "simple", meaning
38 /// they don't care about the indexes they are operating on, produce one
39 /// output for one input, and return the kSimpleComponent flag in their
40 /// Properties(): for example, tanh and affine components. In
41 /// nnet-general-component.h there are components that don't fit this pattern.
43 // This "nnet3" version of the p-norm component only supports the 2-norm.
44 class PnormComponent: public Component {
45 public:
46 void Init(int32 input_dim, int32 output_dim);
47 explicit PnormComponent(int32 input_dim, int32 output_dim) {
48 Init(input_dim, output_dim);
49 }
50 virtual int32 Properties() const {
51 return kSimpleComponent|kLinearInInput|kBackpropNeedsInput|kBackpropNeedsOutput;
52 }
53 PnormComponent(): input_dim_(0), output_dim_(0) { }
54 virtual std::string Type() const { return "PnormComponent"; }
55 virtual void InitFromConfig(ConfigLine *cfl);
56 virtual int32 InputDim() const { return input_dim_; }
57 virtual int32 OutputDim() const { return output_dim_; }
58 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
59 const CuMatrixBase<BaseFloat> &in,
60 CuMatrixBase<BaseFloat> *out) const;
61 virtual void Backprop(const std::string &debug_info,
62 const ComponentPrecomputedIndexes *indexes,
63 const CuMatrixBase<BaseFloat> &in_value,
64 const CuMatrixBase<BaseFloat> &out_value,
65 const CuMatrixBase<BaseFloat> &out_deriv,
66 Component *to_update,
67 CuMatrixBase<BaseFloat> *in_deriv) const;
68 virtual Component* Copy() const { return new PnormComponent(input_dim_,
69 output_dim_); }
71 virtual void Read(std::istream &is, bool binary); // This Read function
72 // requires that the Component has the correct type.
74 /// Write component to stream
75 virtual void Write(std::ostream &os, bool binary) const;
77 protected:
78 int32 input_dim_;
79 int32 output_dim_;
80 };
82 class ElementwiseProductComponent: public Component {
83 public:
84 void Init(int32 input_dim, int32 output_dim);
85 explicit ElementwiseProductComponent(int32 input_dim, int32 output_dim) {
86 Init(input_dim, output_dim);
87 }
88 virtual int32 Properties() const {
89 return kSimpleComponent|kBackpropNeedsInput;
90 }
91 ElementwiseProductComponent(): input_dim_(0), output_dim_(0) { }
92 virtual std::string Type() const { return "ElementwiseProductComponent"; }
93 virtual void InitFromConfig(ConfigLine *cfl);
94 virtual int32 InputDim() const { return input_dim_; }
95 virtual int32 OutputDim() const { return output_dim_; }
96 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
97 const CuMatrixBase<BaseFloat> &in,
98 CuMatrixBase<BaseFloat> *out) const;
99 virtual void Backprop(const std::string &debug_info,
100 const ComponentPrecomputedIndexes *indexes,
101 const CuMatrixBase<BaseFloat> &in_value,
102 const CuMatrixBase<BaseFloat> &out_value,
103 const CuMatrixBase<BaseFloat> &out_deriv,
104 Component *to_update,
105 CuMatrixBase<BaseFloat> *in_deriv) const;
106 virtual Component* Copy() const { return new ElementwiseProductComponent(input_dim_,
107 output_dim_); }
109 virtual void Read(std::istream &is, bool binary); // This Read function
110 // requires that the Component has the correct type.
112 /// Write component to stream
113 virtual void Write(std::ostream &os, bool binary) const;
115 protected:
116 int32 input_dim_;
117 int32 output_dim_;
118 };
120 class NormalizeComponent: public NonlinearComponent {
121 // note: although we inherit from NonlinearComponent, we don't actually bohter
122 // accumulating the stats that NonlinearComponent is capable of accumulating.
123 public:
124 void Init(int32 dim, BaseFloat target_rms, bool add_log_stddev);
125 explicit NormalizeComponent(int32 dim, BaseFloat target_rms = 1.0,
126 bool add_log_stddev = false) { Init(dim, target_rms, add_log_stddev); }
127 explicit NormalizeComponent(const NormalizeComponent &other): NonlinearComponent(other),
128 target_rms_(other.target_rms_), add_log_stddev_(other.add_log_stddev_) { }
129 virtual int32 Properties() const {
130 return (add_log_stddev_ ? kSimpleComponent|kBackpropNeedsInput :
131 kSimpleComponent|kBackpropNeedsInput|kPropagateInPlace|
132 kBackpropInPlace);
133 }
134 NormalizeComponent(): target_rms_(1.0), add_log_stddev_(false) { }
135 virtual std::string Type() const { return "NormalizeComponent"; }
136 virtual void InitFromConfig(ConfigLine *cfl);
137 virtual Component* Copy() const { return new NormalizeComponent(*this); }
138 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
139 const CuMatrixBase<BaseFloat> &in,
140 CuMatrixBase<BaseFloat> *out) const;
141 virtual void Backprop(const std::string &debug_info,
142 const ComponentPrecomputedIndexes *indexes,
143 const CuMatrixBase<BaseFloat> &in_value,
144 const CuMatrixBase<BaseFloat> &, // out_value
145 const CuMatrixBase<BaseFloat> &out_deriv,
146 Component *to_update,
147 CuMatrixBase<BaseFloat> *in_deriv) const;
149 virtual void Read(std::istream &is, bool binary); // This Read function
150 // requires that the Component has the correct type.
152 /// Write component to stream
153 virtual void Write(std::ostream &os, bool binary) const;
155 virtual int32 OutputDim() const { return (dim_ + (add_log_stddev_ ? 1 : 0)); }
157 virtual std::string Info() const;
158 private:
159 NormalizeComponent &operator = (const NormalizeComponent &other); // Disallow.
160 static const BaseFloat kNormFloor;
161 BaseFloat target_rms_; // The target rms for outputs.
162 // about 0.7e-20. We need a value that's exactly representable in
163 // float and whose inverse square root is also exactly representable
164 // in float (hence, an even power of two).
166 bool add_log_stddev_; // If true, log(max(epsi, sqrt(row_in^T row_in / D)))
167 // is an extra dimension of the output.
168 };
171 class SigmoidComponent: public NonlinearComponent {
172 public:
173 explicit SigmoidComponent(int32 dim): NonlinearComponent(dim) { }
174 explicit SigmoidComponent(const SigmoidComponent &other): NonlinearComponent(other) { }
175 SigmoidComponent() { }
176 virtual std::string Type() const { return "SigmoidComponent"; }
177 virtual int32 Properties() const {
178 return kSimpleComponent|kBackpropNeedsOutput|kPropagateInPlace|kStoresStats;
179 }
180 virtual Component* Copy() const { return new SigmoidComponent(*this); }
181 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
182 const CuMatrixBase<BaseFloat> &in,
183 CuMatrixBase<BaseFloat> *out) const;
184 virtual void Backprop(const std::string &debug_info,
185 const ComponentPrecomputedIndexes *indexes,
186 const CuMatrixBase<BaseFloat> &, //in_value
187 const CuMatrixBase<BaseFloat> &out_value,
188 const CuMatrixBase<BaseFloat> &out_deriv,
189 Component *to_update,
190 CuMatrixBase<BaseFloat> *in_deriv) const;
191 virtual void StoreStats(const CuMatrixBase<BaseFloat> &out_value);
192 private:
193 SigmoidComponent &operator = (const SigmoidComponent &other); // Disallow.
194 };
196 class TanhComponent: public NonlinearComponent {
197 public:
198 explicit TanhComponent(int32 dim): NonlinearComponent(dim) { }
199 explicit TanhComponent(const TanhComponent &other): NonlinearComponent(other) { }
200 TanhComponent() { }
201 virtual std::string Type() const { return "TanhComponent"; }
202 virtual Component* Copy() const { return new TanhComponent(*this); }
203 virtual int32 Properties() const {
204 return kSimpleComponent|kBackpropNeedsOutput|kPropagateInPlace|kStoresStats;
205 }
206 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
207 const CuMatrixBase<BaseFloat> &in,
208 CuMatrixBase<BaseFloat> *out) const;
209 virtual void Backprop(const std::string &debug_info,
210 const ComponentPrecomputedIndexes *indexes,
211 const CuMatrixBase<BaseFloat> &, //in_value
212 const CuMatrixBase<BaseFloat> &out_value,
213 const CuMatrixBase<BaseFloat> &out_deriv,
214 Component *to_update,
215 CuMatrixBase<BaseFloat> *in_deriv) const;
216 virtual void StoreStats(const CuMatrixBase<BaseFloat> &out_value);
217 private:
218 TanhComponent &operator = (const TanhComponent &other); // Disallow.
219 };
222 class RectifiedLinearComponent: public NonlinearComponent {
223 public:
224 explicit RectifiedLinearComponent(int32 dim): NonlinearComponent(dim) { }
225 explicit RectifiedLinearComponent(const RectifiedLinearComponent &other): NonlinearComponent(other) { }
226 RectifiedLinearComponent() { }
227 virtual std::string Type() const { return "RectifiedLinearComponent"; }
228 virtual Component* Copy() const { return new RectifiedLinearComponent(*this); }
229 virtual int32 Properties() const {
230 return kSimpleComponent|kLinearInInput|kBackpropNeedsOutput|kPropagateInPlace|
231 kStoresStats;
232 }
233 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
234 const CuMatrixBase<BaseFloat> &in,
235 CuMatrixBase<BaseFloat> *out) const;
236 virtual void Backprop(const std::string &debug_info,
237 const ComponentPrecomputedIndexes *indexes,
238 const CuMatrixBase<BaseFloat> &, //in_value
239 const CuMatrixBase<BaseFloat> &out_value,
240 const CuMatrixBase<BaseFloat> &out_deriv,
241 Component *to_update,
242 CuMatrixBase<BaseFloat> *in_deriv) const;
243 virtual void StoreStats(const CuMatrixBase<BaseFloat> &out_value);
244 private:
245 RectifiedLinearComponent &operator = (const RectifiedLinearComponent &other); // Disallow.
246 };
248 /**
249 This component is a fixed (non-trainable) nonlinearity that sums its inputs
250 to produce outputs. Currently the only supported configuration is that its
251 input-dim is interpreted as consisting of n blocks, and the output is just a
252 summation over the n blocks, where n = input-dim / output-dim, so for instance
253 output[n] = input[n] + input[block-size + n] + .... .
254 Later if needed we can add a configuration variable that allows you to sum
255 over 'interleaved' input.
256 */
257 class SumReduceComponent: public Component {
258 public:
259 void Init(int32 input_dim, int32 output_dim);
260 explicit SumReduceComponent(int32 input_dim, int32 output_dim) {
261 Init(input_dim, output_dim);
262 }
263 virtual int32 Properties() const {
264 return kSimpleComponent|kLinearInInput;
265 }
266 SumReduceComponent(): input_dim_(0), output_dim_(0) { }
267 virtual std::string Type() const { return "SumReduceComponent"; }
268 virtual void InitFromConfig(ConfigLine *cfl);
269 virtual int32 InputDim() const { return input_dim_; }
270 virtual int32 OutputDim() const { return output_dim_; }
271 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
272 const CuMatrixBase<BaseFloat> &in,
273 CuMatrixBase<BaseFloat> *out) const;
274 virtual void Backprop(const std::string &debug_info,
275 const ComponentPrecomputedIndexes *indexes,
276 const CuMatrixBase<BaseFloat> &, // in_value
277 const CuMatrixBase<BaseFloat> &, // out_value,
278 const CuMatrixBase<BaseFloat> &out_deriv,
279 Component *, // to_update
280 CuMatrixBase<BaseFloat> *in_deriv) const;
281 virtual Component* Copy() const { return new SumReduceComponent(input_dim_,
282 output_dim_); }
284 virtual void Read(std::istream &is, bool binary); // This Read function
285 // requires that the Component has the correct type.
287 /// Write component to stream
288 virtual void Write(std::ostream &os, bool binary) const;
290 protected:
291 int32 input_dim_;
292 int32 output_dim_;
293 };
296 class FixedAffineComponent;
297 class FixedScaleComponent;
298 class PerElementScaleComponent;
299 class PerElementOffsetComponent;
301 // Affine means a linear function plus an offset.
302 // Note: although this class can be instantiated, it also
303 // functions as a base-class for more specialized versions of
304 // AffineComponent.
305 class AffineComponent: public UpdatableComponent {
306 friend class SoftmaxComponent; // Friend declaration relates to mixing up.
307 public:
309 virtual int32 InputDim() const { return linear_params_.NumCols(); }
310 virtual int32 OutputDim() const { return linear_params_.NumRows(); }
312 virtual std::string Info() const;
313 virtual void InitFromConfig(ConfigLine *cfl);
315 AffineComponent() { } // use Init to really initialize.
316 virtual std::string Type() const { return "AffineComponent"; }
317 virtual int32 Properties() const {
318 return kSimpleComponent|kUpdatableComponent|kLinearInParameters|
319 kBackpropNeedsInput|kBackpropAdds;
320 }
323 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
324 const CuMatrixBase<BaseFloat> &in,
325 CuMatrixBase<BaseFloat> *out) const;
326 virtual void Backprop(const std::string &debug_info,
327 const ComponentPrecomputedIndexes *indexes,
328 const CuMatrixBase<BaseFloat> &in_value,
329 const CuMatrixBase<BaseFloat> &, // out_value
330 const CuMatrixBase<BaseFloat> &out_deriv,
331 Component *to_update,
332 CuMatrixBase<BaseFloat> *in_deriv) const;
334 virtual void Read(std::istream &is, bool binary);
335 virtual void Write(std::ostream &os, bool binary) const;
337 virtual Component* Copy() const;
340 // Some functions from base-class UpdatableComponent.
341 virtual void Scale(BaseFloat scale);
342 virtual void Add(BaseFloat alpha, const Component &other);
343 virtual void SetZero(bool treat_as_gradient);
344 virtual void PerturbParams(BaseFloat stddev);
345 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
346 virtual int32 NumParameters() const;
347 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
348 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
350 // Some functions that are specific to this class.
352 // This new function is used when mixing up:
353 virtual void SetParams(const VectorBase<BaseFloat> &bias,
354 const MatrixBase<BaseFloat> &linear);
355 const CuVector<BaseFloat> &BiasParams() { return bias_params_; }
356 const CuMatrix<BaseFloat> &LinearParams() { return linear_params_; }
357 explicit AffineComponent(const AffineComponent &other);
358 // The next constructor is used in converting from nnet1.
359 AffineComponent(const CuMatrixBase<BaseFloat> &linear_params,
360 const CuVectorBase<BaseFloat> &bias_params,
361 BaseFloat learning_rate);
362 void Init(int32 input_dim, int32 output_dim,
363 BaseFloat param_stddev, BaseFloat bias_stddev);
364 void Init(std::string matrix_filename);
366 // This function resizes the dimensions of the component, setting the
367 // parameters to zero, while leaving any other configuration values the same.
368 virtual void Resize(int32 input_dim, int32 output_dim);
370 // The following functions are used for collapsing multiple layers
371 // together. They return a pointer to a new Component equivalent to
372 // the sequence of two components. We haven't implemented this for
373 // FixedLinearComponent yet.
374 Component *CollapseWithNext(const AffineComponent &next) const ;
375 Component *CollapseWithNext(const FixedAffineComponent &next) const;
376 Component *CollapseWithNext(const FixedScaleComponent &next) const;
377 Component *CollapseWithPrevious(const FixedAffineComponent &prev) const;
379 protected:
380 friend class NaturalGradientAffineComponent;
381 // This function Update() is for extensibility; child classes may override
382 // this, e.g. for natural gradient update.
383 virtual void Update(
384 const std::string &debug_info,
385 const CuMatrixBase<BaseFloat> &in_value,
386 const CuMatrixBase<BaseFloat> &out_deriv) {
387 UpdateSimple(in_value, out_deriv);
388 }
389 // UpdateSimple is used when *this is a gradient. Child classes may override
390 // this if needed, but typically won't need to.
391 virtual void UpdateSimple(
392 const CuMatrixBase<BaseFloat> &in_value,
393 const CuMatrixBase<BaseFloat> &out_deriv);
395 const AffineComponent &operator = (const AffineComponent &other); // Disallow.
396 CuMatrix<BaseFloat> linear_params_;
397 CuVector<BaseFloat> bias_params_;
398 };
400 class RepeatedAffineComponent;
402 /// This class implements an affine transform using a block diagonal matrix
403 /// e.g., one whose weight matrix is all zeros except for blocks on the
404 /// diagonal. All these blocks have the same dimensions.
405 /// input-dim: num cols of block diagonal matrix.
406 /// output-dim: num rows of block diagonal matrix.
407 /// num-blocks: number of blocks in diagonal of the matrix.
408 /// num-blocks must divide both input-dim and output-dim
409 class BlockAffineComponent : public UpdatableComponent {
410 public:
411 virtual int32 InputDim() const { return linear_params_.NumCols() * num_blocks_; }
412 virtual int32 OutputDim() const { return linear_params_.NumRows(); }
414 virtual std::string Info() const;
415 virtual void InitFromConfig(ConfigLine *cfl);
417 BlockAffineComponent() { }
418 virtual std::string Type() const { return "BlockAffineComponent"; }
419 virtual int32 Properties() const {
420 return kSimpleComponent|kUpdatableComponent|kLinearInParameters|
421 kBackpropNeedsInput|kBackpropAdds;
422 }
424 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
425 const CuMatrixBase<BaseFloat> &in,
426 CuMatrixBase<BaseFloat> *out) const;
428 virtual void Backprop(const std::string &debug_info,
429 const ComponentPrecomputedIndexes *indexes,
430 const CuMatrixBase<BaseFloat> &in_value,
431 const CuMatrixBase<BaseFloat> &, // out_value
432 const CuMatrixBase<BaseFloat> &out_deriv,
433 Component *to_update,
434 CuMatrixBase<BaseFloat> *in_deriv) const;
436 virtual void Read(std::istream &is, bool binary);
437 virtual void Write(std::ostream &os, bool binary) const;
439 virtual Component* Copy() const;
441 // Functions from base-class UpdatableComponent.
442 virtual void Scale(BaseFloat scale);
443 virtual void Add(BaseFloat alpha, const Component &other);
444 virtual void SetZero(bool treat_as_gradient);
445 virtual void PerturbParams(BaseFloat stddev);
446 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
447 virtual int32 NumParameters() const;
448 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
449 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
451 // BlockAffine-specific functions.
452 void Init(int32 input_dim, int32 output_dim, int32 num_blocks,
453 BaseFloat param_stddev, BaseFloat bias_mean,
454 BaseFloat bias_stddev);
455 explicit BlockAffineComponent(const BlockAffineComponent &other);
456 explicit BlockAffineComponent(const RepeatedAffineComponent &rac);
457 protected:
458 // The matrix linear_params_ has a block structure, with num_blocks_ blocks of
459 // equal size. The blocks are stored in linear_params_ as
460 // [ M
461 // N
462 // O ] but we actually treat it as the matrix:
463 // [ M 0 0
464 // 0 N 0
465 // 0 0 O ]
466 CuMatrix<BaseFloat> linear_params_;
467 CuVector<BaseFloat> bias_params_;
468 int32 num_blocks_;
469 private:
470 const BlockAffineComponent &operator = (const BlockAffineComponent &other); // Disallow.
471 };
473 class RepeatedAffineComponent: public UpdatableComponent {
474 public:
476 virtual int32 InputDim() const { return linear_params_.NumCols() * num_repeats_; }
477 virtual int32 OutputDim() const { return linear_params_.NumRows() * num_repeats_; }
479 virtual std::string Info() const;
480 virtual void InitFromConfig(ConfigLine *cfl);
482 RepeatedAffineComponent() { } // use Init to really initialize.
483 virtual std::string Type() const { return "RepeatedAffineComponent"; }
484 virtual int32 Properties() const {
485 return kSimpleComponent|kUpdatableComponent|kLinearInParameters|
486 kBackpropNeedsInput|kBackpropAdds;
487 }
488 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
489 const CuMatrixBase<BaseFloat> &in,
490 CuMatrixBase<BaseFloat> *out) const;
491 virtual void Backprop(const std::string &debug_info,
492 const ComponentPrecomputedIndexes *indexes,
493 const CuMatrixBase<BaseFloat> &in_value,
494 const CuMatrixBase<BaseFloat> &, // out_value
495 const CuMatrixBase<BaseFloat> &out_deriv,
496 Component *to_update,
497 CuMatrixBase<BaseFloat> *in_deriv) const;
499 virtual void Read(std::istream &is, bool binary);
500 virtual void Write(std::ostream &os, bool binary) const;
502 virtual Component* Copy() const;
504 // Some functions from base-class UpdatableComponent.
505 virtual void Scale(BaseFloat scale);
506 virtual void Add(BaseFloat alpha, const Component &other);
507 virtual void SetZero(bool treat_as_gradient);
508 virtual void PerturbParams(BaseFloat stddev);
509 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
510 virtual int32 NumParameters() const;
511 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
512 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
514 // Some functions that are specific to this class.
515 const CuVector<BaseFloat> &BiasParams() { return bias_params_; }
516 const CuMatrix<BaseFloat> &LinearParams() { return linear_params_; }
517 explicit RepeatedAffineComponent(const RepeatedAffineComponent &other);
519 void Init(int32 input_dim, int32 output_dim, int32 num_repeats,
520 BaseFloat param_stddev, BaseFloat bias_mean,
521 BaseFloat bias_stddev);
522 friend BlockAffineComponent::BlockAffineComponent(const RepeatedAffineComponent &rac);
523 protected:
524 // This function Update(), called from backprop, is broken out for
525 // extensibility to natural gradient update.
526 virtual void Update(
527 const CuMatrixBase<BaseFloat> &in_value,
528 const CuMatrixBase<BaseFloat> &out_deriv);
530 // This function does nothing here but is redefined in child-class
531 // NaturalGradientRepeatedAffineComponent. This help avoid repeated code.
532 virtual void SetNaturalGradientConfigs() { }
534 const RepeatedAffineComponent &operator = (const RepeatedAffineComponent &other); // Disallow.
535 CuMatrix<BaseFloat> linear_params_;
536 CuVector<BaseFloat> bias_params_;
537 int32 num_repeats_;
538 };
540 class NaturalGradientRepeatedAffineComponent: public RepeatedAffineComponent {
541 public:
542 // Use Init() to really initialize.
543 NaturalGradientRepeatedAffineComponent() { }
545 // Most of the public functions are inherited from RepeatedAffineComponent.
546 virtual std::string Type() const {
547 return "NaturalGradientRepeatedAffineComponent";
548 }
550 virtual Component* Copy() const;
552 // Copy constructor
553 explicit NaturalGradientRepeatedAffineComponent(
554 const NaturalGradientRepeatedAffineComponent &other);
555 private:
556 virtual void Update(
557 const CuMatrixBase<BaseFloat> &in_value,
558 const CuMatrixBase<BaseFloat> &out_deriv);
560 const NaturalGradientRepeatedAffineComponent &operator=(
561 const NaturalGradientRepeatedAffineComponent &other); // Disallow.
563 // Applies the default configuration to preconditioner_in_.
564 virtual void SetNaturalGradientConfigs();
566 // For efficiency reasons we only apply the natural gradient to the input
567 // side, i.e. not to the space of output derivatives-- we believe the input
568 // side is the more important side. We don't make the natural-gradient
569 // configurable; we just give it a reasonable configuration.
570 // Instead of using the individual data-points, for efficiency reasons we use
571 // the distribution of per-minibatch summed derivatives over each dimension of
572 // the output space, as the source for the Fisher matrix.
573 OnlineNaturalGradient preconditioner_in_;
574 };
576 class SoftmaxComponent: public NonlinearComponent {
577 public:
578 explicit SoftmaxComponent(int32 dim): NonlinearComponent(dim) { }
579 explicit SoftmaxComponent(const SoftmaxComponent &other):
580 NonlinearComponent(other) { }
581 SoftmaxComponent() { }
582 virtual std::string Type() const { return "SoftmaxComponent"; }
583 virtual int32 Properties() const {
584 return kSimpleComponent|kBackpropNeedsOutput|kStoresStats;
585 }
586 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
587 const CuMatrixBase<BaseFloat> &in,
588 CuMatrixBase<BaseFloat> *out) const;
589 virtual void Backprop(const std::string &debug_info,
590 const ComponentPrecomputedIndexes *indexes,
591 const CuMatrixBase<BaseFloat> &in_value,
592 const CuMatrixBase<BaseFloat> &out_value,
593 const CuMatrixBase<BaseFloat> &out_deriv,
594 Component *to_update,
595 CuMatrixBase<BaseFloat> *in_deriv) const;
596 virtual void StoreStats(const CuMatrixBase<BaseFloat> &out_value);
598 virtual Component* Copy() const { return new SoftmaxComponent(*this); }
599 private:
600 SoftmaxComponent &operator = (const SoftmaxComponent &other); // Disallow.
601 };
603 class LogSoftmaxComponent: public NonlinearComponent {
604 public:
605 explicit LogSoftmaxComponent(int32 dim): NonlinearComponent(dim) { }
606 explicit LogSoftmaxComponent(const LogSoftmaxComponent &other):
607 NonlinearComponent(other) { }
608 LogSoftmaxComponent() { }
609 virtual std::string Type() const { return "LogSoftmaxComponent"; }
610 virtual int32 Properties() const {
611 return kSimpleComponent|kBackpropNeedsOutput|kStoresStats;
612 }
613 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
614 const CuMatrixBase<BaseFloat> &in,
615 CuMatrixBase<BaseFloat> *out) const;
616 virtual void Backprop(const std::string &debug_info,
617 const ComponentPrecomputedIndexes *indexes,
618 const CuMatrixBase<BaseFloat> &in_value,
619 const CuMatrixBase<BaseFloat> &out_value,
620 const CuMatrixBase<BaseFloat> &out_deriv,
621 Component *to_update,
622 CuMatrixBase<BaseFloat> *in_deriv) const;
624 virtual Component* Copy() const { return new LogSoftmaxComponent(*this); }
625 private:
626 LogSoftmaxComponent &operator = (const LogSoftmaxComponent &other); // Disallow.
627 };
629 /// Keywords: natural gradient descent, NG-SGD, naturalgradient. For
630 /// the top-level of the natural gradient code look here, and also in
631 /// nnet-precondition-online.h.
632 /// NaturalGradientAffineComponent is
633 /// a version of AffineComponent that has a non-(multiple of unit) learning-rate
634 /// matrix. See nnet-precondition-online.h for a description of the technique.
635 /// It is described, under the name Online NG-SGD, in the paper "Parallel
636 /// training of DNNs with Natural Gradient and Parameter Averaging" (ICLR
637 /// workshop, 2015) by Daniel Povey, Xiaohui Zhang and Sanjeev Khudanpur.
638 class NaturalGradientAffineComponent: public AffineComponent {
639 public:
640 virtual std::string Type() const { return "NaturalGradientAffineComponent"; }
641 virtual void Read(std::istream &is, bool binary);
642 virtual void Write(std::ostream &os, bool binary) const;
643 void Init(int32 input_dim, int32 output_dim,
644 BaseFloat param_stddev, BaseFloat bias_stddev, BaseFloat bias_mean,
645 int32 rank_in, int32 rank_out, int32 update_period,
646 BaseFloat num_samples_history, BaseFloat alpha,
647 BaseFloat max_change_per_sample);
648 void Init(int32 rank_in, int32 rank_out, int32 update_period,
649 BaseFloat num_samples_history,
650 BaseFloat alpha, BaseFloat max_change_per_sample,
651 std::string matrix_filename);
652 // this constructor does not really initialize, use Init() or Read().
653 NaturalGradientAffineComponent();
654 virtual void Resize(int32 input_dim, int32 output_dim);
655 virtual void InitFromConfig(ConfigLine *cfl);
656 virtual std::string Info() const;
657 virtual Component* Copy() const;
658 virtual void Scale(BaseFloat scale);
659 virtual void Add(BaseFloat alpha, const Component &other);
660 // copy constructor
661 explicit NaturalGradientAffineComponent(
662 const NaturalGradientAffineComponent &other);
663 virtual void ZeroStats();
665 private:
666 // disallow assignment operator.
667 NaturalGradientAffineComponent &operator= (
668 const NaturalGradientAffineComponent&);
670 // Configs for preconditioner. The input side tends to be better conditioned ->
671 // smaller rank needed, so make them separately configurable.
672 int32 rank_in_;
673 int32 rank_out_;
674 int32 update_period_;
675 BaseFloat num_samples_history_;
676 BaseFloat alpha_;
678 OnlineNaturalGradient preconditioner_in_;
680 OnlineNaturalGradient preconditioner_out_;
682 // If > 0, max_change_per_sample_ is the maximum amount of parameter
683 // change (in L2 norm) that we allow per sample, averaged over the minibatch.
684 // This was introduced in order to control instability.
685 // Instead of the exact L2 parameter change, for
686 // efficiency purposes we limit a bound on the exact
687 // change. The limit is applied via a constant <= 1.0
688 // for each minibatch, A suitable value might be, for
689 // example, 10 or so; larger if there are more
690 // parameters.
691 BaseFloat max_change_per_sample_;
693 // update_count_ records how many updates we have done.
694 double update_count_;
696 // active_scaling_count_ records how many updates we have done,
697 // where the scaling factor is active (not 1.0).
698 double active_scaling_count_;
700 // max_change_scale_stats_ records the sum of scaling factors
701 // in each update, so we can compute the averaged scaling factor
702 // in Info().
703 double max_change_scale_stats_;
705 // Sets the configs rank, alpha and eta in the preconditioner objects,
706 // from the class variables.
707 void SetNaturalGradientConfigs();
709 virtual void Update(
710 const std::string &debug_info,
711 const CuMatrixBase<BaseFloat> &in_value,
712 const CuMatrixBase<BaseFloat> &out_deriv);
713 };
/// FixedAffineComponent is an affine transform that is supplied
/// at network initialization time and is not trainable.
/// Its dimensions are read off linear_params_: InputDim() is the number of
/// columns and OutputDim() is the number of rows.
class FixedAffineComponent: public Component {
 public:
  FixedAffineComponent() { }
  virtual std::string Type() const { return "FixedAffineComponent"; }
  virtual std::string Info() const;

  /// matrix should be of size input-dim+1 to output-dim, last col is offset
  /// (presumably output-dim rows by input-dim+1 columns -- confirm against
  /// the definition of Init()).
  void Init(const CuMatrixBase<BaseFloat> &matrix);

  // The ConfigLine cfl contains just the option matrix=<string>,
  // where the string is the filename of a Kaldi-format matrix to read.
  virtual void InitFromConfig(ConfigLine *cfl);

  // Flags advertised to the framework: a simple component whose Backprop
  // is declared with kBackpropAdds.
  virtual int32 Properties() const { return kSimpleComponent|kBackpropAdds; }
  virtual int32 InputDim() const { return linear_params_.NumCols(); }
  virtual int32 OutputDim() const { return linear_params_.NumRows(); }

  virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
                         const CuMatrixBase<BaseFloat> &in,
                         CuMatrixBase<BaseFloat> *out) const;
  // The out_value argument is left unnamed in this declaration.
  virtual void Backprop(const std::string &debug_info,
                        const ComponentPrecomputedIndexes *indexes,
                        const CuMatrixBase<BaseFloat> &in_value,
                        const CuMatrixBase<BaseFloat> &, // out_value
                        const CuMatrixBase<BaseFloat> &out_deriv,
                        Component *to_update,
                        CuMatrixBase<BaseFloat> *in_deriv) const;

  virtual Component* Copy() const;
  virtual void Read(std::istream &is, bool binary);
  virtual void Write(std::ostream &os, bool binary) const;

  // Function to provide access to linear_params_.
  const CuMatrix<BaseFloat> &LinearParams() const { return linear_params_; }
 protected:
  // AffineComponent is granted access to the parameters below.
  friend class AffineComponent;
  CuMatrix<BaseFloat> linear_params_;  // linear part of the transform.
  CuVector<BaseFloat> bias_params_;    // offset part of the transform.

  KALDI_DISALLOW_COPY_AND_ASSIGN(FixedAffineComponent);
};
761 /// SumGroupComponent is used to sum up groups of posteriors.
762 /// It's used to introduce a kind of Gaussian-mixture-model-like
763 /// idea into neural nets. This is basically a degenerate case of
764 /// MixtureProbComponent; we had to implement it separately to
765 /// be efficient for CUDA (we can use this one regardless whether
766 /// we have CUDA or not; it's the normal case we want anyway).
767 ///
768 /// There are two forms of initialization in a config file: one
769 /// where the number of elements are specified for each group
770 /// individually as a vector, and one where only the total input
771 /// dimension and the output dimension (number of groups) is specified.
772 /// The second is used when all groups have the same size.
773 class SumGroupComponent: public Component {
774 public:
775 virtual int32 InputDim() const { return input_dim_; }
776 virtual int32 OutputDim() const { return output_dim_; }
777 void Init(const std::vector<int32> &sizes); // the vector is of the input dim
778 // (>= 1) for each output dim.
779 void Init(int32 input_dim, int32 output_dim);
780 void GetSizes(std::vector<int32> *sizes) const; // Get a vector saying, for
781 // each output-dim, how many
782 // inputs were summed over.
783 virtual void InitFromConfig(ConfigLine *cfl);
784 SumGroupComponent() { }
785 virtual std::string Type() const { return "SumGroupComponent"; }
786 virtual int32 Properties() const { return kSimpleComponent|kLinearInInput; }
787 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
788 const CuMatrixBase<BaseFloat> &in,
789 CuMatrixBase<BaseFloat> *out) const;
790 virtual void Backprop(const std::string &debug_info,
791 const ComponentPrecomputedIndexes *indexes,
792 const CuMatrixBase<BaseFloat> &in_value,
793 const CuMatrixBase<BaseFloat> &, // out_value
794 const CuMatrixBase<BaseFloat> &out_deriv,
795 Component *to_update,
796 CuMatrixBase<BaseFloat> *in_deriv) const;
797 virtual Component* Copy() const;
798 virtual void Read(std::istream &is, bool binary);
799 virtual void Write(std::ostream &os, bool binary) const;
801 private:
802 KALDI_DISALLOW_COPY_AND_ASSIGN(SumGroupComponent);
803 // Note: Int32Pair is just struct{ int32 first; int32 second }; it's defined
804 // in cu-matrixdim.h as extern "C" which is needed for the CUDA interface.
805 CuArray<Int32Pair> indexes_; // for each output index, the (start, end) input
806 // index.
807 CuArray<int32> reverse_indexes_; // for each input index, the output index.
808 int32 input_dim_;
809 int32 output_dim_;
810 };
/// FixedScaleComponent applies a fixed per-element scale; it's similar
/// to the Rescale component in the nnet1 setup (and only needed for nnet1
/// model conversion).
/// Both the input and the output dimension equal the dimension of scales_.
class FixedScaleComponent: public Component {
 public:
  FixedScaleComponent() { }
  virtual std::string Type() const { return "FixedScaleComponent"; }
  virtual std::string Info() const;
  // Advertises that both Propagate and Backprop may be done in place.
  virtual int32 Properties() const {
    return kSimpleComponent|kLinearInInput|kPropagateInPlace|kBackpropInPlace;
  }

  // Initializes from a vector of scales (presumably one scale per
  // dimension, since InputDim()/OutputDim() return scales_.Dim()).
  void Init(const CuVectorBase<BaseFloat> &scales);

  // The ConfigLine cfl contains only the option scales=<string>,
  // where the string is the filename of a Kaldi-format matrix to read.
  // NOTE(review): Init() takes a vector, so the file presumably holds a
  // vector rather than a matrix -- confirm against InitFromConfig().
  virtual void InitFromConfig(ConfigLine *cfl);

  virtual int32 InputDim() const { return scales_.Dim(); }
  virtual int32 OutputDim() const { return scales_.Dim(); }

  virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
                         const CuMatrixBase<BaseFloat> &in,
                         CuMatrixBase<BaseFloat> *out) const;
  // in_value, out_value and to_update are left unnamed in this declaration.
  virtual void Backprop(const std::string &debug_info,
                        const ComponentPrecomputedIndexes *indexes,
                        const CuMatrixBase<BaseFloat> &, // in_value
                        const CuMatrixBase<BaseFloat> &, // out_value
                        const CuMatrixBase<BaseFloat> &out_deriv,
                        Component *, // to_update
                        CuMatrixBase<BaseFloat> *in_deriv) const;
  virtual Component* Copy() const;
  virtual void Read(std::istream &is, bool binary);
  virtual void Write(std::ostream &os, bool binary) const;

 protected:
  friend class AffineComponent;  // necessary for collapse
  CuVector<BaseFloat> scales_;   // the fixed per-element scales.
  KALDI_DISALLOW_COPY_AND_ASSIGN(FixedScaleComponent);
};
855 /// FixedBiasComponent applies a fixed per-element bias; it's similar
856 /// to the AddShift component in the nnet1 setup (and only needed for nnet1
857 /// model conversion.
858 class FixedBiasComponent: public Component {
859 public:
860 FixedBiasComponent() { }
861 virtual std::string Type() const { return "FixedBiasComponent"; }
862 virtual std::string Info() const;
864 virtual int32 Properties() const {
865 return kSimpleComponent|kPropagateInPlace|kBackpropInPlace;
866 }
868 void Init(const CuVectorBase<BaseFloat> &scales);
870 // The ConfigLine cfl contains only the option bias=<string>,
871 // where the string is the filename of a Kaldi-format matrix to read.
872 virtual void InitFromConfig(ConfigLine *cfl);
873 virtual int32 InputDim() const { return bias_.Dim(); }
874 virtual int32 OutputDim() const { return bias_.Dim(); }
875 using Component::Propagate; // to avoid name hiding
876 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
877 const CuMatrixBase<BaseFloat> &in,
878 CuMatrixBase<BaseFloat> *out) const;
879 virtual void Backprop(const std::string &debug_info,
880 const ComponentPrecomputedIndexes *indexes,
881 const CuMatrixBase<BaseFloat> &, // in_value,
882 const CuMatrixBase<BaseFloat> &, // out_value
883 const CuMatrixBase<BaseFloat> &out_deriv,
884 Component *, // to_update
885 CuMatrixBase<BaseFloat> *in_deriv) const;
886 virtual Component* Copy() const;
887 virtual void Read(std::istream &is, bool binary);
888 virtual void Write(std::ostream &os, bool binary) const;
890 protected:
891 CuVector<BaseFloat> bias_;
892 KALDI_DISALLOW_COPY_AND_ASSIGN(FixedBiasComponent);
893 };
// NoOpComponent just duplicates its input.  We don't anticipate this being used
// very often, but it may sometimes make your life easier.
// It derives from NonlinearComponent and forwards its constructor arguments
// to that base class.
class NoOpComponent: public NonlinearComponent {
 public:
  explicit NoOpComponent(int32 dim): NonlinearComponent(dim) { }
  // Copy constructor; used by Copy() below.
  explicit NoOpComponent(const NoOpComponent &other): NonlinearComponent(other) { }
  NoOpComponent() { }
  virtual std::string Type() const { return "NoOpComponent"; }
  virtual int32 Properties() const {
    return kSimpleComponent|kLinearInInput|kPropagateInPlace;
  }
  // Returns a newly allocated copy of *this.
  virtual Component* Copy() const { return new NoOpComponent(*this); }
  virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
                         const CuMatrixBase<BaseFloat> &in,
                         CuMatrixBase<BaseFloat> *out) const;
  // in_value and out_value are left unnamed in this declaration.
  virtual void Backprop(const std::string &debug_info,
                        const ComponentPrecomputedIndexes *indexes,
                        const CuMatrixBase<BaseFloat> &, //in_value
                        const CuMatrixBase<BaseFloat> &, // out_value,
                        const CuMatrixBase<BaseFloat> &out_deriv,
                        Component *to_update,
                        CuMatrixBase<BaseFloat> *in_deriv) const;
 private:
  // Declared private to forbid assignment.
  NoOpComponent &operator = (const NoOpComponent &other); // Disallow.
};
// ClipGradientComponent just duplicates its input, but clips gradients
// during backpropagation if they cross a predetermined threshold.
// This component will be used to prevent gradient explosion problem in
// recurrent neural networks.
// Besides its configuration it carries two counters (num_clipped_ and
// count_) which are passed along when the component is copied.
class ClipGradientComponent: public Component {
 public:
  // Constructs a fully specified component by forwarding everything to Init().
  ClipGradientComponent(int32 dim, BaseFloat clipping_threshold,
                        bool norm_based_clipping, int32 num_clipped,
                        int32 count) {
    Init(dim, clipping_threshold, norm_based_clipping, num_clipped, count);}

  // Default state: zero dimension, threshold of -1, element-wise clipping
  // mode, and zeroed counters.
  ClipGradientComponent(): dim_(0), clipping_threshold_(-1),
    norm_based_clipping_(false), num_clipped_(0), count_(0) { }

  virtual int32 InputDim() const { return dim_; }
  virtual int32 OutputDim() const { return dim_; }
  virtual void InitFromConfig(ConfigLine *cfl);
  void Init(int32 dim, BaseFloat clipping_threshold, bool norm_based_clipping,
            int32 num_clipped, int32 count);

  virtual std::string Type() const { return "ClipGradientComponent"; }

  // Advertises that both Propagate and Backprop may be done in place.
  virtual int32 Properties() const {
    return kSimpleComponent|kLinearInInput|kPropagateInPlace|kBackpropInPlace;
  }

  virtual void ZeroStats();

  // Returns a newly allocated component built from the current configuration
  // and the current values of the counters.
  virtual Component* Copy() const {
    return new ClipGradientComponent(dim_,
                                     clipping_threshold_,
                                     norm_based_clipping_,
                                     num_clipped_,
                                     count_);}

  virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
                         const CuMatrixBase<BaseFloat> &in,
                         CuMatrixBase<BaseFloat> *out) const;
  // in_value and out_value are left unnamed in this declaration.
  virtual void Backprop(const std::string &debug_info,
                        const ComponentPrecomputedIndexes *indexes,
                        const CuMatrixBase<BaseFloat> &, //in_value
                        const CuMatrixBase<BaseFloat> &, // out_value,
                        const CuMatrixBase<BaseFloat> &out_deriv,
                        Component *to_update,
                        CuMatrixBase<BaseFloat> *in_deriv) const;

  virtual void Scale(BaseFloat scale);
  virtual void Add(BaseFloat alpha, const Component &other);
  virtual void Read(std::istream &is, bool binary); // This Read function
  // requires that the Component has the correct type.
  /// Write component to stream
  virtual void Write(std::ostream &os, bool binary) const;
  virtual std::string Info() const;
 private:
  int32 dim_;  // input/output dimension
  BaseFloat clipping_threshold_;  // threshold to be used for clipping
                                  // could correspond to max-row-norm (if
                                  // norm_based_clipping_ == true) or
                                  // max-absolute-value (otherwise)
  bool norm_based_clipping_;  // if true the max-row-norm will be clipped
                              // else element-wise absolute value clipping is
                              // done

  // Declared private to forbid assignment.
  ClipGradientComponent &operator =
      (const ClipGradientComponent &other); // Disallow.

 protected:
  // variables to store stats
  // An element corresponds to rows of derivative matrix, when
  // norm_based_clipping_ is true,
  // else it corresponds to each element of the derivative matrix
  // Note: no stats are stored when norm_based_clipping_ is false
  int32 num_clipped_;  // number of elements which were clipped
  int32 count_;  // number of elements which were processed

};
/** PermuteComponent changes the order of the columns (i.e. the feature or
    activation dimensions).  Output dimension i is mapped to input dimension
    column_map_[i], so it's like doing:
      for each row:
        for each feature/activation dimension i:
          output(row, i) = input(row, column_map_[i]).

    The input and output dimensions are both column_map_.Dim().
*/
class PermuteComponent: public Component {
 public:
  PermuteComponent()  {}
  // Constructs the component by forwarding column_map to Init().
  PermuteComponent(const std::vector<int32> &column_map) { Init(column_map); }

  virtual int32 InputDim() const { return column_map_.Dim(); }
  virtual int32 OutputDim() const { return column_map_.Dim(); }
  virtual void InitFromConfig(ConfigLine *cfl);
  void Init(const std::vector<int32> &column_map);

  virtual std::string Type() const { return "PermuteComponent"; }

  virtual int32 Properties() const {
    return kSimpleComponent|kLinearInInput;
  }

  // Deliberately a no-op in this class.
  virtual void ZeroStats() {}

  virtual Component* Copy() const;

  virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
                         const CuMatrixBase<BaseFloat> &in,
                         CuMatrixBase<BaseFloat> *out) const;
  // in_value and out_value are left unnamed in this declaration.
  virtual void Backprop(const std::string &debug_info,
                        const ComponentPrecomputedIndexes *indexes,
                        const CuMatrixBase<BaseFloat> &, //in_value
                        const CuMatrixBase<BaseFloat> &, // out_value,
                        const CuMatrixBase<BaseFloat> &out_deriv,
                        Component *to_update,
                        CuMatrixBase<BaseFloat> *in_deriv) const;

  // Scale() and Add() are no-ops in this class.
  virtual void Scale(BaseFloat scale) {}
  virtual void Add(BaseFloat alpha, const Component &other) {}
  virtual void Read(std::istream &is, bool binary); // This Read function
  // requires that the Component has the correct type.
  /// Write component to stream
  virtual void Write(std::ostream &os, bool binary) const;
  virtual std::string Info() const;
 private:
  // computes the reverse column map.  Must not be called if column_map_.Dim()
  // == 0
  void ComputeReverseColumnMap();
  // for each output dimension i, the input dimension it is taken from.
  CuArray<int32> column_map_;
  // the following is a derived variable, not written to disk.
  // It is used in backprop.
  CuArray<int32> reverse_column_map_;
  // Declared private to forbid assignment.
  PermuteComponent &operator =
      (const PermuteComponent &other); // Disallow.
};
1060 // PerElementScaleComponent scales each dimension of its input with a separate
1061 // trainable scale; it's like a linear component with a diagonal matrix.
1062 class PerElementScaleComponent: public UpdatableComponent {
1063 public:
1064 virtual int32 InputDim() const { return scales_.Dim(); }
1065 virtual int32 OutputDim() const { return scales_.Dim(); }
1067 virtual std::string Info() const;
1068 virtual void InitFromConfig(ConfigLine *cfl);
1070 PerElementScaleComponent() { } // use Init to really initialize.
1071 virtual std::string Type() const { return "PerElementScaleComponent"; }
1072 virtual int32 Properties() const {
1073 return kSimpleComponent|kUpdatableComponent|kLinearInInput|
1074 kLinearInParameters|kBackpropNeedsInput|kPropagateInPlace;
1075 }
1077 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1078 const CuMatrixBase<BaseFloat> &in,
1079 CuMatrixBase<BaseFloat> *out) const;
1080 virtual void Backprop(const std::string &debug_info,
1081 const ComponentPrecomputedIndexes *indexes,
1082 const CuMatrixBase<BaseFloat> &in_value,
1083 const CuMatrixBase<BaseFloat> &, // out_value
1084 const CuMatrixBase<BaseFloat> &out_deriv,
1085 Component *to_update,
1086 CuMatrixBase<BaseFloat> *in_deriv) const;
1088 virtual void Read(std::istream &is, bool binary);
1089 virtual void Write(std::ostream &os, bool binary) const;
1091 virtual Component* Copy() const;
1094 // Some functions from base-class UpdatableComponent.
1095 virtual void Scale(BaseFloat scale);
1096 virtual void Add(BaseFloat alpha, const Component &other);
1097 virtual void SetZero(bool treat_as_gradient);
1098 virtual void PerturbParams(BaseFloat stddev);
1099 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1100 virtual int32 NumParameters() const;
1101 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1102 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
1104 // Some functions that are specific to this class.
1105 explicit PerElementScaleComponent(const PerElementScaleComponent &other);
1107 void Init(int32 dim, BaseFloat param_mean, BaseFloat param_stddev);
1108 void Init(std::string vector_filename);
1110 protected:
1111 friend class AffineComponent; // necessary for collapse
1112 // This function Update() is for extensibility; child classes may override
1113 // this, e.g. for natural gradient update.
1114 virtual void Update(
1115 const std::string &debug_info,
1116 const CuMatrixBase<BaseFloat> &in_value,
1117 const CuMatrixBase<BaseFloat> &out_deriv) {
1118 UpdateSimple(in_value, out_deriv);
1119 }
1120 // UpdateSimple is used when *this is a gradient. Child classes may override
1121 // this if needed, but typically won't need to.
1122 virtual void UpdateSimple(
1123 const CuMatrixBase<BaseFloat> &in_value,
1124 const CuMatrixBase<BaseFloat> &out_deriv);
1126 const PerElementScaleComponent &operator
1127 = (const PerElementScaleComponent &other); // Disallow.
1128 CuVector<BaseFloat> scales_;
1129 };
1132 // PerElementOffsetComponent offsets each dimension of its input with a separate
1133 // trainable bias; it's like an affine component with fixed weight matrix which is always equal to I.
1134 class PerElementOffsetComponent: public UpdatableComponent {
1135 public:
1136 virtual int32 InputDim() const { return offsets_.Dim(); }
1137 virtual int32 OutputDim() const { return offsets_.Dim(); }
1139 virtual std::string Info() const;
1140 virtual void InitFromConfig(ConfigLine *cfl);
1142 PerElementOffsetComponent() { } // use Init to really initialize.
1143 virtual std::string Type() const { return "PerElementOffsetComponent"; }
1144 virtual int32 Properties() const {
1145 return kSimpleComponent|kUpdatableComponent|
1146 kBackpropInPlace|kPropagateInPlace;
1147 }
1149 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1150 const CuMatrixBase<BaseFloat> &in,
1151 CuMatrixBase<BaseFloat> *out) const;
1152 virtual void Backprop(const std::string &debug_info,
1153 const ComponentPrecomputedIndexes *indexes,
1154 const CuMatrixBase<BaseFloat> &, // in_value
1155 const CuMatrixBase<BaseFloat> &, // out_value
1156 const CuMatrixBase<BaseFloat> &out_deriv,
1157 Component *to_update,
1158 CuMatrixBase<BaseFloat> *in_deriv) const;
1160 virtual void Read(std::istream &is, bool binary);
1161 virtual void Write(std::ostream &os, bool binary) const;
1163 virtual Component* Copy() const;
1166 // Some functions from base-class UpdatableComponent.
1167 virtual void Scale(BaseFloat scale);
1168 virtual void Add(BaseFloat alpha, const Component &other);
1169 virtual void SetZero(bool treat_as_gradient);
1170 virtual void PerturbParams(BaseFloat stddev);
1171 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1172 virtual int32 NumParameters() const;
1173 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1174 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
1176 // Some functions that are specific to this class.
1177 explicit PerElementOffsetComponent(const PerElementOffsetComponent &other);
1179 void Init(int32 dim, BaseFloat param_mean,
1180 BaseFloat param_stddev);
1181 void Init(std::string vector_filename);
1183 protected:
1184 const PerElementOffsetComponent &operator
1185 = (const PerElementOffsetComponent &other); // Disallow.
1186 CuVector<BaseFloat> offsets_;
1187 };
// NaturalGradientPerElementScaleComponent is like PerElementScaleComponent but
// it uses a natural gradient update for the per-element scales, and enforces a
// maximum amount of change per minibatch, for stability.
// It adds a max-change setting and a single OnlineNaturalGradient
// preconditioner to what it inherits from PerElementScaleComponent.
class NaturalGradientPerElementScaleComponent: public PerElementScaleComponent {
 public:

  virtual std::string Info() const;

  virtual void InitFromConfig(ConfigLine *cfl);

  NaturalGradientPerElementScaleComponent() { } // use Init to really initialize.
  virtual std::string Type() const {
    return "NaturalGradientPerElementScaleComponent";
  }

  virtual void Read(std::istream &is, bool binary);
  virtual void Write(std::ostream &os, bool binary) const;

  virtual Component* Copy() const;

  // Some functions that are specific to this class:
  explicit NaturalGradientPerElementScaleComponent(
      const NaturalGradientPerElementScaleComponent &other);

  // Two forms of initialization.  The first three arguments of the first form
  // (or the filename in the second form) mirror the Init() overloads of the
  // parent class; the remaining arguments are presumably the preconditioner
  // settings and the max-change value -- confirm against the definitions.
  void Init(int32 dim, BaseFloat param_mean,
            BaseFloat param_stddev, int32 rank, int32 update_period,
            BaseFloat num_samples_history, BaseFloat alpha,
            BaseFloat max_change_per_minibatch);
  void Init(std::string vector_filename,
            int32 rank, int32 update_period, BaseFloat num_samples_history,
            BaseFloat alpha, BaseFloat max_change_per_minibatch);

 private:
  // configuration value for imposing max-change...
  BaseFloat max_change_per_minibatch_;

  // unlike the NaturalGradientAffineComponent, there is only one dimension to
  // consider as the parameters are a vector not a matrix, so we only need one
  // preconditioner.
  // The preconditioner stores its own configuration values; we write and read
  // these, but not the preconditioner object itself.
  OnlineNaturalGradient preconditioner_;

  // Override of the parent-class Update() function, called only
  // if this->is_gradient_ = false; this implements the natural
  // gradient update.
  virtual void Update(
      const std::string &debug_info,
      const CuMatrixBase<BaseFloat> &in_value,
      const CuMatrixBase<BaseFloat> &out_deriv);

  // Declared private to forbid assignment.
  const NaturalGradientPerElementScaleComponent &operator
      = (const NaturalGradientPerElementScaleComponent &other); // Disallow.
};
1245 /**
1246 * ConvolutionalComponent implements 2d-convolution.
1247 * It uses 3D filters on 3D inputs, but the 3D filters hop only over
1248 * 2 dimensions as it has same size as the input along the 3rd dimension.
1249 * Input : A matrix where each row is a vectorized 3D-tensor.
1250 * The 3D tensor has dimensions
1251 * x: (e.g. time)
1252 * y: (e.g. frequency)
1253 * z: (e.g. channels like features/delta/delta-delta)
1254 *
1255 * The component supports input vectorizations of type zyx and yzx.
1256 * The default vectorization type is zyx.
1257 * e.g. for input vectorization of type zyx the input is vectorized by
1258 * spanning axes z, y and x of the tensor in that order.
1259 * Given 3d tensor A with sizes (2, 2, 2) along the three dimensions
1260 * the zyx vectorized input looks like
1261 * A(0,0,0) A(0,0,1) A(0,1,0) A(0,1,1) A(1,0,0) A(1,0,1) A(1,1,0) A(1,1,1)
1262 *
1263 *
1264 * Output : The output is also a 3D tensor vectorized in the zyx format.
1265 * The channel axis (z) in the output corresponds to the output of
1266 * different filters. The first channel corresponds to the first filter
1267 * i.e., first row of the filter_params_ matrix.
1268 *
1269 * Note: The component has to support yzx input vectorization as the binaries
1270 * like add-deltas generate yz vectorized output. These input vectors are
1271 * concatenated using the Append descriptor across time steps to form a yzx
1272 * vectorized 3D tensor input.
1273 * e.g. Append(Offset(input, -1), input, Offset(input, 1))
1274 *
1275 *
1276 * For information on the hyperparameters and parameters of this component see
1277 * the variable declarations.
1278 *
1279 * Propagation:
1280 * ------------
1281 * Convolution operation consists of a dot-products between the filter tensor
1282 * and input tensor patch, for various shifts of filter tensor along the x and y
1283 * axes input tensor. (Note: there is no shift along z-axis as the filter and
1284 * input tensor have same size along this axis).
1285 *
1286 * For a particular shift (i,j) of the filter tensor
1287 * along input tensor dimensions x and y, the elements of the input tensor which
1288 * overlap with the filter form the input tensor patch. This patch is vectorized
1289 * in zyx format. All the patches corresponding to various samples in the
1290 * mini-batch are stacked into a matrix, where each row corresponds to one
1291 * patch. Let this matrix be represented by X_{i,j}. The dot products with
1292 * various filters are computed simultaneously by computing the matrix product
1293 * with the filter_params_ matrix (W)
1294 * Y_{i,j} = X_{i,j}*W^T.
1295 * Each row of W corresponds to one filter 3D tensor vectorized in zyx format.
1296 *
1297 * All the matrix products corresponding to various shifts (i,j) of the
1298 * filter tensor are computed simultaneously using the AddMatMatBatched
1299 * call of CuMatrixBase class.
1300 *
1301 * BackPropagation:
1302 * ----------------
1303 * Backpropagation to compute the input derivative (\nabla X_{i,j})
1304 * consists of the a series of matrix products.
1305 * \nablaX_{i,j} = \nablaY_{i,j}*W where \nablaY_{i,j} corresponds to the
1306 * output derivative for a particular shift of the filter.
1307 *
1308 * Once again these matrix products are computed simultaneously.
1309 *
1310 * Update:
1311 * -------
1312 * The weight gradient is computed as
1313 * \nablaW = \Sum_{i,j} (X_{i,j}^T *\nablaY_{i,j})
1314 *
1315 */
1316 class ConvolutionComponent: public UpdatableComponent {
1317 public:
1318 enum TensorVectorizationType {
1319 kYzx = 0,
1320 kZyx = 1
1321 };
1323 ConvolutionComponent();
1324 // constructor using another component
1325 ConvolutionComponent(const ConvolutionComponent &component);
1326 // constructor using parameters
1327 ConvolutionComponent(
1328 const CuMatrixBase<BaseFloat> &filter_params,
1329 const CuVectorBase<BaseFloat> &bias_params,
1330 int32 input_x_dim, int32 input_y_dim, int32 input_z_dim,
1331 int32 filt_x_dim, int32 filt_y_dim,
1332 int32 filt_x_step, int32 filt_y_step,
1333 TensorVectorizationType input_vectorization,
1334 BaseFloat learning_rate);
1336 virtual int32 InputDim() const;
1337 virtual int32 OutputDim() const;
1339 virtual std::string Info() const;
1340 virtual void InitFromConfig(ConfigLine *cfl);
1341 virtual std::string Type() const { return "ConvolutionComponent"; }
1342 virtual int32 Properties() const {
1343 return kSimpleComponent|kUpdatableComponent|kBackpropNeedsInput|
1344 kBackpropAdds|kPropagateAdds;
1345 }
1347 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1348 const CuMatrixBase<BaseFloat> &in,
1349 CuMatrixBase<BaseFloat> *out) const;
1350 virtual void Backprop(const std::string &debug_info,
1351 const ComponentPrecomputedIndexes *indexes,
1352 const CuMatrixBase<BaseFloat> &in_value,
1353 const CuMatrixBase<BaseFloat> &, // out_value,
1354 const CuMatrixBase<BaseFloat> &out_deriv,
1355 Component *to_update_in,
1356 CuMatrixBase<BaseFloat> *in_deriv) const;
1357 void Update(const std::string &debug_info,
1358 const CuMatrixBase<BaseFloat> &in_value,
1359 const CuMatrixBase<BaseFloat> &out_deriv,
1360 const std::vector<CuSubMatrix<BaseFloat> *>& out_deriv_batch);
1364 virtual void Read(std::istream &is, bool binary);
1365 virtual void Write(std::ostream &os, bool binary) const;
1367 virtual Component* Copy() const;
1369 // Some functions from base-class UpdatableComponent.
1370 virtual void Scale(BaseFloat scale);
1371 virtual void Add(BaseFloat alpha, const Component &other);
1372 virtual void SetZero(bool treat_as_gradient);
1373 virtual void PerturbParams(BaseFloat stddev);
1374 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1375 virtual int32 NumParameters() const;
1376 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1377 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
1379 // Some functions that are specific to this class.
1380 void SetParams(const VectorBase<BaseFloat> &bias,
1381 const MatrixBase<BaseFloat> &filter);
1382 const CuVector<BaseFloat> &BiasParams() { return bias_params_; }
1383 const CuMatrix<BaseFloat> &LinearParams() { return filter_params_; }
1384 void Init(int32 input_x_dim, int32 input_y_dim, int32 input_z_dim,
1385 int32 filt_x_dim, int32 filt_y_dim,
1386 int32 filt_x_step, int32 filt_y_step, int32 num_filters,
1387 TensorVectorizationType input_vectorization,
1388 BaseFloat param_stddev, BaseFloat bias_stddev);
1389 // there is no filt_z_dim parameter as the length of the filter along
1390 // z-dimension is same as the input
1391 void Init(int32 input_x_dim, int32 input_y_dim, int32 input_z_dim,
1392 int32 filt_x_dim, int32 filt_y_dim,
1393 int32 filt_x_step, int32 filt_y_step,
1394 TensorVectorizationType input_vectorization,
1395 std::string matrix_filename);
1397 // resize the component, setting the parameters to zero, while
1398 // leaving any other configuration values the same
1399 void Resize(int32 input_dim, int32 output_dim);
1401 void Update(const std::string &debug_info,
1402 const CuMatrixBase<BaseFloat> &in_value,
1403 const CuMatrixBase<BaseFloat> &out_deriv);
1406 private:
1407 int32 input_x_dim_; // size of the input along x-axis
1408 // (e.g. number of time steps)
1410 int32 input_y_dim_; // size of input along y-axis
1411 // (e.g. number of mel-frequency bins)
1413 int32 input_z_dim_; // size of input along z-axis
1414 // (e.g. number of channels is 3 if the input has
1415 // features + delta + delta-delta features
1417 int32 filt_x_dim_; // size of the filter along x-axis
1419 int32 filt_y_dim_; // size of the filter along y-axis
1421 // there is no filt_z_dim_ as it is always assumed to be
1422 // the same as input_z_dim_
1424 int32 filt_x_step_; // the number of steps taken along x-axis of input
1425 // before computing the next dot-product
1426 // of filter and input
1428 int32 filt_y_step_; // the number of steps taken along y-axis of input
1429 // before computing the next dot-product of the filter
1430 // and input
1432 // there is no filt_z_step_ as only dot product is possible along this axis
1434 TensorVectorizationType input_vectorization_; // type of vectorization of the
1435 // input 3D tensor. Accepts zyx and yzx formats
1437 CuMatrix<BaseFloat> filter_params_;
1438 // the filter (or kernel) matrix is a matrix of vectorized 3D filters
1439 // where each row in the matrix corresponds to one filter.
1440 // The 3D filter tensor is vectorizedin zyx format.
1441 // The first row of the matrix corresponds to the first filter and so on.
1442 // Keep in mind the vectorization type and order of filters when using file
1443 // based initialization.
1445 CuVector<BaseFloat> bias_params_;
1446 // the filter-specific bias vector (i.e., there is a seperate bias added
1447 // to the output of each filter).
1448 bool is_gradient_;
1450 void InputToInputPatches(const CuMatrixBase<BaseFloat>& in,
1451 CuMatrix<BaseFloat> *patches) const;
1452 void InderivPatchesToInderiv(const CuMatrix<BaseFloat>& in_deriv_patches,
1453 CuMatrixBase<BaseFloat> *in_deriv) const;
1454 const ConvolutionComponent &operator = (const ConvolutionComponent &other); // Disallow.
1455 };
1457 /**
 * MaxpoolingComponent :
 * Max-pooling was first used in ConvNets to select a representative
 * activation in an area.  It inspired the Maxout nonlinearity.
1461 * Each output element of this component is the maximum of a block of
1462 * input elements where the block has a 3D dimension (pool_x_size_,
1463 * pool_y_size_, pool_z_size_).
1464 * Blocks could overlap if the shift value on any axis is smaller
1465 * than its corresponding pool size (e.g. pool_x_step_ < pool_x_size_).
 * If the shift values are equal to their corresponding pool sizes, there
 * is no overlap; while if they all equal 1, the blocks overlap to
 * the greatest possible extent.
1469 *
1470 * This component is designed to be used after a ConvolutionComponent
1471 * so that the input matrix is propagated from a 2d-convolutional layer.
1472 * This component implements 3d-maxpooling which performs
1473 * max pooling along the three axes.
1474 * Input : A matrix where each row is a vectorized 3D-tensor.
1475 * The 3D tensor has dimensions
1476 * x: (e.g. time)
1477 * y: (e.g. frequency)
1478 * z: (e.g. channels like number of filters in the ConvolutionComponent)
1479 *
1480 * The component assumes input vectorizations of type zyx
1481 * which is the default output vectorization type of a ConvolutionComponent.
1482 * e.g. for input vectorization of type zyx the input is vectorized by
1483 * spanning axes z, y and x of the tensor in that order.
1484 * Given 3d tensor A with sizes (2, 2, 2) along the three dimensions
1485 * the zyx vectorized input looks like
1486 * A(0,0,0) A(0,0,1) A(0,1,0) A(0,1,1) A(1,0,0) A(1,0,1) A(1,1,0) A(1,1,1)
1487 *
1488 * Output : The output is also a 3D tensor vectorized in the zyx format.
1489 *
1490 * For information on the hyperparameters and parameters of this component see
1491 * the variable declarations.
1492 *
1493 *
1494 */
1496 class MaxpoolingComponent: public Component {
1497 public:
1499 MaxpoolingComponent(): input_x_dim_(0), input_y_dim_(0), input_z_dim_(0),
1500 pool_x_size_(0), pool_y_size_(0), pool_z_size_(0),
1501 pool_x_step_(0), pool_y_step_(0), pool_z_step_(0) { }
1502 // constructor using another component
1503 MaxpoolingComponent(const MaxpoolingComponent &component):
1504 input_x_dim_(component.input_x_dim_),
1505 input_y_dim_(component.input_y_dim_),
1506 input_z_dim_(component.input_z_dim_),
1507 pool_x_size_(component.pool_x_size_),
1508 pool_y_size_(component.pool_y_size_),
1509 pool_z_size_(component.pool_z_size_),
1510 pool_x_step_(component.pool_x_step_),
1511 pool_y_step_(component.pool_y_step_),
1512 pool_z_step_(component.pool_z_step_) { }
1514 virtual int32 InputDim() const;
1515 virtual int32 OutputDim() const;
1516 virtual void Check() const;
1518 virtual std::string Info() const;
1519 virtual void InitFromConfig(ConfigLine *cfl);
1520 virtual std::string Type() const { return "MaxpoolingComponent"; }
1521 virtual int32 Properties() const {
1522 return kSimpleComponent|kBackpropNeedsInput|kBackpropNeedsOutput|
1523 kBackpropAdds;
1524 }
1526 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1527 const CuMatrixBase<BaseFloat> &in,
1528 CuMatrixBase<BaseFloat> *out) const;
1529 virtual void Backprop(const std::string &debug_info,
1530 const ComponentPrecomputedIndexes *indexes,
1531 const CuMatrixBase<BaseFloat> &in_value,
1532 const CuMatrixBase<BaseFloat> &out_value,
1533 const CuMatrixBase<BaseFloat> &out_deriv,
1534 Component *, // to_update,
1535 CuMatrixBase<BaseFloat> *in_deriv) const;
1537 virtual void Read(std::istream &is, bool binary); // This Read function
1538 // requires that the Component has the correct type.
1540 /// Write component to stream
1541 virtual void Write(std::ostream &os, bool binary) const;
1542 virtual Component* Copy() const { return new MaxpoolingComponent(*this); }
1544 void InputToInputPatches(const CuMatrixBase<BaseFloat>& in,
1545 CuMatrix<BaseFloat> *patches) const;
1546 void InderivPatchesToInderiv(const CuMatrix<BaseFloat>& in_deriv_patches,
1547 CuMatrixBase<BaseFloat> *in_deriv) const;
1549 protected:
1550 int32 input_x_dim_; // size of the input along x-axis
1551 // (e.g. number of time steps)
1552 int32 input_y_dim_; // size of input along y-axis
1553 // (e.g. number of mel-frequency bins)
1554 int32 input_z_dim_; // size of input along z-axis
1555 // (e.g. number of filters in the ConvolutionComponent)
1557 int32 pool_x_size_; // size of the pooling window along x-axis
1558 int32 pool_y_size_; // size of the pooling window along y-axis
1559 int32 pool_z_size_; // size of the pooling window along z-axis
1561 int32 pool_x_step_; // the number of steps taken along x-axis of input
1562 // before computing the next pool
1563 int32 pool_y_step_; // the number of steps taken along y-axis of input
1564 // before computing the next pool
1565 int32 pool_z_step_; // the number of steps taken along z-axis of input
1566 // before computing the next pool
1568 };
1571 /**
1572 CompositeComponent is a component representing a sequence of
1573 [simple] components. The config line would be something like the following
1574 (imagine this is all on one line):
1576 component name=composite1 type=CompositeComponent max-rows-process=2048 num-components=3 \
1577 component1='type=BlockAffineComponent input-dim=1000 output-dim=10000 num-blocks=100' \
1578 component2='type=RectifiedLinearComponent dim=10000' \
1579 component3='type=BlockAffineComponent input-dim=10000 output-dim=1000 num-blocks=100'
1581 The reason you might want to use this component, instead of directly using
1582 the same sequence of components in the config file, is to save GPU memory (at
1583 the expense of more compute)-- because doing it like this means we have to
1584 re-do parts of the forward pass in the backprop phase, but we avoid using
1585 much memory for very long (and you can make the memory usage very small by
1586 making max-rows-process small). We inherit from UpdatableComponent just in
1587 case one or more of the components in the sequence are updatable.
1589 It is an error to nest a CompositeComponent inside a CompositeComponent.
1590 The same effect can be accomplished by specifying a smaller max-rows-process
1591 in a single CompositeComponent.
1592 */
1593 class CompositeComponent: public UpdatableComponent {
1594 public:
1595 virtual int32 InputDim() const;
1596 virtual int32 OutputDim() const;
1598 virtual std::string Info() const;
1600 virtual void InitFromConfig(ConfigLine *cfl);
1602 virtual Component* Copy() const;
1604 CompositeComponent() { } // use Init() or InitFromConfig() to really initialize.
1606 // Initialize from this list of components; takes ownership of the pointers.
1607 void Init(const std::vector<Component*> &components,
1608 int32 max_rows_process);
1610 virtual std::string Type() const { return "CompositeComponent"; }
1612 // The properties depend on the properties of the constituent components. As
1613 // a special case, we never return kStoresStats in the properties: by default
1614 // we store things like activation stats (e.g. for nonlinear components like
1615 // ReLU) as part of the backprop. This means we may wastefully store stats
1616 // even when not requested, but it does save time as a separate StoreStats()
1617 // call would involve propagating the internals.
1618 virtual int32 Properties() const;
1620 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1621 const CuMatrixBase<BaseFloat> &in,
1622 CuMatrixBase<BaseFloat> *out) const;
1623 virtual void Backprop(const std::string &debug_info,
1624 const ComponentPrecomputedIndexes *indexes,
1625 const CuMatrixBase<BaseFloat> &in_value,
1626 const CuMatrixBase<BaseFloat> &, // out_value
1627 const CuMatrixBase<BaseFloat> &out_deriv,
1628 Component *to_update,
1629 CuMatrixBase<BaseFloat> *in_deriv) const;
1631 // note, we don't implement StoreStats() as it would be inefficient. Instead,
1632 // by default we call StoreStats() on all members that have the flag set,
1633 // inside the Backprop.
1634 virtual void ZeroStats();
1636 virtual void Read(std::istream &is, bool binary);
1637 virtual void Write(std::ostream &os, bool binary) const;
1639 // Don't implement Copy() at this level: implement it in the child class.
1641 // Some functions from base-class UpdatableComponent.
1642 virtual void SetLearningRate(BaseFloat lrate);
1643 virtual void Scale(BaseFloat scale);
1644 virtual void Add(BaseFloat alpha, const Component &other);
1645 virtual void SetZero(bool treat_as_gradient);
1646 virtual void PerturbParams(BaseFloat stddev);
1647 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1648 virtual int32 NumParameters() const;
1649 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1650 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
1652 // note: we dont implement the StoreStats function as it would be quite
1653 // expensive; instead, by default we call StoreStats() for any components that
1654 // want to store stats, as part of the backprop pass. This is not 100% ideal
1655 // but it will usually do what you want. We can revisit this later if needed.
1657 // Functions to iterate over the internal components
1659 int32 NumComponents() const { return components_.size();}
1660 /// Gets the ith component in this component.
1661 /// The ordering is the same as in the config line. The caller
1662 /// does not own the received component.
1663 const Component* GetComponent(int32 i) const;
1664 /// Sets the ith component. After this call, CompositeComponent owns
1665 /// the reference to the argument component. Frees the previous
1666 /// ith component.
1667 void SetComponent(int32 i, Component *component);
1669 virtual ~CompositeComponent() { DeletePointers(&components_); }
1670 protected:
1671 // returns true if at least one of 'components_' returns the kUpdatable flag
1672 // in its flags.
1673 bool IsUpdatable() const;
1675 // the maximum number of
1676 int32 max_rows_process_;
1677 std::vector<Component*> components_;
1679 };
1682 } // namespace nnet3
1683 } // namespace kaldi
1686 #endif