1 // nnet3/nnet-simple-component.h
3 // Copyright 2011-2013 Karel Vesely
4 // 2012-2015 Johns Hopkins University (author: Daniel Povey)
5 // 2013 Xiaohui Zhang
6 // 2014-2015 Vijayaditya Peddinti
7 // 2014-2015 Guoguo Chen
8 // 2015 Daniel Galvez
9 // 2015 Tom Ko
11 // See ../../COPYING for clarification regarding multiple authors
12 //
13 // Licensed under the Apache License, Version 2.0 (the "License");
14 // you may not use this file except in compliance with the License.
15 // You may obtain a copy of the License at
16 //
17 // http://www.apache.org/licenses/LICENSE-2.0
18 //
19 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
21 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
22 // MERCHANTABLITY OR NON-INFRINGEMENT.
23 // See the Apache 2 License for the specific language governing permissions and
24 // limitations under the License.
26 #ifndef KALDI_NNET3_NNET_SIMPLE_COMPONENT_H_
27 #define KALDI_NNET3_NNET_SIMPLE_COMPONENT_H_
29 #include "nnet3/nnet-common.h"
30 #include "nnet3/nnet-component-itf.h"
31 #include "nnet3/natural-gradient-online.h"
32 #include <iostream>
34 namespace kaldi {
35 namespace nnet3 {
37 /// @file nnet-simple-component.h
38 /// This file contains declarations of components that are "simple", meaning
39 /// they don't care about the indexes they are operating on, produce one
40 /// output for one input, and return the kSimpleComponent flag in their
41 /// Properties(): for example, tanh and affine components. In
42 /// nnet-general-component.h there are components that don't fit this pattern.
44 // This "nnet3" version of the p-norm component only supports the 2-norm.
45 class PnormComponent: public Component {
46 public:
47 void Init(int32 input_dim, int32 output_dim);
48 explicit PnormComponent(int32 input_dim, int32 output_dim) {
49 Init(input_dim, output_dim);
50 }
51 virtual int32 Properties() const {
52 return kSimpleComponent|kLinearInInput|kBackpropNeedsInput|kBackpropNeedsOutput;
53 }
54 PnormComponent(): input_dim_(0), output_dim_(0) { }
55 virtual std::string Type() const { return "PnormComponent"; }
56 virtual void InitFromConfig(ConfigLine *cfl);
57 virtual int32 InputDim() const { return input_dim_; }
58 virtual int32 OutputDim() const { return output_dim_; }
59 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
60 const CuMatrixBase<BaseFloat> &in,
61 CuMatrixBase<BaseFloat> *out) const;
62 virtual void Backprop(const std::string &debug_info,
63 const ComponentPrecomputedIndexes *indexes,
64 const CuMatrixBase<BaseFloat> &in_value,
65 const CuMatrixBase<BaseFloat> &out_value,
66 const CuMatrixBase<BaseFloat> &out_deriv,
67 Component *to_update,
68 CuMatrixBase<BaseFloat> *in_deriv) const;
69 virtual Component* Copy() const { return new PnormComponent(input_dim_,
70 output_dim_); }
72 virtual void Read(std::istream &is, bool binary); // This Read function
73 // requires that the Component has the correct type.
75 /// Write component to stream
76 virtual void Write(std::ostream &os, bool binary) const;
78 protected:
79 int32 input_dim_;
80 int32 output_dim_;
81 };
83 // This component randomly zeros dropout_proportion of the input
84 // and the derivatives are backpropagated through the nonzero inputs.
85 // Typically this component used during training but not in test time.
86 // The idea is described under the name Dropout, in the paper
87 // "Dropout: A Simple Way to Prevent Neural Networks from Overfitting".
88 class DropoutComponent : public RandomComponent {
89 public:
90 void Init(int32 dim, BaseFloat dropout_proportion = 0.0);
92 DropoutComponent(int32 dim, BaseFloat dropout = 0.0) { Init(dim, dropout); }
94 DropoutComponent(): dim_(0), dropout_proportion_(0.0) { }
96 virtual int32 Properties() const {
97 return kLinearInInput|kBackpropInPlace|kSimpleComponent|kBackpropNeedsInput|kBackpropNeedsOutput;
98 }
99 virtual std::string Type() const { return "DropoutComponent"; }
101 virtual void InitFromConfig(ConfigLine *cfl);
103 virtual int32 InputDim() const { return dim_; }
105 virtual int32 OutputDim() const { return dim_; }
107 virtual void Read(std::istream &is, bool binary);
109 // Write component to stream
110 virtual void Write(std::ostream &os, bool binary) const;
112 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
113 const CuMatrixBase<BaseFloat> &in,
114 CuMatrixBase<BaseFloat> *out) const;
115 virtual void Backprop(const std::string &debug_info,
116 const ComponentPrecomputedIndexes *indexes,
117 const CuMatrixBase<BaseFloat> &in_value,
118 const CuMatrixBase<BaseFloat> &out_value,
119 const CuMatrixBase<BaseFloat> &out_deriv,
120 Component *to_update,
121 CuMatrixBase<BaseFloat> *in_deriv) const;
122 virtual Component* Copy() const { return new DropoutComponent(dim_,
123 dropout_proportion_); }
124 virtual std::string Info() const;
126 void SetDropoutProportion(BaseFloat dropout_proportion) { dropout_proportion_ = dropout_proportion; }
128 private:
129 int32 dim_;
130 /// dropout-proportion is the proportion that is dropped out,
131 /// e.g. if 0.1, we set 10% to zero value.
132 BaseFloat dropout_proportion_;
134 };
136 class ElementwiseProductComponent: public Component {
137 public:
138 void Init(int32 input_dim, int32 output_dim);
139 explicit ElementwiseProductComponent(int32 input_dim, int32 output_dim) {
140 Init(input_dim, output_dim);
141 }
142 virtual int32 Properties() const {
143 return kSimpleComponent|kBackpropNeedsInput;
144 }
145 ElementwiseProductComponent(): input_dim_(0), output_dim_(0) { }
146 virtual std::string Type() const { return "ElementwiseProductComponent"; }
147 virtual void InitFromConfig(ConfigLine *cfl);
148 virtual int32 InputDim() const { return input_dim_; }
149 virtual int32 OutputDim() const { return output_dim_; }
150 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
151 const CuMatrixBase<BaseFloat> &in,
152 CuMatrixBase<BaseFloat> *out) const;
153 virtual void Backprop(const std::string &debug_info,
154 const ComponentPrecomputedIndexes *indexes,
155 const CuMatrixBase<BaseFloat> &in_value,
156 const CuMatrixBase<BaseFloat> &out_value,
157 const CuMatrixBase<BaseFloat> &out_deriv,
158 Component *to_update,
159 CuMatrixBase<BaseFloat> *in_deriv) const;
160 virtual Component* Copy() const { return new ElementwiseProductComponent(input_dim_,
161 output_dim_); }
163 virtual void Read(std::istream &is, bool binary); // This Read function
164 // requires that the Component has the correct type.
166 /// Write component to stream
167 virtual void Write(std::ostream &os, bool binary) const;
169 protected:
170 int32 input_dim_;
171 int32 output_dim_;
172 };
174 class NormalizeComponent: public Component {
175 public:
176 void Init(int32 input_dim, BaseFloat target_rms, bool add_log_stddev);
177 explicit NormalizeComponent(int32 input_dim,
178 BaseFloat target_rms = 1.0,
179 bool add_log_stddev = false) {
180 Init(input_dim, target_rms, add_log_stddev);
181 }
182 explicit NormalizeComponent(const NormalizeComponent &other);
183 virtual int32 Properties() const {
184 return (add_log_stddev_ ?
185 kSimpleComponent|kBackpropNeedsInput|kBackpropAdds :
186 kSimpleComponent|kBackpropNeedsInput|kPropagateInPlace|
187 kBackpropAdds|kBackpropInPlace);
188 }
189 NormalizeComponent(): target_rms_(1.0), add_log_stddev_(false) { }
190 virtual std::string Type() const { return "NormalizeComponent"; }
191 virtual void InitFromConfig(ConfigLine *cfl);
192 virtual Component* Copy() const { return new NormalizeComponent(*this); }
193 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
194 const CuMatrixBase<BaseFloat> &in,
195 CuMatrixBase<BaseFloat> *out) const;
196 virtual void Backprop(const std::string &debug_info,
197 const ComponentPrecomputedIndexes *indexes,
198 const CuMatrixBase<BaseFloat> &in_value,
199 const CuMatrixBase<BaseFloat> &, // out_value
200 const CuMatrixBase<BaseFloat> &out_deriv,
201 Component *to_update,
202 CuMatrixBase<BaseFloat> *in_deriv) const;
204 virtual void Read(std::istream &is, bool binary);
205 virtual void Write(std::ostream &os, bool binary) const;
206 virtual int32 InputDim() const { return input_dim_; }
207 virtual int32 OutputDim() const {
208 return (input_dim_ + (add_log_stddev_ ? 1 : 0));
209 }
210 virtual std::string Info() const;
211 private:
212 NormalizeComponent &operator = (const NormalizeComponent &other); // Disallow.
213 enum { kExpSquaredNormFloor = -66 };
214 static const BaseFloat kSquaredNormFloor;
215 int32 input_dim_;
216 BaseFloat target_rms_; // The target rms for outputs.
217 // about 0.7e-20. We need a value that's exactly representable in
218 // float and whose inverse square root is also exactly representable
219 // in float (hence, an even power of two).
221 bool add_log_stddev_; // If true, log(max(epsi, sqrt(row_in^T row_in / D)))
222 // is an extra dimension of the output.
223 };
226 class SigmoidComponent: public NonlinearComponent {
227 public:
228 explicit SigmoidComponent(const SigmoidComponent &other): NonlinearComponent(other) { }
229 SigmoidComponent() { }
230 virtual std::string Type() const { return "SigmoidComponent"; }
231 virtual int32 Properties() const {
232 return kSimpleComponent|kBackpropNeedsOutput|kPropagateInPlace|kStoresStats;
233 }
234 virtual Component* Copy() const { return new SigmoidComponent(*this); }
235 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
236 const CuMatrixBase<BaseFloat> &in,
237 CuMatrixBase<BaseFloat> *out) const;
238 virtual void Backprop(const std::string &debug_info,
239 const ComponentPrecomputedIndexes *indexes,
240 const CuMatrixBase<BaseFloat> &, //in_value
241 const CuMatrixBase<BaseFloat> &out_value,
242 const CuMatrixBase<BaseFloat> &out_deriv,
243 Component *to_update,
244 CuMatrixBase<BaseFloat> *in_deriv) const;
245 virtual void StoreStats(const CuMatrixBase<BaseFloat> &out_value);
246 private:
247 // this function is called from Backprop code and only does something if the
248 // self-repair-scale config value is set.
249 void RepairGradients(const CuMatrixBase<BaseFloat> &out_value,
250 CuMatrixBase<BaseFloat> *in_deriv) const;
252 SigmoidComponent &operator = (const SigmoidComponent &other); // Disallow.
253 };
255 class TanhComponent: public NonlinearComponent {
256 public:
257 explicit TanhComponent(const TanhComponent &other): NonlinearComponent(other) { }
258 TanhComponent() { }
259 virtual std::string Type() const { return "TanhComponent"; }
260 virtual Component* Copy() const { return new TanhComponent(*this); }
261 virtual int32 Properties() const {
262 return kSimpleComponent|kBackpropNeedsOutput|kPropagateInPlace|kStoresStats;
263 }
264 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
265 const CuMatrixBase<BaseFloat> &in,
266 CuMatrixBase<BaseFloat> *out) const;
267 virtual void Backprop(const std::string &debug_info,
268 const ComponentPrecomputedIndexes *indexes,
269 const CuMatrixBase<BaseFloat> &, //in_value
270 const CuMatrixBase<BaseFloat> &out_value,
271 const CuMatrixBase<BaseFloat> &out_deriv,
272 Component *to_update,
273 CuMatrixBase<BaseFloat> *in_deriv) const;
274 virtual void StoreStats(const CuMatrixBase<BaseFloat> &out_value);
275 private:
276 // this function is called from Backprop code and only does something if the
277 // self-repair-scale config value is set.
278 void RepairGradients(const CuMatrixBase<BaseFloat> &out_value,
279 CuMatrixBase<BaseFloat> *in_deriv) const;
281 TanhComponent &operator = (const TanhComponent &other); // Disallow.
282 };
285 class RectifiedLinearComponent: public NonlinearComponent {
286 public:
287 explicit RectifiedLinearComponent(const RectifiedLinearComponent &other):
288 NonlinearComponent(other) { }
289 RectifiedLinearComponent() { }
290 virtual std::string Type() const { return "RectifiedLinearComponent"; }
291 virtual Component* Copy() const { return new RectifiedLinearComponent(*this); }
292 virtual int32 Properties() const {
293 return kSimpleComponent|kLinearInInput|kBackpropNeedsOutput|kPropagateInPlace|
294 kStoresStats;
295 }
296 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
297 const CuMatrixBase<BaseFloat> &in,
298 CuMatrixBase<BaseFloat> *out) const;
299 virtual void Backprop(const std::string &debug_info,
300 const ComponentPrecomputedIndexes *indexes,
301 const CuMatrixBase<BaseFloat> &, //in_value
302 const CuMatrixBase<BaseFloat> &out_value,
303 const CuMatrixBase<BaseFloat> &out_deriv,
304 Component *to_update,
305 CuMatrixBase<BaseFloat> *in_deriv) const;
306 virtual void StoreStats(const CuMatrixBase<BaseFloat> &out_value);
308 private:
309 // this function is called from Backprop code and only does something if the
310 // self-repair-scale config value is set.
311 void RepairGradients(CuMatrixBase<BaseFloat> *in_deriv) const;
313 RectifiedLinearComponent &operator = (const RectifiedLinearComponent &other); // Disallow.
314 };
316 /**
317 This component is a fixed (non-trainable) nonlinearity that sums its inputs
318 to produce outputs. Currently the only supported configuration is that its
319 input-dim is interpreted as consisting of n blocks, and the output is just a
320 summation over the n blocks, where n = input-dim / output-dim, so for instance
321 output[n] = input[n] + input[block-size + n] + .... .
322 Later if needed we can add a configuration variable that allows you to sum
323 over 'interleaved' input.
324 */
325 class SumReduceComponent: public Component {
326 public:
327 void Init(int32 input_dim, int32 output_dim);
328 explicit SumReduceComponent(int32 input_dim, int32 output_dim) {
329 Init(input_dim, output_dim);
330 }
331 virtual int32 Properties() const {
332 return kSimpleComponent|kLinearInInput;
333 }
334 SumReduceComponent(): input_dim_(0), output_dim_(0) { }
335 virtual std::string Type() const { return "SumReduceComponent"; }
336 virtual void InitFromConfig(ConfigLine *cfl);
337 virtual int32 InputDim() const { return input_dim_; }
338 virtual int32 OutputDim() const { return output_dim_; }
339 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
340 const CuMatrixBase<BaseFloat> &in,
341 CuMatrixBase<BaseFloat> *out) const;
342 virtual void Backprop(const std::string &debug_info,
343 const ComponentPrecomputedIndexes *indexes,
344 const CuMatrixBase<BaseFloat> &, // in_value
345 const CuMatrixBase<BaseFloat> &, // out_value,
346 const CuMatrixBase<BaseFloat> &out_deriv,
347 Component *, // to_update
348 CuMatrixBase<BaseFloat> *in_deriv) const;
349 virtual Component* Copy() const { return new SumReduceComponent(input_dim_,
350 output_dim_); }
352 virtual void Read(std::istream &is, bool binary); // This Read function
353 // requires that the Component has the correct type.
355 /// Write component to stream
356 virtual void Write(std::ostream &os, bool binary) const;
358 protected:
359 int32 input_dim_;
360 int32 output_dim_;
361 };
364 class FixedAffineComponent;
365 class FixedScaleComponent;
366 class PerElementScaleComponent;
367 class PerElementOffsetComponent;
369 // Affine means a linear function plus an offset.
370 // Note: although this class can be instantiated, it also
371 // functions as a base-class for more specialized versions of
372 // AffineComponent.
373 class AffineComponent: public UpdatableComponent {
374 friend class SoftmaxComponent; // Friend declaration relates to mixing up.
375 public:
377 virtual int32 InputDim() const { return linear_params_.NumCols(); }
378 virtual int32 OutputDim() const { return linear_params_.NumRows(); }
380 virtual std::string Info() const;
381 virtual void InitFromConfig(ConfigLine *cfl);
383 AffineComponent() { } // use Init to really initialize.
384 virtual std::string Type() const { return "AffineComponent"; }
385 virtual int32 Properties() const {
386 return kSimpleComponent|kUpdatableComponent|kLinearInParameters|
387 kBackpropNeedsInput|kBackpropAdds;
388 }
391 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
392 const CuMatrixBase<BaseFloat> &in,
393 CuMatrixBase<BaseFloat> *out) const;
394 virtual void Backprop(const std::string &debug_info,
395 const ComponentPrecomputedIndexes *indexes,
396 const CuMatrixBase<BaseFloat> &in_value,
397 const CuMatrixBase<BaseFloat> &, // out_value
398 const CuMatrixBase<BaseFloat> &out_deriv,
399 Component *to_update,
400 CuMatrixBase<BaseFloat> *in_deriv) const;
402 virtual void Read(std::istream &is, bool binary);
403 virtual void Write(std::ostream &os, bool binary) const;
405 virtual Component* Copy() const;
408 // Some functions from base-class UpdatableComponent.
409 virtual void Scale(BaseFloat scale);
410 virtual void Add(BaseFloat alpha, const Component &other);
411 virtual void SetZero(bool treat_as_gradient);
412 virtual void PerturbParams(BaseFloat stddev);
413 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
414 virtual int32 NumParameters() const;
415 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
416 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
418 // Some functions that are specific to this class.
420 // This new function is used when mixing up:
421 virtual void SetParams(const VectorBase<BaseFloat> &bias,
422 const MatrixBase<BaseFloat> &linear);
423 const CuVector<BaseFloat> &BiasParams() const { return bias_params_; }
424 const CuMatrix<BaseFloat> &LinearParams() const { return linear_params_; }
425 explicit AffineComponent(const AffineComponent &other);
426 // The next constructor is used in converting from nnet1.
427 AffineComponent(const CuMatrixBase<BaseFloat> &linear_params,
428 const CuVectorBase<BaseFloat> &bias_params,
429 BaseFloat learning_rate);
430 void Init(int32 input_dim, int32 output_dim,
431 BaseFloat param_stddev, BaseFloat bias_stddev);
432 void Init(std::string matrix_filename);
434 // This function resizes the dimensions of the component, setting the
435 // parameters to zero, while leaving any other configuration values the same.
436 virtual void Resize(int32 input_dim, int32 output_dim);
438 // The following functions are used for collapsing multiple layers
439 // together. They return a pointer to a new Component equivalent to
440 // the sequence of two components. We haven't implemented this for
441 // FixedLinearComponent yet.
442 Component *CollapseWithNext(const AffineComponent &next) const ;
443 Component *CollapseWithNext(const FixedAffineComponent &next) const;
444 Component *CollapseWithNext(const FixedScaleComponent &next) const;
445 Component *CollapseWithPrevious(const FixedAffineComponent &prev) const;
447 protected:
448 friend class NaturalGradientAffineComponent;
449 // This function Update() is for extensibility; child classes may override
450 // this, e.g. for natural gradient update.
451 virtual void Update(
452 const std::string &debug_info,
453 const CuMatrixBase<BaseFloat> &in_value,
454 const CuMatrixBase<BaseFloat> &out_deriv) {
455 UpdateSimple(in_value, out_deriv);
456 }
457 // UpdateSimple is used when *this is a gradient. Child classes may override
458 // this if needed, but typically won't need to.
459 virtual void UpdateSimple(
460 const CuMatrixBase<BaseFloat> &in_value,
461 const CuMatrixBase<BaseFloat> &out_deriv);
463 const AffineComponent &operator = (const AffineComponent &other); // Disallow.
464 CuMatrix<BaseFloat> linear_params_;
465 CuVector<BaseFloat> bias_params_;
466 };
468 class RepeatedAffineComponent;
470 /// This class implements an affine transform using a block diagonal matrix
471 /// e.g., one whose weight matrix is all zeros except for blocks on the
472 /// diagonal. All these blocks have the same dimensions.
473 /// input-dim: num cols of block diagonal matrix.
474 /// output-dim: num rows of block diagonal matrix.
475 /// num-blocks: number of blocks in diagonal of the matrix.
476 /// num-blocks must divide both input-dim and output-dim
477 class BlockAffineComponent : public UpdatableComponent {
478 public:
479 virtual int32 InputDim() const { return linear_params_.NumCols() * num_blocks_; }
480 virtual int32 OutputDim() const { return linear_params_.NumRows(); }
482 virtual std::string Info() const;
483 virtual void InitFromConfig(ConfigLine *cfl);
485 BlockAffineComponent() { }
486 virtual std::string Type() const { return "BlockAffineComponent"; }
487 virtual int32 Properties() const {
488 return kSimpleComponent|kUpdatableComponent|kLinearInParameters|
489 kBackpropNeedsInput|kBackpropAdds;
490 }
492 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
493 const CuMatrixBase<BaseFloat> &in,
494 CuMatrixBase<BaseFloat> *out) const;
496 virtual void Backprop(const std::string &debug_info,
497 const ComponentPrecomputedIndexes *indexes,
498 const CuMatrixBase<BaseFloat> &in_value,
499 const CuMatrixBase<BaseFloat> &, // out_value
500 const CuMatrixBase<BaseFloat> &out_deriv,
501 Component *to_update,
502 CuMatrixBase<BaseFloat> *in_deriv) const;
504 virtual void Read(std::istream &is, bool binary);
505 virtual void Write(std::ostream &os, bool binary) const;
507 virtual Component* Copy() const;
509 // Functions from base-class UpdatableComponent.
510 virtual void Scale(BaseFloat scale);
511 virtual void Add(BaseFloat alpha, const Component &other);
512 virtual void SetZero(bool treat_as_gradient);
513 virtual void PerturbParams(BaseFloat stddev);
514 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
515 virtual int32 NumParameters() const;
516 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
517 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
519 // BlockAffine-specific functions.
520 void Init(int32 input_dim, int32 output_dim, int32 num_blocks,
521 BaseFloat param_stddev, BaseFloat bias_mean,
522 BaseFloat bias_stddev);
523 explicit BlockAffineComponent(const BlockAffineComponent &other);
524 explicit BlockAffineComponent(const RepeatedAffineComponent &rac);
525 protected:
526 // The matrix linear_params_ has a block structure, with num_blocks_ blocks of
527 // equal size. The blocks are stored in linear_params_ as
528 // [ M
529 // N
530 // O ] but we actually treat it as the matrix:
531 // [ M 0 0
532 // 0 N 0
533 // 0 0 O ]
534 CuMatrix<BaseFloat> linear_params_;
535 CuVector<BaseFloat> bias_params_;
536 int32 num_blocks_;
537 private:
538 const BlockAffineComponent &operator = (const BlockAffineComponent &other); // Disallow.
539 };
541 class RepeatedAffineComponent: public UpdatableComponent {
542 public:
544 virtual int32 InputDim() const { return linear_params_.NumCols() * num_repeats_; }
545 virtual int32 OutputDim() const { return linear_params_.NumRows() * num_repeats_; }
547 virtual std::string Info() const;
548 virtual void InitFromConfig(ConfigLine *cfl);
550 RepeatedAffineComponent() { } // use Init to really initialize.
551 virtual std::string Type() const { return "RepeatedAffineComponent"; }
552 virtual int32 Properties() const {
553 return kSimpleComponent|kUpdatableComponent|kLinearInParameters|
554 kBackpropNeedsInput|kBackpropAdds|kInputContiguous|kOutputContiguous;
555 }
556 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
557 const CuMatrixBase<BaseFloat> &in,
558 CuMatrixBase<BaseFloat> *out) const;
559 virtual void Backprop(const std::string &debug_info,
560 const ComponentPrecomputedIndexes *indexes,
561 const CuMatrixBase<BaseFloat> &in_value,
562 const CuMatrixBase<BaseFloat> &, // out_value
563 const CuMatrixBase<BaseFloat> &out_deriv,
564 Component *to_update,
565 CuMatrixBase<BaseFloat> *in_deriv) const;
567 virtual void Read(std::istream &is, bool binary);
568 virtual void Write(std::ostream &os, bool binary) const;
570 virtual Component* Copy() const;
572 // Some functions from base-class UpdatableComponent.
573 virtual void Scale(BaseFloat scale);
574 virtual void Add(BaseFloat alpha, const Component &other);
575 virtual void SetZero(bool treat_as_gradient);
576 virtual void PerturbParams(BaseFloat stddev);
577 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
578 virtual int32 NumParameters() const;
579 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
580 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
582 // Some functions that are specific to this class.
583 const CuVector<BaseFloat> &BiasParams() const { return bias_params_; }
584 const CuMatrix<BaseFloat> &LinearParams() const { return linear_params_; }
585 explicit RepeatedAffineComponent(const RepeatedAffineComponent &other);
587 void Init(int32 input_dim, int32 output_dim, int32 num_repeats,
588 BaseFloat param_stddev, BaseFloat bias_mean,
589 BaseFloat bias_stddev);
590 friend BlockAffineComponent::BlockAffineComponent(const RepeatedAffineComponent &rac);
591 protected:
592 // This function Update(), called from backprop, is broken out for
593 // extensibility to natural gradient update.
594 virtual void Update(
595 const CuMatrixBase<BaseFloat> &in_value,
596 const CuMatrixBase<BaseFloat> &out_deriv);
598 // This function does nothing here but is redefined in child-class
599 // NaturalGradientRepeatedAffineComponent. This help avoid repeated code.
600 virtual void SetNaturalGradientConfigs() { }
602 const RepeatedAffineComponent &operator = (const RepeatedAffineComponent &other); // Disallow.
603 CuMatrix<BaseFloat> linear_params_;
604 CuVector<BaseFloat> bias_params_;
605 int32 num_repeats_;
606 };
608 class NaturalGradientRepeatedAffineComponent: public RepeatedAffineComponent {
609 public:
610 // Use Init() to really initialize.
611 NaturalGradientRepeatedAffineComponent() { }
613 // Most of the public functions are inherited from RepeatedAffineComponent.
614 virtual std::string Type() const {
615 return "NaturalGradientRepeatedAffineComponent";
616 }
618 virtual Component* Copy() const;
620 // Copy constructor
621 explicit NaturalGradientRepeatedAffineComponent(
622 const NaturalGradientRepeatedAffineComponent &other);
623 private:
624 virtual void Update(
625 const CuMatrixBase<BaseFloat> &in_value,
626 const CuMatrixBase<BaseFloat> &out_deriv);
628 const NaturalGradientRepeatedAffineComponent &operator=(
629 const NaturalGradientRepeatedAffineComponent &other); // Disallow.
631 // Applies the default configuration to preconditioner_in_.
632 virtual void SetNaturalGradientConfigs();
634 // For efficiency reasons we only apply the natural gradient to the input
635 // side, i.e. not to the space of output derivatives-- we believe the input
636 // side is the more important side. We don't make the natural-gradient
637 // configurable; we just give it a reasonable configuration.
638 // Instead of using the individual data-points, for efficiency reasons we use
639 // the distribution of per-minibatch summed derivatives over each dimension of
640 // the output space, as the source for the Fisher matrix.
641 OnlineNaturalGradient preconditioner_in_;
642 };
644 class SoftmaxComponent: public NonlinearComponent {
645 public:
646 explicit SoftmaxComponent(const SoftmaxComponent &other):
647 NonlinearComponent(other) { }
648 SoftmaxComponent() { }
649 virtual std::string Type() const { return "SoftmaxComponent"; }
650 virtual int32 Properties() const {
651 return kSimpleComponent|kBackpropNeedsOutput|kStoresStats;
652 }
653 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
654 const CuMatrixBase<BaseFloat> &in,
655 CuMatrixBase<BaseFloat> *out) const;
656 virtual void Backprop(const std::string &debug_info,
657 const ComponentPrecomputedIndexes *indexes,
658 const CuMatrixBase<BaseFloat> &in_value,
659 const CuMatrixBase<BaseFloat> &out_value,
660 const CuMatrixBase<BaseFloat> &out_deriv,
661 Component *to_update,
662 CuMatrixBase<BaseFloat> *in_deriv) const;
663 virtual void StoreStats(const CuMatrixBase<BaseFloat> &out_value);
665 virtual Component* Copy() const { return new SoftmaxComponent(*this); }
666 private:
667 SoftmaxComponent &operator = (const SoftmaxComponent &other); // Disallow.
668 };
670 class LogSoftmaxComponent: public NonlinearComponent {
671 public:
672 explicit LogSoftmaxComponent(const LogSoftmaxComponent &other):
673 NonlinearComponent(other) { }
674 LogSoftmaxComponent() { }
675 virtual std::string Type() const { return "LogSoftmaxComponent"; }
676 virtual int32 Properties() const {
677 return kSimpleComponent|kBackpropNeedsOutput|kStoresStats;
678 }
679 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
680 const CuMatrixBase<BaseFloat> &in,
681 CuMatrixBase<BaseFloat> *out) const;
682 virtual void Backprop(const std::string &debug_info,
683 const ComponentPrecomputedIndexes *indexes,
684 const CuMatrixBase<BaseFloat> &in_value,
685 const CuMatrixBase<BaseFloat> &out_value,
686 const CuMatrixBase<BaseFloat> &out_deriv,
687 Component *to_update,
688 CuMatrixBase<BaseFloat> *in_deriv) const;
690 virtual Component* Copy() const { return new LogSoftmaxComponent(*this); }
691 private:
692 LogSoftmaxComponent &operator = (const LogSoftmaxComponent &other); // Disallow.
693 };
695 /// Keywords: natural gradient descent, NG-SGD, naturalgradient. For
696 /// the top-level of the natural gradient code look here, and also in
697 /// nnet-precondition-online.h.
698 /// NaturalGradientAffineComponent is
699 /// a version of AffineComponent that has a non-(multiple of unit) learning-rate
700 /// matrix. See nnet-precondition-online.h for a description of the technique.
701 /// It is described, under the name Online NG-SGD, in the paper "Parallel
702 /// training of DNNs with Natural Gradient and Parameter Averaging" (ICLR
703 /// workshop, 2015) by Daniel Povey, Xiaohui Zhang and Sanjeev Khudanpur.
704 class NaturalGradientAffineComponent: public AffineComponent {
705 public:
706 virtual std::string Type() const { return "NaturalGradientAffineComponent"; }
707 virtual void Read(std::istream &is, bool binary);
708 virtual void Write(std::ostream &os, bool binary) const;
709 void Init(int32 input_dim, int32 output_dim,
710 BaseFloat param_stddev, BaseFloat bias_stddev, BaseFloat bias_mean,
711 int32 rank_in, int32 rank_out, int32 update_period,
712 BaseFloat num_samples_history, BaseFloat alpha,
713 BaseFloat max_change_per_sample);
714 void Init(int32 rank_in, int32 rank_out, int32 update_period,
715 BaseFloat num_samples_history,
716 BaseFloat alpha, BaseFloat max_change_per_sample,
717 std::string matrix_filename);
718 // this constructor does not really initialize, use Init() or Read().
719 NaturalGradientAffineComponent();
720 virtual void Resize(int32 input_dim, int32 output_dim);
721 virtual void InitFromConfig(ConfigLine *cfl);
722 virtual std::string Info() const;
723 virtual Component* Copy() const;
724 virtual void Scale(BaseFloat scale);
725 virtual void Add(BaseFloat alpha, const Component &other);
726 // copy constructor
727 explicit NaturalGradientAffineComponent(
728 const NaturalGradientAffineComponent &other);
729 virtual void ZeroStats();
731 private:
732 // disallow assignment operator.
733 NaturalGradientAffineComponent &operator= (
734 const NaturalGradientAffineComponent&);
736 // Configs for preconditioner. The input side tends to be better conditioned ->
737 // smaller rank needed, so make them separately configurable.
738 int32 rank_in_;
739 int32 rank_out_;
740 int32 update_period_;
741 BaseFloat num_samples_history_;
742 BaseFloat alpha_;
744 OnlineNaturalGradient preconditioner_in_;
746 OnlineNaturalGradient preconditioner_out_;
748 // If > 0, max_change_per_sample_ is the maximum amount of parameter
749 // change (in L2 norm) that we allow per sample, averaged over the minibatch.
750 // This was introduced in order to control instability.
751 // Instead of the exact L2 parameter change, for
752 // efficiency purposes we limit a bound on the exact
753 // change. The limit is applied via a constant <= 1.0
754 // for each minibatch, A suitable value might be, for
755 // example, 10 or so; larger if there are more
756 // parameters.
757 BaseFloat max_change_per_sample_;
759 // update_count_ records how many updates we have done.
760 double update_count_;
762 // active_scaling_count_ records how many updates we have done,
763 // where the scaling factor is active (not 1.0).
764 double active_scaling_count_;
766 // max_change_scale_stats_ records the sum of scaling factors
767 // in each update, so we can compute the averaged scaling factor
768 // in Info().
769 double max_change_scale_stats_;
771 // Sets the configs rank, alpha and eta in the preconditioner objects,
772 // from the class variables.
773 void SetNaturalGradientConfigs();
775 virtual void Update(
776 const std::string &debug_info,
777 const CuMatrixBase<BaseFloat> &in_value,
778 const CuMatrixBase<BaseFloat> &out_deriv);
779 };
782 /// FixedAffineComponent is an affine transform that is supplied
783 /// at network initialization time and is not trainable.
784 class FixedAffineComponent: public Component {
785 public:
786 FixedAffineComponent() { }
787 virtual std::string Type() const { return "FixedAffineComponent"; }
788 virtual std::string Info() const;
790 // Copy constructor from AffineComponent-- can be used when we're done
791 // training a particular part of the model and want to efficiently disable
792 // further training.
793 FixedAffineComponent(const AffineComponent &c);
795 /// matrix should be of size input-dim+1 to output-dim, last col is offset
796 void Init(const CuMatrixBase<BaseFloat> &matrix);
798 // The ConfigLine cfl contains just the option matrix=<string>,
799 // where the string is the filename of a Kaldi-format matrix to read.
800 virtual void InitFromConfig(ConfigLine *cfl);
802 virtual int32 Properties() const { return kSimpleComponent|kBackpropAdds; }
803 virtual int32 InputDim() const { return linear_params_.NumCols(); }
804 virtual int32 OutputDim() const { return linear_params_.NumRows(); }
806 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
807 const CuMatrixBase<BaseFloat> &in,
808 CuMatrixBase<BaseFloat> *out) const;
809 virtual void Backprop(const std::string &debug_info,
810 const ComponentPrecomputedIndexes *indexes,
811 const CuMatrixBase<BaseFloat> &in_value,
812 const CuMatrixBase<BaseFloat> &, // out_value
813 const CuMatrixBase<BaseFloat> &out_deriv,
814 Component *to_update,
815 CuMatrixBase<BaseFloat> *in_deriv) const;
818 virtual Component* Copy() const;
819 virtual void Read(std::istream &is, bool binary);
820 virtual void Write(std::ostream &os, bool binary) const;
822 // Function to provide access to linear_params_.
823 const CuMatrix<BaseFloat> &LinearParams() const { return linear_params_; }
824 protected:
825 friend class AffineComponent;
826 CuMatrix<BaseFloat> linear_params_;
827 CuVector<BaseFloat> bias_params_;
829 KALDI_DISALLOW_COPY_AND_ASSIGN(FixedAffineComponent);
830 };
832 /// SumGroupComponent is used to sum up groups of posteriors.
833 /// It's used to introduce a kind of Gaussian-mixture-model-like
834 /// idea into neural nets. This is basically a degenerate case of
835 /// MixtureProbComponent; we had to implement it separately to
836 /// be efficient for CUDA (we can use this one regardless whether
837 /// we have CUDA or not; it's the normal case we want anyway).
838 ///
839 /// There are two forms of initialization in a config file: one
840 /// where the number of elements are specified for each group
841 /// individually as a vector, and one where only the total input
842 /// dimension and the output dimension (number of groups) is specified.
843 /// The second is used when all groups have the same size.
844 class SumGroupComponent: public Component {
845 public:
846 virtual int32 InputDim() const { return input_dim_; }
847 virtual int32 OutputDim() const { return output_dim_; }
848 void Init(const std::vector<int32> &sizes); // the vector is of the input dim
849 // (>= 1) for each output dim.
850 void Init(int32 input_dim, int32 output_dim);
851 void GetSizes(std::vector<int32> *sizes) const; // Get a vector saying, for
852 // each output-dim, how many
853 // inputs were summed over.
854 virtual void InitFromConfig(ConfigLine *cfl);
855 SumGroupComponent() { }
856 virtual std::string Type() const { return "SumGroupComponent"; }
857 virtual int32 Properties() const { return kSimpleComponent|kLinearInInput; }
858 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
859 const CuMatrixBase<BaseFloat> &in,
860 CuMatrixBase<BaseFloat> *out) const;
861 virtual void Backprop(const std::string &debug_info,
862 const ComponentPrecomputedIndexes *indexes,
863 const CuMatrixBase<BaseFloat> &in_value,
864 const CuMatrixBase<BaseFloat> &, // out_value
865 const CuMatrixBase<BaseFloat> &out_deriv,
866 Component *to_update,
867 CuMatrixBase<BaseFloat> *in_deriv) const;
868 virtual Component* Copy() const;
869 virtual void Read(std::istream &is, bool binary);
870 virtual void Write(std::ostream &os, bool binary) const;
872 private:
873 KALDI_DISALLOW_COPY_AND_ASSIGN(SumGroupComponent);
874 // Note: Int32Pair is just struct{ int32 first; int32 second }; it's defined
875 // in cu-matrixdim.h as extern "C" which is needed for the CUDA interface.
876 CuArray<Int32Pair> indexes_; // for each output index, the (start, end) input
877 // index.
878 CuArray<int32> reverse_indexes_; // for each input index, the output index.
879 int32 input_dim_;
880 int32 output_dim_;
881 };
884 /// FixedScaleComponent applies a fixed per-element scale; it's similar
885 /// to the Rescale component in the nnet1 setup (and only needed for nnet1
886 /// model conversion).
887 class FixedScaleComponent: public Component {
888 public:
889 FixedScaleComponent() { }
890 virtual std::string Type() const { return "FixedScaleComponent"; }
891 virtual std::string Info() const;
892 virtual int32 Properties() const {
893 return kSimpleComponent|kLinearInInput|kPropagateInPlace|kBackpropInPlace;
894 }
896 void Init(const CuVectorBase<BaseFloat> &scales);
898 // The ConfigLine cfl contains only the option scales=<string>,
899 // where the string is the filename of a Kaldi-format matrix to read.
900 virtual void InitFromConfig(ConfigLine *cfl);
902 virtual int32 InputDim() const { return scales_.Dim(); }
903 virtual int32 OutputDim() const { return scales_.Dim(); }
905 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
906 const CuMatrixBase<BaseFloat> &in,
907 CuMatrixBase<BaseFloat> *out) const;
908 virtual void Backprop(const std::string &debug_info,
909 const ComponentPrecomputedIndexes *indexes,
910 const CuMatrixBase<BaseFloat> &, // in_value
911 const CuMatrixBase<BaseFloat> &, // out_value
912 const CuMatrixBase<BaseFloat> &out_deriv,
913 Component *, // to_update
914 CuMatrixBase<BaseFloat> *in_deriv) const;
915 virtual Component* Copy() const;
916 virtual void Read(std::istream &is, bool binary);
917 virtual void Write(std::ostream &os, bool binary) const;
919 protected:
920 friend class AffineComponent; // necessary for collapse
921 CuVector<BaseFloat> scales_;
922 KALDI_DISALLOW_COPY_AND_ASSIGN(FixedScaleComponent);
923 };
926 /// FixedBiasComponent applies a fixed per-element bias; it's similar
927 /// to the AddShift component in the nnet1 setup (and only needed for nnet1
928 /// model conversion.
929 class FixedBiasComponent: public Component {
930 public:
931 FixedBiasComponent() { }
932 virtual std::string Type() const { return "FixedBiasComponent"; }
933 virtual std::string Info() const;
935 virtual int32 Properties() const {
936 return kSimpleComponent|kPropagateInPlace|kBackpropInPlace;
937 }
939 void Init(const CuVectorBase<BaseFloat> &scales);
941 // The ConfigLine cfl contains only the option bias=<string>,
942 // where the string is the filename of a Kaldi-format matrix to read.
943 virtual void InitFromConfig(ConfigLine *cfl);
944 virtual int32 InputDim() const { return bias_.Dim(); }
945 virtual int32 OutputDim() const { return bias_.Dim(); }
946 using Component::Propagate; // to avoid name hiding
947 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
948 const CuMatrixBase<BaseFloat> &in,
949 CuMatrixBase<BaseFloat> *out) const;
950 virtual void Backprop(const std::string &debug_info,
951 const ComponentPrecomputedIndexes *indexes,
952 const CuMatrixBase<BaseFloat> &, // in_value,
953 const CuMatrixBase<BaseFloat> &, // out_value
954 const CuMatrixBase<BaseFloat> &out_deriv,
955 Component *, // to_update
956 CuMatrixBase<BaseFloat> *in_deriv) const;
957 virtual Component* Copy() const;
958 virtual void Read(std::istream &is, bool binary);
959 virtual void Write(std::ostream &os, bool binary) const;
961 protected:
962 CuVector<BaseFloat> bias_;
963 KALDI_DISALLOW_COPY_AND_ASSIGN(FixedBiasComponent);
964 };
966 // NoOpComponent just duplicates its input. We don't anticipate this being used
967 // very often, but it may sometimes make your life easier
968 class NoOpComponent: public NonlinearComponent {
969 public:
970 explicit NoOpComponent(const NoOpComponent &other): NonlinearComponent(other) { }
971 NoOpComponent() { }
972 virtual std::string Type() const { return "NoOpComponent"; }
973 virtual int32 Properties() const {
974 return kSimpleComponent|kLinearInInput|kPropagateInPlace;
975 }
976 virtual Component* Copy() const { return new NoOpComponent(*this); }
977 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
978 const CuMatrixBase<BaseFloat> &in,
979 CuMatrixBase<BaseFloat> *out) const;
980 virtual void Backprop(const std::string &debug_info,
981 const ComponentPrecomputedIndexes *indexes,
982 const CuMatrixBase<BaseFloat> &, //in_value
983 const CuMatrixBase<BaseFloat> &, // out_value,
984 const CuMatrixBase<BaseFloat> &out_deriv,
985 Component *to_update,
986 CuMatrixBase<BaseFloat> *in_deriv) const;
987 private:
988 NoOpComponent &operator = (const NoOpComponent &other); // Disallow.
989 };
991 // ClipGradientComponent just duplicates its input, but clips gradients
992 // during backpropagation if they cross a predetermined threshold.
993 // This component will be used to prevent gradient explosion problem in
994 // recurrent neural networks
995 class ClipGradientComponent: public Component {
996 public:
997 ClipGradientComponent(int32 dim, BaseFloat clipping_threshold,
998 bool norm_based_clipping,
999 BaseFloat self_repair_clipped_proportion_threshold,
1000 BaseFloat self_repair_target,
1001 BaseFloat self_repair_scale,
1002 int32 num_clipped,
1003 int32 count,
1004 int32 num_self_repaired,
1005 int32 num_backpropped) {
1006 Init(dim, clipping_threshold, norm_based_clipping,
1007 self_repair_clipped_proportion_threshold,
1008 self_repair_target,
1009 self_repair_scale,
1010 num_clipped, count,
1011 num_self_repaired, num_backpropped);}
1013 ClipGradientComponent(): dim_(0), clipping_threshold_(-1),
1014 norm_based_clipping_(false),
1015 self_repair_clipped_proportion_threshold_(1.0),
1016 self_repair_target_(0.0),
1017 self_repair_scale_(0.0),
1018 num_clipped_(0), count_(0),
1019 num_self_repaired_(0), num_backpropped_(0) { }
1021 virtual int32 InputDim() const { return dim_; }
1022 virtual int32 OutputDim() const { return dim_; }
1023 virtual void InitFromConfig(ConfigLine *cfl);
1024 void Init(int32 dim, BaseFloat clipping_threshold, bool norm_based_clipping,
1025 BaseFloat self_repair_clipped_proportion_threshold,
1026 BaseFloat self_repair_target,
1027 BaseFloat self_repair_scale,
1028 int32 num_clipped, int32 count,
1029 int32 num_self_repaired, int32 num_backpropped);
1031 virtual std::string Type() const { return "ClipGradientComponent"; }
1033 virtual int32 Properties() const {
1034 return kSimpleComponent|kLinearInInput|kPropagateInPlace|kBackpropInPlace|
1035 kBackpropNeedsInput;
1036 }
1038 virtual void ZeroStats();
1040 virtual Component* Copy() const {
1041 return new ClipGradientComponent(dim_,
1042 clipping_threshold_,
1043 norm_based_clipping_,
1044 self_repair_clipped_proportion_threshold_,
1045 self_repair_target_,
1046 self_repair_scale_,
1047 num_clipped_,
1048 count_,
1049 num_self_repaired_,
1050 num_backpropped_);}
1052 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1053 const CuMatrixBase<BaseFloat> &in,
1054 CuMatrixBase<BaseFloat> *out) const;
1055 virtual void Backprop(const std::string &debug_info,
1056 const ComponentPrecomputedIndexes *indexes,
1057 const CuMatrixBase<BaseFloat> &in_value,
1058 const CuMatrixBase<BaseFloat> &, // out_value,
1059 const CuMatrixBase<BaseFloat> &out_deriv,
1060 Component *to_update,
1061 CuMatrixBase<BaseFloat> *in_deriv) const;
1063 virtual void Scale(BaseFloat scale);
1064 virtual void Add(BaseFloat alpha, const Component &other);
1065 virtual void Read(std::istream &is, bool binary); // This Read function
1066 // requires that the Component has the correct type.
1067 /// Write component to stream
1068 virtual void Write(std::ostream &os, bool binary) const;
1069 virtual std::string Info() const;
1070 virtual ~ClipGradientComponent() {
1071 if (num_self_repaired_ > 0)
1072 KALDI_LOG << "ClipGradientComponent(node_name=" << debug_info_
1073 << ")'s self-repair was activated " << num_self_repaired_
1074 << " time(s) out of " << num_backpropped_
1075 << " times of calling Backprop() in this training job.";
1076 }
1077 private:
1078 int32 dim_; // input/output dimension
1079 BaseFloat clipping_threshold_; // threshold to be used for clipping
1080 // could correspond to max-row-norm (if
1081 // norm_based_clipping_ == true) or
1082 // max-absolute-value (otherwise)
1083 bool norm_based_clipping_; // if true the max-row-norm will be clipped
1084 // else element-wise absolute value clipping is
1085 // done
1087 // some configuration values relating to self-repairing.
1088 BaseFloat self_repair_clipped_proportion_threshold_; // the threshold of
1089 // clipped-proportion
1090 // for self-repair to be
1091 // activated
1092 BaseFloat self_repair_target_; // the target value towards which self-repair
1093 // is trying to set for in-deriv
1094 BaseFloat self_repair_scale_; // constant scaling the self-repair vector
1095 std::string debug_info_; // component-node name, used in the destructor to
1096 // print out stats of self-repair
1098 // this function is called from Backprop code, and only does something if the
1099 // self-repair-scale config value is set and the current clipped proportion
1100 // exceeds the threshold. What it does is to add a term to in-deriv that
1101 // forces the input to the ClipGradientComponent to be close to some small
1102 // value (e.g., 0.0 or 0.5, depending on what the input is, e.g.,
1103 // Sigmoid or Tanh or Affine). The hope is that if the input is forced to be
1104 // small, the parameters on the path will also tend to be small, which may
1105 // help tamp down the divergence caused by gradient explosion.
1106 void RepairGradients(const std::string &debug_info,
1107 const CuMatrixBase<BaseFloat> &in_value,
1108 CuMatrixBase<BaseFloat> *in_deriv,
1109 ClipGradientComponent *to_update) const;
1111 ClipGradientComponent &operator =
1112 (const ClipGradientComponent &other); // Disallow.
1114 protected:
1115 // variables to store stats
1116 // An element corresponds to rows of derivative matrix, when
1117 // norm_based_clipping_ is true,
1118 // else it corresponds to each element of the derivative matrix
1119 // Note: no stats are stored when norm_based_clipping_ is false
1120 int32 num_clipped_; // number of elements which were clipped
1121 int32 count_; // number of elements which were processed
1122 int32 num_self_repaired_; // number of times self-repair is activated
1123 int32 num_backpropped_; //number of times backprop is called
1125 };
1127 /** PermuteComponent changes the order of the columns (i.e. the feature or
1128 activation dimensions). Output dimension i is mapped to input dimension
1129 column_map_[i], so it's like doing:
1130 for each row:
1131 for each feature/activation dimension i:
1132 output(row, i) = input(row, column_map_[i]).
1134 */
1135 class PermuteComponent: public Component {
1136 public:
1137 PermuteComponent() {}
1138 PermuteComponent(const std::vector<int32> &column_map) { Init(column_map); }
1140 virtual int32 InputDim() const { return column_map_.Dim(); }
1141 virtual int32 OutputDim() const { return column_map_.Dim(); }
1142 virtual void InitFromConfig(ConfigLine *cfl);
1143 void Init(const std::vector<int32> &column_map);
1145 virtual std::string Type() const { return "PermuteComponent"; }
1147 virtual int32 Properties() const {
1148 return kSimpleComponent|kLinearInInput;
1149 }
1151 virtual void ZeroStats() {}
1153 virtual Component* Copy() const;
1155 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1156 const CuMatrixBase<BaseFloat> &in,
1157 CuMatrixBase<BaseFloat> *out) const;
1158 virtual void Backprop(const std::string &debug_info,
1159 const ComponentPrecomputedIndexes *indexes,
1160 const CuMatrixBase<BaseFloat> &, //in_value
1161 const CuMatrixBase<BaseFloat> &, // out_value,
1162 const CuMatrixBase<BaseFloat> &out_deriv,
1163 Component *to_update,
1164 CuMatrixBase<BaseFloat> *in_deriv) const;
1166 virtual void Scale(BaseFloat scale) {}
1167 virtual void Add(BaseFloat alpha, const Component &other) {}
1168 virtual void Read(std::istream &is, bool binary); // This Read function
1169 // requires that the Component has the correct type.
1170 /// Write component to stream
1171 virtual void Write(std::ostream &os, bool binary) const;
1172 virtual std::string Info() const;
1173 private:
1174 // computes the reverse column map. Must not be called if column_map_.Dim()
1175 // == 0
1176 void ComputeReverseColumnMap();
1177 CuArray<int32> column_map_;
1178 // the following is a derived variable, not written to disk.
1179 // It is used in backprop.
1180 CuArray<int32> reverse_column_map_;
1181 PermuteComponent &operator =
1182 (const PermuteComponent &other); // Disallow.
1183 };
1188 // PerElementScaleComponent scales each dimension of its input with a separate
1189 // trainable scale; it's like a linear component with a diagonal matrix.
1190 class PerElementScaleComponent: public UpdatableComponent {
1191 public:
1192 virtual int32 InputDim() const { return scales_.Dim(); }
1193 virtual int32 OutputDim() const { return scales_.Dim(); }
1195 virtual std::string Info() const;
1196 virtual void InitFromConfig(ConfigLine *cfl);
1198 PerElementScaleComponent() { } // use Init to really initialize.
1199 virtual std::string Type() const { return "PerElementScaleComponent"; }
1200 virtual int32 Properties() const {
1201 return kSimpleComponent|kUpdatableComponent|kLinearInInput|
1202 kLinearInParameters|kBackpropNeedsInput|kPropagateInPlace;
1203 }
1205 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1206 const CuMatrixBase<BaseFloat> &in,
1207 CuMatrixBase<BaseFloat> *out) const;
1208 virtual void Backprop(const std::string &debug_info,
1209 const ComponentPrecomputedIndexes *indexes,
1210 const CuMatrixBase<BaseFloat> &in_value,
1211 const CuMatrixBase<BaseFloat> &, // out_value
1212 const CuMatrixBase<BaseFloat> &out_deriv,
1213 Component *to_update,
1214 CuMatrixBase<BaseFloat> *in_deriv) const;
1216 virtual void Read(std::istream &is, bool binary);
1217 virtual void Write(std::ostream &os, bool binary) const;
1219 virtual Component* Copy() const;
1222 // Some functions from base-class UpdatableComponent.
1223 virtual void Scale(BaseFloat scale);
1224 virtual void Add(BaseFloat alpha, const Component &other);
1225 virtual void SetZero(bool treat_as_gradient);
1226 virtual void PerturbParams(BaseFloat stddev);
1227 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1228 virtual int32 NumParameters() const;
1229 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1230 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
1232 // Some functions that are specific to this class.
1233 explicit PerElementScaleComponent(const PerElementScaleComponent &other);
1235 void Init(int32 dim, BaseFloat param_mean, BaseFloat param_stddev);
1236 void Init(std::string vector_filename);
1238 protected:
1239 friend class AffineComponent; // necessary for collapse
1240 // This function Update() is for extensibility; child classes may override
1241 // this, e.g. for natural gradient update.
1242 virtual void Update(
1243 const std::string &debug_info,
1244 const CuMatrixBase<BaseFloat> &in_value,
1245 const CuMatrixBase<BaseFloat> &out_deriv) {
1246 UpdateSimple(in_value, out_deriv);
1247 }
1248 // UpdateSimple is used when *this is a gradient. Child classes may override
1249 // this if needed, but typically won't need to.
1250 virtual void UpdateSimple(
1251 const CuMatrixBase<BaseFloat> &in_value,
1252 const CuMatrixBase<BaseFloat> &out_deriv);
1254 const PerElementScaleComponent &operator
1255 = (const PerElementScaleComponent &other); // Disallow.
1256 CuVector<BaseFloat> scales_;
1257 };
1260 // PerElementOffsetComponent offsets each dimension of its input with a separate
1261 // trainable bias; it's like an affine component with fixed weight matrix which is always equal to I.
1262 class PerElementOffsetComponent: public UpdatableComponent {
1263 public:
1264 virtual int32 InputDim() const { return offsets_.Dim(); }
1265 virtual int32 OutputDim() const { return offsets_.Dim(); }
1267 virtual std::string Info() const;
1268 virtual void InitFromConfig(ConfigLine *cfl);
1270 PerElementOffsetComponent() { } // use Init to really initialize.
1271 virtual std::string Type() const { return "PerElementOffsetComponent"; }
1272 virtual int32 Properties() const {
1273 return kSimpleComponent|kUpdatableComponent|
1274 kBackpropInPlace|kPropagateInPlace;
1275 }
1277 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1278 const CuMatrixBase<BaseFloat> &in,
1279 CuMatrixBase<BaseFloat> *out) const;
1280 virtual void Backprop(const std::string &debug_info,
1281 const ComponentPrecomputedIndexes *indexes,
1282 const CuMatrixBase<BaseFloat> &, // in_value
1283 const CuMatrixBase<BaseFloat> &, // out_value
1284 const CuMatrixBase<BaseFloat> &out_deriv,
1285 Component *to_update,
1286 CuMatrixBase<BaseFloat> *in_deriv) const;
1288 virtual void Read(std::istream &is, bool binary);
1289 virtual void Write(std::ostream &os, bool binary) const;
1291 virtual Component* Copy() const;
1294 // Some functions from base-class UpdatableComponent.
1295 virtual void Scale(BaseFloat scale);
1296 virtual void Add(BaseFloat alpha, const Component &other);
1297 virtual void SetZero(bool treat_as_gradient);
1298 virtual void PerturbParams(BaseFloat stddev);
1299 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1300 virtual int32 NumParameters() const;
1301 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1302 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
1304 // Some functions that are specific to this class.
1305 explicit PerElementOffsetComponent(const PerElementOffsetComponent &other);
1307 void Init(int32 dim, BaseFloat param_mean,
1308 BaseFloat param_stddev);
1309 void Init(std::string vector_filename);
1311 protected:
1312 const PerElementOffsetComponent &operator
1313 = (const PerElementOffsetComponent &other); // Disallow.
1314 CuVector<BaseFloat> offsets_;
1315 };
1318 // ConstantFunctionComponent returns constant function of its input,
1319 // i.e. its output does not depend on its input. It is the same as
1320 // an affine component with the linear term fixed at zero.
1321 // It is optionally trainable, and optionally you can use natural
1322 // gradient. The input is required only because the framework
1323 // requires components to have an input.
1324 class ConstantFunctionComponent: public UpdatableComponent {
1325 public:
1326 virtual int32 InputDim() const { return input_dim_; }
1327 virtual int32 OutputDim() const { return output_.Dim(); }
1329 virtual std::string Info() const;
1330 // possible parameter values with their defaults:
1331 // input-dim=-1 is-updatable=true use-natural-gradient=true output-dim=-1
1332 // output-mean=0 output-stddev=0
1333 virtual void InitFromConfig(ConfigLine *cfl);
1335 ConstantFunctionComponent();
1337 ConstantFunctionComponent(const ConstantFunctionComponent &other);
1339 virtual std::string Type() const { return "ConstantFunctionComponent"; }
1340 virtual int32 Properties() const {
1341 return kSimpleComponent|
1342 (is_updatable_ ? kUpdatableComponent|kLinearInParameters : 0) |
1343 (InputDim() == OutputDim() ? kPropagateInPlace|kBackpropInPlace: 0) |
1344 kBackpropAdds;
1345 }
1346 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1347 const CuMatrixBase<BaseFloat> &in,
1348 CuMatrixBase<BaseFloat> *out) const;
1349 virtual void Backprop(const std::string &debug_info,
1350 const ComponentPrecomputedIndexes *indexes,
1351 const CuMatrixBase<BaseFloat> &, // in_value
1352 const CuMatrixBase<BaseFloat> &, // out_value
1353 const CuMatrixBase<BaseFloat> &out_deriv,
1354 Component *to_update,
1355 CuMatrixBase<BaseFloat> *in_deriv) const;
1357 virtual void Read(std::istream &is, bool binary);
1358 virtual void Write(std::ostream &os, bool binary) const;
1360 virtual Component* Copy() const;
1362 // Some functions from base-class UpdatableComponent.
1363 virtual void Scale(BaseFloat scale);
1364 virtual void Add(BaseFloat alpha, const Component &other);
1365 virtual void SetZero(bool treat_as_gradient);
1366 virtual void PerturbParams(BaseFloat stddev);
1367 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1368 virtual int32 NumParameters() const;
1369 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1370 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
1371 private:
1372 int32 input_dim_;
1373 // the output value-- a vector.
1374 CuVector<BaseFloat> output_;
1376 bool is_updatable_;
1377 // if true, and if updatable, do natural-gradient update.
1378 bool use_natural_gradient_;
1379 OnlineNaturalGradient preconditioner_;
1381 const ConstantFunctionComponent &operator
1382 = (const ConstantFunctionComponent &other); // Disallow.
1383 };
1387 // NaturalGradientPerElementScaleComponent is like PerElementScaleComponent but
1388 // it uses a natural gradient update for the per-element scales, and enforces a
1389 // maximum amount of change per minibatch, for stability.
1390 class NaturalGradientPerElementScaleComponent: public PerElementScaleComponent {
1391 public:
1393 virtual std::string Info() const;
1395 virtual void InitFromConfig(ConfigLine *cfl);
1397 NaturalGradientPerElementScaleComponent() { } // use Init to really initialize.
1398 virtual std::string Type() const {
1399 return "NaturalGradientPerElementScaleComponent";
1400 }
1402 virtual void Read(std::istream &is, bool binary);
1403 virtual void Write(std::ostream &os, bool binary) const;
1405 virtual Component* Copy() const;
1407 // Some functions that are specific to this class:
1408 explicit NaturalGradientPerElementScaleComponent(
1409 const NaturalGradientPerElementScaleComponent &other);
1411 void Init(int32 dim, BaseFloat param_mean,
1412 BaseFloat param_stddev, int32 rank, int32 update_period,
1413 BaseFloat num_samples_history, BaseFloat alpha,
1414 BaseFloat max_change_per_minibatch);
1415 void Init(std::string vector_filename,
1416 int32 rank, int32 update_period, BaseFloat num_samples_history,
1417 BaseFloat alpha, BaseFloat max_change_per_minibatch);
1419 private:
1420 // configuration value for imposing max-change...
1421 BaseFloat max_change_per_minibatch_;
1423 // unlike the NaturalGradientAffineComponent, there is only one dimension to
1424 // consider as the parameters are a vector not a matrix, so we only need one
1425 // preconditioner.
1426 // The preconditioner stores its own configuration values; we write and read
1427 // these, but not the preconditioner object itself.
1428 OnlineNaturalGradient preconditioner_;
1430 // Override of the parent-class Update() function, called only
1431 // if this->is_gradient_ = false; this implements the natural
1432 // gradient update.
1433 virtual void Update(
1434 const std::string &debug_info,
1435 const CuMatrixBase<BaseFloat> &in_value,
1436 const CuMatrixBase<BaseFloat> &out_deriv);
1438 const NaturalGradientPerElementScaleComponent &operator
1439 = (const NaturalGradientPerElementScaleComponent &other); // Disallow.
1440 };
1442 /**
 * ConvolutionComponent implements 2d-convolution.
 * It uses 3D filters on 3D inputs, but the 3D filters hop only over
 * 2 dimensions, as they have the same size as the input along the 3rd
 * dimension.
1446 * Input : A matrix where each row is a vectorized 3D-tensor.
1447 * The 3D tensor has dimensions
1448 * x: (e.g. time)
1449 * y: (e.g. frequency)
1450 * z: (e.g. channels like features/delta/delta-delta)
1451 *
1452 * The component supports input vectorizations of type zyx and yzx.
1453 * The default vectorization type is zyx.
1454 * e.g. for input vectorization of type zyx the input is vectorized by
1455 * spanning axes z, y and x of the tensor in that order.
1456 * Given 3d tensor A with sizes (2, 2, 2) along the three dimensions
1457 * the zyx vectorized input looks like
1458 * A(0,0,0) A(0,0,1) A(0,1,0) A(0,1,1) A(1,0,0) A(1,0,1) A(1,1,0) A(1,1,1)
1459 *
1460 *
1461 * Output : The output is also a 3D tensor vectorized in the zyx format.
1462 * The channel axis (z) in the output corresponds to the output of
1463 * different filters. The first channel corresponds to the first filter
1464 * i.e., first row of the filter_params_ matrix.
1465 *
1466 * Note: The component has to support yzx input vectorization as the binaries
1467 * like add-deltas generate yz vectorized output. These input vectors are
1468 * concatenated using the Append descriptor across time steps to form a yzx
1469 * vectorized 3D tensor input.
1470 * e.g. Append(Offset(input, -1), input, Offset(input, 1))
1471 *
1472 *
1473 * For information on the hyperparameters and parameters of this component see
1474 * the variable declarations.
1475 *
1476 * Propagation:
1477 * ------------
 * The convolution operation consists of dot-products between the filter
 * tensor and an input tensor patch, for various shifts of the filter tensor
 * along the x and y axes of the input tensor. (Note: there is no shift along
 * the z-axis as the filter and input tensor have the same size along this
 * axis).
1482 *
1483 * For a particular shift (i,j) of the filter tensor
1484 * along input tensor dimensions x and y, the elements of the input tensor which
1485 * overlap with the filter form the input tensor patch. This patch is vectorized
1486 * in zyx format. All the patches corresponding to various samples in the
1487 * mini-batch are stacked into a matrix, where each row corresponds to one
1488 * patch. Let this matrix be represented by X_{i,j}. The dot products with
1489 * various filters are computed simultaneously by computing the matrix product
1490 * with the filter_params_ matrix (W)
1491 * Y_{i,j} = X_{i,j}*W^T.
1492 * Each row of W corresponds to one filter 3D tensor vectorized in zyx format.
1493 *
1494 * All the matrix products corresponding to various shifts (i,j) of the
1495 * filter tensor are computed simultaneously using the AddMatMatBatched
1496 * call of CuMatrixBase class.
1497 *
1498 * BackPropagation:
1499 * ----------------
 * Backpropagation to compute the input derivative (\nabla X_{i,j})
 * consists of a series of matrix products.
 * \nabla X_{i,j} = \nabla Y_{i,j}*W where \nabla Y_{i,j} corresponds to the
 * output derivative for a particular shift of the filter.
1504 *
1505 * Once again these matrix products are computed simultaneously.
1506 *
1507 * Update:
1508 * -------
1509 * The weight gradient is computed as
1510 * \nablaW = \Sum_{i,j} (X_{i,j}^T *\nablaY_{i,j})
1511 *
1512 */
1513 class ConvolutionComponent: public UpdatableComponent {
1514 public:
1515 enum TensorVectorizationType {
1516 kYzx = 0,
1517 kZyx = 1
1518 };
1520 ConvolutionComponent();
1521 // constructor using another component
1522 ConvolutionComponent(const ConvolutionComponent &component);
1523 // constructor using parameters
1524 ConvolutionComponent(
1525 const CuMatrixBase<BaseFloat> &filter_params,
1526 const CuVectorBase<BaseFloat> &bias_params,
1527 int32 input_x_dim, int32 input_y_dim, int32 input_z_dim,
1528 int32 filt_x_dim, int32 filt_y_dim,
1529 int32 filt_x_step, int32 filt_y_step,
1530 TensorVectorizationType input_vectorization,
1531 BaseFloat learning_rate);
1533 virtual int32 InputDim() const;
1534 virtual int32 OutputDim() const;
1536 virtual std::string Info() const;
1537 virtual void InitFromConfig(ConfigLine *cfl);
1538 virtual std::string Type() const { return "ConvolutionComponent"; }
1539 virtual int32 Properties() const {
1540 return kSimpleComponent|kUpdatableComponent|kBackpropNeedsInput|
1541 kBackpropAdds|kPropagateAdds;
1542 }
1544 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1545 const CuMatrixBase<BaseFloat> &in,
1546 CuMatrixBase<BaseFloat> *out) const;
1547 virtual void Backprop(const std::string &debug_info,
1548 const ComponentPrecomputedIndexes *indexes,
1549 const CuMatrixBase<BaseFloat> &in_value,
1550 const CuMatrixBase<BaseFloat> &, // out_value,
1551 const CuMatrixBase<BaseFloat> &out_deriv,
1552 Component *to_update_in,
1553 CuMatrixBase<BaseFloat> *in_deriv) const;
1554 void Update(const std::string &debug_info,
1555 const CuMatrixBase<BaseFloat> &in_value,
1556 const CuMatrixBase<BaseFloat> &out_deriv,
1557 const std::vector<CuSubMatrix<BaseFloat> *>& out_deriv_batch);
1561 virtual void Read(std::istream &is, bool binary);
1562 virtual void Write(std::ostream &os, bool binary) const;
1564 virtual Component* Copy() const;
1566 // Some functions from base-class UpdatableComponent.
1567 virtual void Scale(BaseFloat scale);
1568 virtual void Add(BaseFloat alpha, const Component &other);
1569 virtual void SetZero(bool treat_as_gradient);
1570 virtual void PerturbParams(BaseFloat stddev);
1571 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1572 virtual int32 NumParameters() const;
1573 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1574 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
1576 // Some functions that are specific to this class.
1577 void SetParams(const VectorBase<BaseFloat> &bias,
1578 const MatrixBase<BaseFloat> &filter);
1579 const CuVector<BaseFloat> &BiasParams() const { return bias_params_; }
1580 const CuMatrix<BaseFloat> &LinearParams() const { return filter_params_; }
1581 void Init(int32 input_x_dim, int32 input_y_dim, int32 input_z_dim,
1582 int32 filt_x_dim, int32 filt_y_dim,
1583 int32 filt_x_step, int32 filt_y_step, int32 num_filters,
1584 TensorVectorizationType input_vectorization,
1585 BaseFloat param_stddev, BaseFloat bias_stddev);
1586 // there is no filt_z_dim parameter as the length of the filter along
1587 // z-dimension is same as the input
1588 void Init(int32 input_x_dim, int32 input_y_dim, int32 input_z_dim,
1589 int32 filt_x_dim, int32 filt_y_dim,
1590 int32 filt_x_step, int32 filt_y_step,
1591 TensorVectorizationType input_vectorization,
1592 std::string matrix_filename);
1594 // resize the component, setting the parameters to zero, while
1595 // leaving any other configuration values the same
1596 void Resize(int32 input_dim, int32 output_dim);
1598 void Update(const std::string &debug_info,
1599 const CuMatrixBase<BaseFloat> &in_value,
1600 const CuMatrixBase<BaseFloat> &out_deriv);
1603 private:
1604 int32 input_x_dim_; // size of the input along x-axis
1605 // (e.g. number of time steps)
1607 int32 input_y_dim_; // size of input along y-axis
1608 // (e.g. number of mel-frequency bins)
1610 int32 input_z_dim_; // size of input along z-axis
1611 // (e.g. number of channels is 3 if the input has
1612 // features + delta + delta-delta features
1614 int32 filt_x_dim_; // size of the filter along x-axis
1616 int32 filt_y_dim_; // size of the filter along y-axis
1618 // there is no filt_z_dim_ as it is always assumed to be
1619 // the same as input_z_dim_
1621 int32 filt_x_step_; // the number of steps taken along x-axis of input
1622 // before computing the next dot-product
1623 // of filter and input
1625 int32 filt_y_step_; // the number of steps taken along y-axis of input
1626 // before computing the next dot-product of the filter
1627 // and input
1629 // there is no filt_z_step_ as only dot product is possible along this axis
1631 TensorVectorizationType input_vectorization_; // type of vectorization of the
1632 // input 3D tensor. Accepts zyx and yzx formats
1634 CuMatrix<BaseFloat> filter_params_;
1635 // the filter (or kernel) matrix is a matrix of vectorized 3D filters
1636 // where each row in the matrix corresponds to one filter.
1637 // The 3D filter tensor is vectorizedin zyx format.
1638 // The first row of the matrix corresponds to the first filter and so on.
1639 // Keep in mind the vectorization type and order of filters when using file
1640 // based initialization.
1642 CuVector<BaseFloat> bias_params_;
1643 // the filter-specific bias vector (i.e., there is a seperate bias added
1644 // to the output of each filter).
1645 bool is_gradient_;
1647 void InputToInputPatches(const CuMatrixBase<BaseFloat>& in,
1648 CuMatrix<BaseFloat> *patches) const;
1649 void InderivPatchesToInderiv(const CuMatrix<BaseFloat>& in_deriv_patches,
1650 CuMatrixBase<BaseFloat> *in_deriv) const;
1651 const ConvolutionComponent &operator = (const ConvolutionComponent &other); // Disallow.
1652 };
1655 /*
 * MaxpoolingComponent:
 * The max-pooling component was first used in ConvNets to select a
 * representative activation in an area. It inspired the Maxout nonlinearity.
1659 * Each output element of this component is the maximum of a block of
1660 * input elements where the block has a 3D dimension (pool_x_size_,
1661 * pool_y_size_, pool_z_size_).
1662 * Blocks could overlap if the shift value on any axis is smaller
1663 * than its corresponding pool size (e.g. pool_x_step_ < pool_x_size_).
 * If the shift values are equal to their pool size, there is no
1665 * overlap; while if they all equal 1, the blocks overlap to
1666 * the greatest possible extent.
1667 *
1668 * This component is designed to be used after a ConvolutionComponent
1669 * so that the input matrix is propagated from a 2d-convolutional layer.
1670 * This component implements 3d-maxpooling which performs
1671 * max pooling along the three axes.
1672 * Input : A matrix where each row is a vectorized 3D-tensor.
1673 * The 3D tensor has dimensions
1674 * x: (e.g. time)
1675 * y: (e.g. frequency)
1676 * z: (e.g. channels like number of filters in the ConvolutionComponent)
1677 *
1678 * The component assumes input vectorizations of type zyx
1679 * which is the default output vectorization type of a ConvolutionComponent.
1680 * e.g. for input vectorization of type zyx the input is vectorized by
1681 * spanning axes z, y and x of the tensor in that order.
1682 * Given 3d tensor A with sizes (2, 2, 2) along the three dimensions
1683 * the zyx vectorized input looks like
1684 * A(0,0,0) A(0,0,1) A(0,1,0) A(0,1,1) A(1,0,0) A(1,0,1) A(1,1,0) A(1,1,1)
1685 *
1686 * Output : The output is also a 3D tensor vectorized in the zyx format.
1687 *
1688 * For information on the hyperparameters and parameters of this component see
1689 * the variable declarations.
1690 *
1691 *
1692 */
1694 class MaxpoolingComponent: public Component {
1695 public:
1697 MaxpoolingComponent(): input_x_dim_(0), input_y_dim_(0), input_z_dim_(0),
1698 pool_x_size_(0), pool_y_size_(0), pool_z_size_(0),
1699 pool_x_step_(0), pool_y_step_(0), pool_z_step_(0) { }
1700 // constructor using another component
1701 MaxpoolingComponent(const MaxpoolingComponent &component);
1703 virtual int32 InputDim() const;
1704 virtual int32 OutputDim() const;
1705 virtual void Check() const;
1707 virtual std::string Info() const;
1708 virtual void InitFromConfig(ConfigLine *cfl);
1709 virtual std::string Type() const { return "MaxpoolingComponent"; }
1710 virtual int32 Properties() const {
1711 return kSimpleComponent|kBackpropNeedsInput|kBackpropNeedsOutput|
1712 kBackpropAdds;
1713 }
1715 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1716 const CuMatrixBase<BaseFloat> &in,
1717 CuMatrixBase<BaseFloat> *out) const;
1718 virtual void Backprop(const std::string &debug_info,
1719 const ComponentPrecomputedIndexes *indexes,
1720 const CuMatrixBase<BaseFloat> &in_value,
1721 const CuMatrixBase<BaseFloat> &out_value,
1722 const CuMatrixBase<BaseFloat> &out_deriv,
1723 Component *, // to_update,
1724 CuMatrixBase<BaseFloat> *in_deriv) const;
1726 virtual void Read(std::istream &is, bool binary); // This Read function
1727 // requires that the Component has the correct type.
1729 /// Write component to stream
1730 virtual void Write(std::ostream &os, bool binary) const;
1731 virtual Component* Copy() const { return new MaxpoolingComponent(*this); }
1733 void InputToInputPatches(const CuMatrixBase<BaseFloat>& in,
1734 CuMatrix<BaseFloat> *patches) const;
1735 void InderivPatchesToInderiv(const CuMatrix<BaseFloat>& in_deriv_patches,
1736 CuMatrixBase<BaseFloat> *in_deriv) const;
1738 protected:
1739 int32 input_x_dim_; // size of the input along x-axis
1740 // (e.g. number of time steps)
1741 int32 input_y_dim_; // size of input along y-axis
1742 // (e.g. number of mel-frequency bins)
1743 int32 input_z_dim_; // size of input along z-axis
1744 // (e.g. number of filters in the ConvolutionComponent)
1746 int32 pool_x_size_; // size of the pooling window along x-axis
1747 int32 pool_y_size_; // size of the pooling window along y-axis
1748 int32 pool_z_size_; // size of the pooling window along z-axis
1750 int32 pool_x_step_; // the number of steps taken along x-axis of input
1751 // before computing the next pool
1752 int32 pool_y_step_; // the number of steps taken along y-axis of input
1753 // before computing the next pool
1754 int32 pool_z_step_; // the number of steps taken along z-axis of input
1755 // before computing the next pool
1757 };
1760 /**
1761 CompositeComponent is a component representing a sequence of
1762 [simple] components. The config line would be something like the following
1763 (imagine this is all on one line):
1765 component name=composite1 type=CompositeComponent max-rows-process=2048 num-components=3 \
1766 component1='type=BlockAffineComponent input-dim=1000 output-dim=10000 num-blocks=100' \
1767 component2='type=RectifiedLinearComponent dim=10000' \
1768 component3='type=BlockAffineComponent input-dim=10000 output-dim=1000 num-blocks=100'
1770 The reason you might want to use this component, instead of directly using
1771 the same sequence of components in the config file, is to save GPU memory (at
1772 the expense of more compute)-- because doing it like this means we have to
1773 re-do parts of the forward pass in the backprop phase, but we avoid using
1774 much memory for very long (and you can make the memory usage very small by
1775 making max-rows-process small). We inherit from UpdatableComponent just in
1776 case one or more of the components in the sequence are updatable.
1778 It is an error to nest a CompositeComponent inside a CompositeComponent.
1779 The same effect can be accomplished by specifying a smaller max-rows-process
1780 in a single CompositeComponent.
1781 */
1782 class CompositeComponent: public UpdatableComponent {
1783 public:
1784 virtual int32 InputDim() const;
1785 virtual int32 OutputDim() const;
1787 virtual std::string Info() const;
1789 virtual void InitFromConfig(ConfigLine *cfl);
1791 virtual Component* Copy() const;
1793 CompositeComponent() { } // use Init() or InitFromConfig() to really initialize.
1795 // Initialize from this list of components; takes ownership of the pointers.
1796 void Init(const std::vector<Component*> &components,
1797 int32 max_rows_process);
1799 virtual std::string Type() const { return "CompositeComponent"; }
1801 // The properties depend on the properties of the constituent components. As
1802 // a special case, we never return kStoresStats in the properties: by default
1803 // we store things like activation stats (e.g. for nonlinear components like
1804 // ReLU) as part of the backprop. This means we may wastefully store stats
1805 // even when not requested, but it does save time as a separate StoreStats()
1806 // call would involve propagating the internals.
1807 virtual int32 Properties() const;
1809 virtual void Propagate(const ComponentPrecomputedIndexes *indexes,
1810 const CuMatrixBase<BaseFloat> &in,
1811 CuMatrixBase<BaseFloat> *out) const;
1812 virtual void Backprop(const std::string &debug_info,
1813 const ComponentPrecomputedIndexes *indexes,
1814 const CuMatrixBase<BaseFloat> &in_value,
1815 const CuMatrixBase<BaseFloat> &, // out_value
1816 const CuMatrixBase<BaseFloat> &out_deriv,
1817 Component *to_update,
1818 CuMatrixBase<BaseFloat> *in_deriv) const;
1820 // note, we don't implement StoreStats() as it would be inefficient. Instead,
1821 // by default we call StoreStats() on all members that have the flag set,
1822 // inside the Backprop.
1823 virtual void ZeroStats();
1825 virtual void Read(std::istream &is, bool binary);
1826 virtual void Write(std::ostream &os, bool binary) const;
1828 // Don't implement Copy() at this level: implement it in the child class.
1830 // Some functions from base-class UpdatableComponent.
1831 virtual void SetUnderlyingLearningRate(BaseFloat lrate);
1832 virtual void SetActualLearningRate(BaseFloat lrate);
1833 virtual void Scale(BaseFloat scale);
1834 virtual void Add(BaseFloat alpha, const Component &other);
1835 virtual void SetZero(bool treat_as_gradient);
1836 virtual void PerturbParams(BaseFloat stddev);
1837 virtual BaseFloat DotProduct(const UpdatableComponent &other) const;
1838 virtual int32 NumParameters() const;
1839 virtual void Vectorize(VectorBase<BaseFloat> *params) const;
1840 virtual void UnVectorize(const VectorBase<BaseFloat> ¶ms);
1842 // note: we dont implement the StoreStats function as it would be quite
1843 // expensive; instead, by default we call StoreStats() for any components that
1844 // want to store stats, as part of the backprop pass. This is not 100% ideal
1845 // but it will usually do what you want. We can revisit this later if needed.
1847 // Functions to iterate over the internal components
1849 int32 NumComponents() const { return components_.size();}
1850 /// Gets the ith component in this component.
1851 /// The ordering is the same as in the config line. The caller
1852 /// does not own the received component.
1853 const Component* GetComponent(int32 i) const;
1854 /// Sets the ith component. After this call, CompositeComponent owns
1855 /// the reference to the argument component. Frees the previous
1856 /// ith component.
1857 void SetComponent(int32 i, Component *component);
1859 virtual ~CompositeComponent() { DeletePointers(&components_); }
1860 private:
1861 // returns the stride type, kDefaultStride or kStrideEqualNumCols,
1862 // at the output of the i'th component.
1863 inline MatrixStrideType GetStrideType(int32 i) const;
1865 // returns true if at least one of 'components_' returns the kUpdatable flag
1866 // in its flags.
1867 bool IsUpdatable() const;
1869 // the maximum number of
1870 int32 max_rows_process_;
1871 std::vector<Component*> components_;
1873 };
1876 } // namespace nnet3
1877 } // namespace kaldi
1880 #endif