Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - processor-sdk/kaldi.git/commitdiff
Merge remote-tracking branch 'gaofeng/dropout_scripts' into shortcut-dropout
author Daniel Povey <dpovey@gmail.com>
Fri, 27 Jan 2017 02:35:12 +0000 (21:35 -0500)
committer Daniel Povey <dpovey@gmail.com>
Fri, 27 Jan 2017 02:35:12 +0000 (21:35 -0500)
Conflicts:
egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py

1  2 
egs/wsj/s5/steps/libs/nnet3/xconfig/lstm.py

index 5e928a0f7c322053168c9145583257c636b0bcc6,6f0e1e0f1c69cb81e85883f23b214ef12923a126..4b0908563fda4f129226705d81c97a3f61d40d03
@@@ -251,7 -248,8 +251,8 @@@ class XconfigLstmpLayer(XconfigLayerBas
                          'self-repair-scale-nonlinearity' : 0.00001,
                          'zeroing-interval' : 20,
                          'zeroing-threshold' : 15.0,
-                         'dropout-proportion' : -1.0 # -1.0 stands for no dropout will be added
 -                        'dropout-proportion' : -1.0, # -1.0 stands for no dropout will be added
 -                        'dropout-per-frame' : 'false' # default normal dropout mode
++                        'dropout-proportion' : -1.0, # If -1.0, no dropout components will be added
++                        'dropout-per-frame' : False  # If false, regular dropout, not per frame.
                         }
  
      def set_derived_configs(self):
               self.config['dropout-proportion'] < 0.0) and
               self.config['dropout-proportion'] != -1.0 ):
               raise RuntimeError("dropout-proportion has invalid value {0}."
--                                "".format(self.config['dropout-proportion']))
 -
 -        if (self.config['dropout-per-frame'] != 'false' and
 -            self.config['dropout-per-frame'] != 'true'):
 -            raise xparser_error("dropout-per-frame has invalid value {0}.".format(self.config['dropout-per-frame']))
++                                .format(self.config['dropout-proportion']))
  
      def auxiliary_outputs(self):
          return ['c_t']
                                  abs(delay)))
          affine_str = self.config['ng-affine-options']
          pes_str = self.config['ng-per-element-scale-options']
--        lstm_dropout_value = self.config['dropout-proportion']
--        lstm_dropout_str = 'dropout-proportion='+str(self.config['dropout-proportion'])
 -        lstm_dropout_per_frame_value = self.config['dropout-per-frame']
 -        lstm_dropout_per_frame_str = 'dropout-per-frame='+str(self.config['dropout-per-frame'])
++        dropout_proportion = self.config['dropout-proportion']
++        dropout_per_frame = 'true' if self.config['dropout-per-frame'] else 'false'
  
          # Natural gradient per element scale parameters
          # TODO: decide if we want to keep exposing these options
          configs.append("component name={0}.o type=SigmoidComponent dim={1} {2}".format(name, cell_dim, repair_nonlin_str))
          configs.append("component name={0}.g type=TanhComponent dim={1} {2}".format(name, cell_dim, repair_nonlin_str))
          configs.append("component name={0}.h type=TanhComponent dim={1} {2}".format(name, cell_dim, repair_nonlin_str))
 -        if lstm_dropout_value != -1.0:
 -            configs.append("component name={0}.dropout type=DropoutComponent dim={1} {2} {3}".format(name, cell_dim, lstm_dropout_str, lstm_dropout_per_frame_str))
--
++        if dropout_proportion != -1.0:
++            configs.append("component name={0}.dropout type=DropoutComponent dim={1} "
++                           "dropout-proportion={2} dropout-per-frame={3}"
++                           .format(name, cell_dim, dropout_proportion, dropout_per_frame))
          configs.append("# Defining the components for other cell computations")
          configs.append("component name={0}.c1 type=ElementwiseProductComponent input-dim={1} output-dim={2}".format(name, 2 * cell_dim, cell_dim))
          configs.append("component name={0}.c2 type=ElementwiseProductComponent input-dim={1} output-dim={2}".format(name, 2 * cell_dim, cell_dim))
          configs.append("# i_t")
          configs.append("component-node name={0}.i1_t component={0}.W_i.xr input=Append({1}, IfDefined(Offset({2}, {3})))".format(name, input_descriptor, recurrent_connection, delay))
          configs.append("component-node name={0}.i2_t component={0}.w_i.c  input={1}".format(name, delayed_c_t_descriptor))
-         configs.append("component-node name={0}.i_t component={0}.i input=Sum({0}.i1_t, {0}.i2_t)".format(name))
 -        if lstm_dropout_value != -1.0:
++        if dropout_proportion != -1.0:
+             configs.append("component-node name={0}.i_t_predrop component={0}.i input=Sum({0}.i1_t, {0}.i2_t)".format(name))
+             configs.append("component-node name={0}.i_t component={0}.dropout input={0}.i_t_predrop".format(name))
+         else:
+             configs.append("component-node name={0}.i_t component={0}.i input=Sum({0}.i1_t, {0}.i2_t)".format(name))
  
          configs.append("# f_t")
          configs.append("component-node name={0}.f1_t component={0}.W_f.xr input=Append({1}, IfDefined(Offset({2}, {3})))".format(name, input_descriptor, recurrent_connection, delay))
          configs.append("component-node name={0}.f2_t component={0}.w_f.c  input={1}".format(name, delayed_c_t_descriptor))
-         configs.append("component-node name={0}.f_t component={0}.f input=Sum({0}.f1_t, {0}.f2_t)".format(name))
 -        if lstm_dropout_value != -1.0:
++        if dropout_proportion != -1.0:
+             configs.append("component-node name={0}.f_t_predrop component={0}.f input=Sum({0}.f1_t, {0}.f2_t)".format(name))
+             configs.append("component-node name={0}.f_t component={0}.dropout input={0}.f_t_predrop".format(name))
+         else:
+             configs.append("component-node name={0}.f_t component={0}.f input=Sum({0}.f1_t, {0}.f2_t)".format(name))
  
          configs.append("# o_t")
          configs.append("component-node name={0}.o1_t component={0}.W_o.xr input=Append({1}, IfDefined(Offset({2}, {3})))".format(name, input_descriptor, recurrent_connection, delay))
          configs.append("component-node name={0}.o2_t component={0}.w_o.c input={0}.c_t".format(name))
-         configs.append("component-node name={0}.o_t component={0}.o input=Sum({0}.o1_t, {0}.o2_t)".format(name))
 -        if lstm_dropout_value != -1.0:
++        if dropout_proportion != -1.0:
+             configs.append("component-node name={0}.o_t_predrop component={0}.o input=Sum({0}.o1_t, {0}.o2_t)".format(name))
+             configs.append("component-node name={0}.o_t component={0}.dropout input={0}.o_t_predrop".format(name))
+         else:
+             configs.append("component-node name={0}.o_t component={0}.o input=Sum({0}.o1_t, {0}.o2_t)".format(name))
  
          configs.append("# h_t")
          configs.append("component-node name={0}.h_t component={0}.h input={0}.c_t".format(name))
@@@ -671,10 -772,10 +679,11 @@@ class XconfigFastLstmpLayer(XconfigLaye
                          # the affine layer contains 4 of our old layers -> use a
                          # larger max-change than the normal value of 0.75.
                          'ng-affine-options' : ' max-change=1.5',
 +                        'decay-time':  -1.0,
                          'zeroing-interval' : 20,
-                         'zeroing-threshold' : 15.0
+                         'zeroing-threshold' : 15.0,
 -                        'dropout-proportion' : -1.0 ,# -1.0 stands for no dropout will be added
 -                        'dropout-per-frame' : 'false'  # default normal dropout mode
++                        'dropout-proportion' : -1.0, # If -1.0, no dropout components will be added
++                        'dropout-per-frame' : False  # If false, regular dropout, not per frame.
                          }
  
      def set_derived_configs(self):
              self.config['non-recurrent-projection-dim'] = \
                 self.config['recurrent-projection-dim']
  
 -        if ((self.config['dropout-proportion'] > 1.0 or
 -             self.config['dropout-proportion'] < 0.0) and
 -             self.config['dropout-proportion'] != -1.0 ):
 -             raise xparser_error("dropout-proportion has invalid value {0}.".format(self.config['dropout-proportion']))
 -
 -        if (self.config['dropout-per-frame'] != 'false' and
 -            self.config['dropout-per-frame'] != 'true'):
 -            raise xparser_error("dropout-per-frame has invalid value {0}.".format(self.config['dropout-per-frame']))
      def check_configs(self):
          for key in ['cell-dim', 'recurrent-projection-dim',
                      'non-recurrent-projection-dim']:
              self.config['cell-dim']):
              raise RuntimeError("recurrent+non-recurrent projection dim exceeds "
                                  "cell dim")
++        if ((self.config['dropout-proportion'] > 1.0 or
++             self.config['dropout-proportion'] < 0.0) and
++             self.config['dropout-proportion'] != -1.0 ):
++            raise RuntimeError("dropout-proportion has invalid value {0}.".format(self.config['dropout-proportion']))
++
  
  
      def auxiliary_outputs(self):
                        "".format(self.config['clipping-threshold'],
                                  self.config['zeroing-threshold'],
                                  self.config['zeroing-interval'],
 -                                abs(delay)))
 -        affine_str = self.config['ng-affine-options']
 +                                abs(delay), recurrence_scale))
 +
          lstm_str = self.config['lstm-nonlinearity-options']
 -        lstm_dropout_value = self.config['dropout-proportion']
 -        lstm_dropout_str = 'dropout-proportion='+str(self.config['dropout-proportion'])
 -        lstm_dropout_per_frame_value = self.config['dropout-per-frame']
 -        lstm_dropout_per_frame_str = 'dropout-per-frame='+str(self.config['dropout-per-frame'])
++        dropout_proportion = self.config['dropout-proportion']
++        dropout_per_frame = 'true' if self.config['dropout-per-frame'] else 'false'
  
          configs = []
  
          configs.append("# Component for backprop truncation, to avoid gradient blowup in long training examples.")
          configs.append("component name={0}.cr_trunc type=BackpropTruncationComponent "
                         "dim={1} {2}".format(name, cell_dim + rec_proj_dim, bptrunc_str))
 -        if lstm_dropout_value != -1.0:
 -            configs.append("component name={0}.cr_trunc.dropout type=DropoutComponent dim={1} {2} {3}".format(name, cell_dim + rec_proj_dim, lstm_dropout_str, lstm_dropout_per_frame_str))
++        if dropout_proportion != -1.0:
++            configs.append("component name={0}.cr_trunc.dropout type=DropoutComponent dim={1} "
++                           "dropout-proportion={2} dropout-per-frame={3}"
++                           .format(name, cell_dim + rec_proj_dim, dropout_proportion, dropout_per_frame))
          configs.append("# Component specific to 'projected' LSTM (LSTMP), contains both recurrent");
          configs.append("# and non-recurrent projections")
          configs.append("component name={0}.W_rp type=NaturalGradientAffineComponent input-dim={1} "
          configs.append("# Note: it's not 100% efficient that we have to stitch the c")
          configs.append("# and r back together to truncate them but it probably");
          configs.append("# makes the deriv truncation more accurate .")
 -        if lstm_dropout_value != -1.0:
 -            configs.append("component-node name={0}.cr_trunc component={0}.cr_trunc "
 -                           "input=Append({0}.c, {0}.r)".format(name))
 +        configs.append("component-node name={0}.cr_trunc component={0}.cr_trunc "
 +                       "input=Append({0}.c, {0}.r)".format(name))
-         configs.append("dim-range-node name={0}.c_trunc input-node={0}.cr_trunc "
-                        "dim-offset=0 dim={1}".format(name, cell_dim))
-         configs.append("dim-range-node name={0}.r_trunc input-node={0}.cr_trunc "
-                        "dim-offset={1} dim={2}".format(name, cell_dim, rec_proj_dim))
-         configs.append("### End LSTM Layer '{0}'".format(name))
++        if dropout_proportion != -1.0:
+             configs.append("component-node name={0}.cr_trunc.dropout component={0}.cr_trunc.dropout input={0}.cr_trunc".format(name))
+             configs.append("dim-range-node name={0}.c_trunc input-node={0}.cr_trunc.dropout "
+                            "dim-offset=0 dim={1}".format(name, cell_dim))
+             configs.append("dim-range-node name={0}.r_trunc input-node={0}.cr_trunc.dropout "
+                            "dim-offset={1} dim={2}".format(name, cell_dim, rec_proj_dim))
 -            configs.append("### End LSTM Layer '{0}'".format(name))
+         else:
 -            configs.append("component-node name={0}.cr_trunc component={0}.cr_trunc "
 -                           "input=Append({0}.c, {0}.r)".format(name))
+             configs.append("dim-range-node name={0}.c_trunc input-node={0}.cr_trunc "
+                            "dim-offset=0 dim={1}".format(name, cell_dim))
+             configs.append("dim-range-node name={0}.r_trunc input-node={0}.cr_trunc "
+                            "dim-offset={1} dim={2}".format(name, cell_dim, rec_proj_dim))
 -            configs.append("### End LSTM Layer '{0}'".format(name))
++                configs.append("### End LSTM Layer '{0}'".format(name))
++
          return configs