egs/rm/s5/local/chain/tuning/run_tdnn_wsj_rm_1a.sh

   1 #!/bin/bash
   2
# This script uses weight transfer as a transfer-learning method to transfer
# a neural net model already trained on wsj to rm.
#
# Model preparation: The last layers (prefinal and output layer) of the
# already-trained wsj model are removed, and 3 randomly initialized layers
# (new tdnn layer, prefinal, and output) are added to the model.
#
# Training: The transferred layers are retrained with a smaller learning rate,
# while the newly added layers are trained with a larger learning rate using rm data.
  12 # The chain config is as in run_tdnn_5n.sh and the result is:
  13 #System tdnn_5n tdnn_wsj_rm_1a
  14 #WER      2.71     1.68
  15 set -e
  16
  17 # configs for 'chain'
  18 stage=0
  19 train_stage=-10
  20 get_egs_stage=-10
  21 dir=exp/chain/tdnn_wsj_rm_1a
  22 xent_regularize=0.1
  23
  24 # configs for transfer learning
  25 src_mdl=../../wsj/s5/exp/chain/tdnn1d_sp/final.mdl # Input chain model
  26                                                    # trained on source dataset (wsj).
  27                                                    # This model is transfered to the target domain.
  28
  29 src_mfcc_config=../../wsj/s5/conf/mfcc_hires.conf # mfcc config used to extract higher dim
  30                                                   # mfcc features for ivector and DNN training
  31                                                   # in the source domain.
  32 src_ivec_extractor_dir=  # Source ivector extractor dir used to extract ivector for
  33                          # source data. The ivector for target data is extracted using this extractor.
  34                          # It should be nonempty, if ivector is used in the source model training.
  35
  36 common_egs_dir=
  37 primary_lr_factor=0.25 # The learning-rate factor for transferred layers from source
  38                        # model. e.g. if 0, the paramters transferred from source model
  39                        # are fixed.
  40                        # The learning-rate factor for new added layers is 1.0.
  41
  42 nnet_affix=_online_wsj
  43 # End configuration section.
  44
  45 echo "$0 $@"  # Print the command line for logging
  46
  47 . ./cmd.sh
  48 . ./path.sh
  49 . ./utils/parse_options.sh
  50
  51 if ! cuda-compiled; then
  52   cat <<EOF && exit 1
  53 This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
  54 If you want to use GPUs (and have them), go to src/, and configure and make on a machine
  55 where "nvcc" is installed.
  56 EOF
  57 fi
  58
  59 required_files="$src_mfcc_config $src_mdl"
  60 use_ivector=false
  61 ivector_dim=$(nnet3-am-info --print-args=false $src_mdl | grep "ivector-dim" | cut -d" " -f2)
  62 if [ -z $ivector_dim ]; then ivector_dim=0 ; fi
  63
  64 if [ ! -z $src_ivec_extractor_dir ]; then
  65   if [ $ivector_dim -eq 0 ]; then
  66     echo "$0: Source ivector extractor dir '$src_ivec_extractor_dir' is specified "
  67     echo "but ivector is not used in training the source model '$src_mdl'."
  68   else
  69     required_files="$required_files $src_ivec_extractor_dir/final.dubm $src_ivec_extractor_dir/final.mat $src_ivec_extractor_dir/final.ie"
  70     use_ivector=true
  71   fi
  72 else
  73   if [ $ivector_dim -gt 0 ]; then
  74     echo "$0: ivector is used in training the source model '$src_mdl' but no "
  75     echo " --src-ivec-extractor-dir option as ivector dir for source model is specified." && exit 1;
  76   fi
  77 fi
  78
  79 for f in $required_files; do
  80   if [ ! -f $f ]; then
  81     echo "$0: no such file $f." && exit 1;
  82   fi
  83 done
  84
  85 # The iVector-extraction and feature-dumping parts are the same as the standard
  86 # nnet3 setup, and you can skip them by setting "--stage 4" if you have already
  87 # run those things.
  88
  89 ali_dir=exp/tri3b_ali
  90 treedir=exp/chain/tri4_5n_tree
  91 lang=data/lang_chain_5n
  92
  93 local/online/run_nnet2_common.sh  --stage $stage \
  94                                   --ivector-dim $ivector_dim \
  95                                   --nnet-affix "$nnet_affix" \
  96                                   --mfcc-config $src_mfcc_config \
  97                                   --extractor $src_ivec_extractor_dir || exit 1;
  98
  99 if [ $stage -le 4 ]; then
 100   # Get the alignments as lattices (gives the chain training more freedom).
 101   # use the same num-jobs as the alignments
 102   nj=$(cat $ali_dir/num_jobs) || exit 1;
 103   steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/train \
 104     data/lang exp/tri3b exp/tri3b_lats || exit 1;
 105   rm exp/tri3b_lats/fsts.*.gz 2>/dev/null || true # save space
 106 fi
 107
 108 if [ $stage -le 5 ]; then
 109   # Create a version of the lang/ directory that has one state per phone in the
 110   # topo file. [note, it really has two states.. the first one is only repeated
 111   # once, the second one has zero or more repeats.]
 112   rm -r $lang 2>/dev/null || true
 113   cp -r data/lang $lang
 114   silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
 115   nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
 116   # Use our special topology... note that later on may have to tune this
 117   # topology.
 118   steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
 119 fi
 120
 121 if [ $stage -le 6 ]; then
 122   # Build a tree using our new topology.
 123   steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
 124     --leftmost-questions-truncate -1 \
 125     --cmd "$train_cmd" 1200 data/train $lang $ali_dir $treedir || exit 1;
 126 fi
 127
 128 if [ $stage -le 7 ]; then
 129   echo "$0: Create neural net configs using the xconfig parser for";
 130   echo " generating new layers, that are specific to rm. These layers ";
 131   echo " are added to the transferred part of the wsj network.";
 132   num_targets=$(tree-info --print-args=false $treedir/tree |grep num-pdfs|awk '{print $2}')
 133   learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
 134   mkdir -p $dir
 135   mkdir -p $dir/configs
 136   cat <<EOF > $dir/configs/network.xconfig
 137   relu-renorm-layer name=tdnn-target input=Append(tdnn6.renorm@-3,tdnn6.renorm) dim=450
 138   ## adding the layers for chain branch
 139   relu-renorm-layer name=prefinal-chain input=tdnn-target dim=450 target-rms=0.5
 140   output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5
 141   relu-renorm-layer name=prefinal-xent input=tdnn-target dim=450 target-rms=0.5
 142   output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
 143 EOF
 144   steps/nnet3/xconfig_to_configs.py --existing-model $src_mdl \
 145     --xconfig-file  $dir/configs/network.xconfig  \
 146     --config-dir $dir/configs/
 147
 148   # Set the learning-rate-factor to be primary_lr_factor for transferred layers "
 149   # and adding new layers to them.
 150   $train_cmd $dir/log/generate_input_mdl.log \
 151     nnet3-copy --edits="set-learning-rate-factor name=* learning-rate-factor=$primary_lr_factor" $src_mdl - \| \
 152       nnet3-init --srand=1 - $dir/configs/final.config $dir/input.raw  || exit 1;
 153 fi
 154
 155 if [ $stage -le 8 ]; then
 156   echo "$0: generate egs for chain to train new model on rm dataset."
 157   if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
 158     utils/create_split_dir.pl \
 159      /export/b0{3,4,5,6}/$USER/kaldi-data/egs/rm-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
 160   fi
 161   ivector_dir=
 162   if $use_ivector; then ivector_dir="exp/nnet2${nnet_affix}/ivectors" ; fi
 163
 164   steps/nnet3/chain/train.py --stage $train_stage \
 165     --cmd "$decode_cmd" \
 166     --trainer.input-model $dir/input.raw \
 167     --feat.online-ivector-dir "$ivector_dir" \
 168     --chain.xent-regularize $xent_regularize \
 169     --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
 170     --chain.xent-regularize 0.1 \
 171     --chain.leaky-hmm-coefficient 0.1 \
 172     --chain.l2-regularize 0.00005 \
 173     --chain.apply-deriv-weights false \
 174     --chain.lm-opts="--num-extra-lm-states=200" \
 175     --egs.dir "$common_egs_dir" \
 176     --egs.opts "--frames-overlap-per-eg 0" \
 177     --egs.chunk-width 150 \
 178     --trainer.num-chunk-per-minibatch=128 \
 179     --trainer.frames-per-iter 1000000 \
 180     --trainer.num-epochs 2 \
 181     --trainer.optimization.num-jobs-initial=2 \
 182     --trainer.optimization.num-jobs-final=4 \
 183     --trainer.optimization.initial-effective-lrate=0.005 \
 184     --trainer.optimization.final-effective-lrate=0.0005 \
 185     --trainer.max-param-change 2.0 \
 186     --cleanup.remove-egs true \
 187     --feat-dir data/train_hires \
 188     --tree-dir $treedir \
 189     --lat-dir exp/tri3b_lats \
 190     --dir $dir || exit 1;
 191 fi
 192
 193 if [ $stage -le 9 ]; then
 194   # Note: it might appear that this $lang directory is mismatched, and it is as
 195   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
 196   # the lang directory.
 197   ivec_opt=""
 198   if $use_ivector;then
 199     ivec_opt="--online-ivector-dir exp/nnet2${nnet_affix}/ivectors_test"
 200   fi
 201   utils/mkgraph.sh --self-loop-scale 1.0 data/lang $dir $dir/graph
 202   steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
 203     --scoring-opts "--min-lmwt 1" \
 204     --nj 20 --cmd "$decode_cmd" $ivec_opt \
 205     $dir/graph data/test_hires $dir/decode || exit 1;
 206 fi
 207 wait;
 208 exit 0;