egs/babel/s5d/local/chain/run_tdnn_bab2.sh

   1 #!/bin/bash
   2
   3
# Result with the default settings (cleaned-up training data), obtained by running:
# local/chain/run_tdnn_bab2.sh
# %WER 47.7 | 19252 60586 | 56.5 27.2 16.3 4.3 47.7 31.8 | -0.468 | exp/chain_cleaned/tdnnbab2_sp_bi/decode_dev10h.pem/score_9/penalty_0.0/dev10h.pem.ctm.sys
   7
   8 set -e -o pipefail
   9
  10 # First the options that are passed through to run_ivector_common.sh
  11 # (some of which are also used in this script directly).
  12 stage=17
  13 nj=30
  14 min_seg_len=1.55
  15 train_set=train_cleaned
  16 gmm=tri5_cleaned  # the gmm for the target data
  17 langdir=data/langp/tri5_ali
  18 num_threads_ubm=12
  19 nnet3_affix=_cleaned  # cleanup affix for nnet3 and chain dirs, e.g. _cleaned
  20
  21 # The rest are configs specific to this script.  Most of the parameters
  22 # are just hardcoded at this level, in the commands below.
  23 train_stage=-10
  24 tree_affix=  # affix for tree directory, e.g. "a" or "b", in case we change the configuration.
  25 tdnn_affix=bab2  #affix for TDNN directory, e.g. "a" or "b", in case we change the configuration.
  26 common_egs_dir=exp/chain_cleaned/tdnn_sp_bi/egs  # you can set this to use previously dumped egs.
  27
  28 # End configuration section.
  29 echo "$0 $@"  # Print the command line for logging
  30
  31 . ./cmd.sh
  32 . ./path.sh
  33 . ./utils/parse_options.sh
  34
  35
  36 if ! cuda-compiled; then
  37   cat <<EOF && exit 1
  38 This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
  39 If you want to use GPUs (and have them), go to src/, and configure and make on a machine
  40 where "nvcc" is installed.
  41 EOF
  42 fi
  43
  44 local/chain/run_ivector_common.sh --stage $stage \
  45                                   --nj $nj \
  46                                   --min-seg-len $min_seg_len \
  47                                   --train-set $train_set \
  48                                   --gmm $gmm \
  49                                   --num-threads-ubm $num_threads_ubm \
  50                                   --nnet3-affix "$nnet3_affix"
  51
  52
  53 gmm_dir=exp/$gmm
  54 ali_dir=exp/${gmm}_ali_${train_set}_sp_comb
  55 tree_dir=exp/chain${nnet3_affix}/tree_bi${tree_affix}
  56 lat_dir=exp/chain${nnet3_affix}/${gmm}_${train_set}_sp_comb_lats
  57 dir=exp/chain${nnet3_affix}/tdnn${tdnn_affix}_sp_bi
  58 train_data_dir=data/${train_set}_sp_hires_comb
  59 lores_train_data_dir=data/${train_set}_sp_comb
  60 train_ivector_dir=exp/nnet3${nnet3_affix}/ivectors_${train_set}_sp_hires_comb
  61
  62
  63 for f in $gmm_dir/final.mdl $train_data_dir/feats.scp $train_ivector_dir/ivector_online.scp \
  64     $lores_train_data_dir/feats.scp $ali_dir/ali.1.gz $gmm_dir/final.mdl; do
  65   [ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
  66 done
  67
  68 if [ $stage -le 14 ]; then
  69   echo "$0: creating lang directory with one state per phone."
  70   # Create a version of the lang/ directory that has one state per phone in the
  71   # topo file. [note, it really has two states.. the first one is only repeated
  72   # once, the second one has zero or more repeats.]
  73   if [ -d data/lang_chain ]; then
  74     if [ data/lang_chain/L.fst -nt data/lang/L.fst ]; then
  75       echo "$0: data/lang_chain already exists, not overwriting it; continuing"
  76     else
  77       echo "$0: data/lang_chain already exists and seems to be older than data/lang..."
  78       echo " ... not sure what to do.  Exiting."
  79       exit 1;
  80     fi
  81   else
  82     cp -r $langdir data/lang_chain
  83     silphonelist=$(cat data/lang_chain/phones/silence.csl) || exit 1;
  84     nonsilphonelist=$(cat data/lang_chain/phones/nonsilence.csl) || exit 1;
  85     # Use our special topology... note that later on may have to tune this
  86     # topology.
  87     steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >data/lang_chain/topo
  88   fi
  89 fi
  90
  91 if [ $stage -le 15 ]; then
  92   # Get the alignments as lattices (gives the chain training more freedom).
  93   # use the same num-jobs as the alignments
  94   steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \
  95     $langdir $gmm_dir $lat_dir
  96   rm $lat_dir/fsts.*.gz # save space
  97 fi
  98
  99 if [ $stage -le 16 ]; then
 100   # Build a tree using our new topology.  We know we have alignments for the
 101   # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
 102   # those.
 103   if [ -f $tree_dir/final.mdl ]; then
 104     echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
 105     exit 1;
 106   fi
 107   steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
 108       --context-opts "--context-width=2 --central-position=1" \
 109       --cmd "$train_cmd" 4000 ${lores_train_data_dir} data/lang_chain $ali_dir $tree_dir
 110 fi
 111
 112 if [ $stage -le 17 ]; then
 113   mkdir -p $dir
 114
 115   echo "$0: creating neural net configs";
 116
 117   steps/nnet3/tdnn/make_configs.py \
 118     --self-repair-scale 0.00001 \
 119     --feat-dir data/${train_set}_sp_hires_comb \
 120     --ivector-dir $train_ivector_dir \
 121     --tree-dir $tree_dir \
 122     --relu-dim 450 \
 123     --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \
 124     --use-presoftmax-prior-scale false \
 125     --xent-regularize 0.1 \
 126     --xent-separate-forward-affine true \
 127     --include-log-softmax false \
 128     --final-layer-normalize-target 1.0 \
 129    $dir/configs || exit 1;
 130 fi
 131
 132 if [ $stage -le 18 ]; then
 133   if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
 134     utils/create_split_dir.pl \
 135      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/babel-$(date +'%m_%d_%H_%M')/s5d/$RANDOM/$dir/egs/storage $dir/egs/storage
 136   fi
 137   [ ! -d $dir/egs ] && mkdir -p $dir/egs/
 138   touch $dir/egs/.nodelete # keep egs around when that run dies.
 139
 140  steps/nnet3/chain/train.py --stage $train_stage \
 141     --cmd "$decode_cmd" \
 142     --feat.online-ivector-dir $train_ivector_dir \
 143     --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
 144     --chain.xent-regularize 0.1 \
 145     --chain.leaky-hmm-coefficient 0.1 \
 146     --chain.l2-regularize 0.00005 \
 147     --chain.apply-deriv-weights false \
 148     --chain.lm-opts="--num-extra-lm-states=2000" \
 149     --egs.dir "$common_egs_dir" \
 150     --egs.opts "--frames-overlap-per-eg 0" \
 151     --egs.chunk-width 150 \
 152     --trainer.num-chunk-per-minibatch 128 \
 153     --trainer.frames-per-iter 1500000 \
 154     --trainer.num-epochs 2 \
 155     --trainer.optimization.num-jobs-initial 2 \
 156     --trainer.optimization.num-jobs-final 6 \
 157     --trainer.optimization.initial-effective-lrate 0.001 \
 158     --trainer.optimization.final-effective-lrate 0.0001 \
 159     --trainer.max-param-change 2.0 \
 160     --cleanup.remove-egs true \
 161     --feat-dir $train_data_dir \
 162     --tree-dir $tree_dir \
 163     --lat-dir $lat_dir \
 164     --dir $dir
 165 fi
 166
 167
 168
 169 if [ $stage -le 19 ]; then
 170   # Note: it might appear that this data/lang_chain directory is mismatched, and it is as
 171   # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
 172   # the lang directory.
 173   utils/mkgraph.sh --left-biphone --self-loop-scale 1.0 data/langp_test $dir $dir/graph
 174 fi
 175
 176 exit 0