egs/ami/s5/run_mdm.sh

   1 #!/bin/bash -u
   2
   3 . ./cmd.sh
   4 . ./path.sh
   5
   6 # MDM - Multiple Distant Microphones,
   7 nmics=8 #we use all 8 channels, possible other options are 2 and 4
   8 mic=mdm$nmics
   9
  10 # Path where AMI gets downloaded (or where locally available):
  11 #AMI_DIR=$PWD/wav_db # Default,
  12 AMI_DIR=/export/ws15-ffs-data/corpora/ami # JSALT2015 workshop, cluster AWS-EC2,
  13
  14 # MDM_DIR is directory for beamformed waves,
  15 #MDM_DIR=/disk/data1/s1136550/ami/mdm # [Edinburgh]
  16 MDM_DIR=$AMI_DIR/beamformed # [Default]
  17
  18 [ ! -r data/local/lm/final_lm ] && echo "Please, run 'run_prepare_shared.sh' first!" && exit 1
  19 final_lm=`cat data/local/lm/final_lm`
  20 LM=$final_lm.pr1-7
  21
  22 stage=0
  23 . utils/parse_options.sh
  24
  25 # Set bash to 'debug' mode, it will exit on :
  26 # -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
  27 set -e
  28 set -u
  29 set -o pipefail
  30 set -x
  31
  32 # Download AMI corpus (distant channels), You need around 130GB of free space to get whole data ihm+mdm,
  33 if [ $stage -le 0 ]; then
  34   [ -e data/local/downloads/wget_mdm.sh ] && \
  35     echo "data/local/downloads/wget_mdm.sh already exists, better quit than re-download... (use --stage N)" && \
  36     exit 1
  37   local/ami_download.sh mdm $AMI_DIR
  38 fi
  39
  40 # Beamform-it!
  41 if [ $stage -le 1 ]; then
  42   ! hash BeamformIt && echo "Missing BeamformIt, run 'cd ../../../tools/; make beamformit;'" && exit 1
  43   local/ami_beamform.sh --cmd "$train_cmd" --nj 20 $nmics $AMI_DIR $MDM_DIR
  44 fi
  45
  46 # Prepare mdm data directories,
  47 if [ $stage -le 2 ]; then
  48   local/ami_mdm_data_prep.sh $MDM_DIR $mic
  49   local/ami_mdm_scoring_data_prep.sh $MDM_DIR $mic dev
  50   local/ami_mdm_scoring_data_prep.sh $MDM_DIR $mic eval
  51 fi
  52
  53 # Here starts the normal recipe, which is mostly shared across mic scenarios,
  54 # - for ihm we adapt to speaker by fMLLR,
  55 # - for sdm and mdm we do not adapt for speaker, but for environment only (cmn),
  56
  57 # Feature extraction,
  58 if [ $stage -le 3 ]; then
  59   for dset in train dev eval; do
  60     steps/make_mfcc.sh --nj 15 --cmd "$train_cmd" data/$mic/$dset data/$mic/$dset/log data/$mic/$dset/data
  61     steps/compute_cmvn_stats.sh data/$mic/$dset data/$mic/$dset/log data/$mic/$dset/data
  62   done
  63   for dset in train eval dev; do utils/fix_data_dir.sh data/$mic/$dset; done
  64 fi
  65
  66 if [ $stage -le 4 ]; then
  67   # Taking a subset, now unused, can be handy for quick experiments,
  68   # Full set 77h, reduced set 9.5h,
  69   local/remove_dup_utts.sh 20 data/$mic/train data/$mic/train_nodup # remvove uh-huh,
  70   utils/subset_data_dir.sh --shortest data/$mic/train_nodup 30000 data/$mic/train_30k
  71 fi
  72
  73 # Train systems,
  74 nj=30 # number of parallel jobs,
  75 nj_dev=$(cat data/$mic/dev/spk2utt | wc -l)
  76 nj_eval=$(cat data/$mic/eval/spk2utt | wc -l)
  77
  78 if [ $stage -le 5 ]; then
  79   # Mono,
  80   steps/train_mono.sh --nj $nj --cmd "$train_cmd" --cmvn-opts "--norm-means=true --norm-vars=false" \
  81     data/$mic/train data/lang exp/$mic/mono
  82   steps/align_si.sh --nj $nj --cmd "$train_cmd" \
  83     data/$mic/train data/lang exp/$mic/mono exp/$mic/mono_ali
  84
  85   # Deltas,
  86   steps/train_deltas.sh --cmd "$train_cmd" --cmvn-opts "--norm-means=true --norm-vars=false" \
  87     5000 80000 data/$mic/train data/lang exp/$mic/mono_ali exp/$mic/tri1
  88   steps/align_si.sh --nj $nj --cmd "$train_cmd" \
  89     data/$mic/train data/lang exp/$mic/tri1 exp/$mic/tri1_ali
  90 fi
  91
  92 if [ $stage -le 6 ]; then
  93   # Deltas again, (full train-set),
  94   steps/train_deltas.sh --cmd "$train_cmd" --cmvn-opts "--norm-means=true --norm-vars=false" \
  95     5000 80000 data/$mic/train data/lang exp/$mic/tri1_ali exp/$mic/tri2a
  96   steps/align_si.sh --nj $nj --cmd "$train_cmd" \
  97     data/$mic/train data/lang exp/$mic/tri2a exp/$mic/tri2_ali
  98   # Decode,
  99   graph_dir=exp/$mic/tri2a/graph_${LM}
 100   $highmem_cmd $graph_dir/mkgraph.log \
 101     utils/mkgraph.sh data/lang_${LM} exp/$mic/tri2a $graph_dir
 102   steps/decode.sh --nj $nj_dev --cmd "$decode_cmd" --config conf/decode.conf \
 103     $graph_dir data/$mic/dev exp/$mic/tri2a/decode_dev_${LM}
 104   steps/decode.sh --nj $nj_eval --cmd "$decode_cmd" --config conf/decode.conf \
 105     $graph_dir data/$mic/eval exp/$mic/tri2a/decode_eval_${LM}
 106 fi
 107
 108 # THE TARGET LDA+MLLT+SAT+BMMI PART GOES HERE:
 109
 110 if [ $stage -le 7 ]; then
 111   # Train tri3a, which is LDA+MLLT,
 112   steps/train_lda_mllt.sh --cmd "$train_cmd" \
 113     --splice-opts "--left-context=3 --right-context=3" \
 114     5000 80000 data/$mic/train data/lang exp/$mic/tri2_ali exp/$mic/tri3a
 115   # Decode,
 116   graph_dir=exp/$mic/tri3a/graph_${LM}
 117   $highmem_cmd $graph_dir/mkgraph.log \
 118     utils/mkgraph.sh data/lang_${LM} exp/$mic/tri3a $graph_dir
 119   steps/decode.sh --nj $nj_dev --cmd "$decode_cmd" --config conf/decode.conf \
 120     $graph_dir data/$mic/dev exp/$mic/tri3a/decode_dev_${LM}
 121   steps/decode.sh --nj $nj_eval --cmd "$decode_cmd" --config conf/decode.conf \
 122     $graph_dir data/$mic/eval exp/$mic/tri3a/decode_eval_${LM}
 123 fi
 124
 125 # skip SAT, and build MMI models
 126 nj_mmi=80
 127 if [ $stage -le 8 ]; then
 128   steps/align_si.sh --nj $nj_mmi --cmd "$train_cmd" \
 129     data/$mic/train data/lang exp/$mic/tri3a exp/$mic/tri3a_ali
 130 fi
 131
 132 # At this point you can already run the DNN script:
 133 # local/nnet/run_dnn_lda_mllt.sh $mic
 134 # exit 0
 135
 136 if [ $stage -le 9 ]; then
 137   steps/make_denlats.sh --nj $nj_mmi --cmd "$decode_cmd" --config conf/decode.conf \
 138     data/$mic/train data/lang exp/$mic/tri3a exp/$mic/tri3a_denlats
 139 fi
 140
 141 # 4 iterations of MMI seems to work well overall. The number of iterations is
 142 # used as an explicit argument even though train_mmi.sh will use 4 iterations by
 143 # default.
 144 if [ $stage -le 10 ]; then
 145   num_mmi_iters=4
 146   steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 --num-iters $num_mmi_iters \
 147     data/$mic/train data/lang exp/$mic/tri3a_ali exp/$mic/tri3a_denlats \
 148     exp/$mic/tri3a_mmi_b0.1
 149 fi
 150 if [ $stage -le 11 ]; then
 151   # Decode,
 152   graph_dir=exp/$mic/tri3a/graph_${LM}
 153   for i in 4 3 2 1; do
 154     decode_dir=exp/$mic/tri3a_mmi_b0.1/decode_dev_${i}.mdl_${LM}
 155     steps/decode.sh --nj $nj_dev --cmd "$decode_cmd" --config conf/decode.conf \
 156       --iter $i $graph_dir data/$mic/dev $decode_dir
 157     decode_dir=exp/$mic/tri3a_mmi_b0.1/decode_eval_${i}.mdl_${LM}
 158     steps/decode.sh --nj $nj_eval --cmd "$decode_cmd" --config conf/decode.conf \
 159       --iter $i $graph_dir data/$mic/eval $decode_dir
 160   done
 161 fi
 162
 163 # DNN training. This script is based on egs/swbd/s5b/local/run_dnn.sh
 164 # Some of them would be out of date.
 165 if [ $stage -le 12 ]; then
 166   local/nnet/run_dnn_lda_mllt.sh $mic
 167 fi
 168
 169 echo "Done."