#!/bin/bash -u

# Driver for the AMI recipe in the MDM (Multiple Distant Microphones) setup.

source ./cmd.sh
source ./path.sh

# Number of distant channels; all 8 are used here (2 and 4 are the other
# possible settings).
nmics=8
mic=mdm${nmics}

# Where AMI gets downloaded to (or where it is already available locally).
# The default is replaced on a few known sites, selected by DNS domain name.
AMI_DIR=${PWD}/wav_db
case $(hostname -d) in
  fit.vutbr.cz)   # BUT
    AMI_DIR=/mnt/matylda5/iveselyk/KALDI_AMI_WAV
    ;;
  clsp.jhu.edu)   # JHU
    AMI_DIR=/export/corpora4/ami/amicorpus
    ;;
  cstr.ed.ac.uk)  # Edinburgh (no path filled in)
    AMI_DIR=
    ;;
esac

# Directory for the beamformed waves.
MDM_DIR=${AMI_DIR}/beamformed
#MDM_DIR=/disk/data1/s1136550/ami/mdm # Edinburgh,

# The name of the final LM is read from a file; without it we cannot go on.
if [ ! -r data/local/lm/final_lm ]; then
  echo "Please, run 'run_prepare_shared.sh' first!"
  exit 1
fi
final_lm=$(cat data/local/lm/final_lm)
LM=${final_lm}.pr1-7

# First stage to execute (later stages always run as well).
# NOTE(review): presumably overridable with '--stage N' via parse_options.sh.
stage=0
source utils/parse_options.sh

# From here on bash prints every command (-x) and exits on:
# -e 'error', -u 'undefined variable', -o pipefail 'error in pipeline'.
set -euxo pipefail
# Stage 0: download the AMI corpus (distant channels).
# Around 130GB of free space is needed for the whole data (ihm+mdm).
# Re-download is avoided by using 'wget --continue ...'.
if [ "$stage" -le 0 ]; then
  # An already existing download script makes us stop instead of downloading.
  if [ -e data/local/downloads/wget_mdm.sh ]; then
    echo "data/local/downloads/wget_mdm.sh already exists, better quit than re-download... (use --stage N)"
    exit 1
  fi
  local/ami_download.sh mdm $AMI_DIR
fi
# Stage 1: run the beamforming script over the $nmics channels.
if [ "$stage" -le 1 ]; then
  # The BeamformIt binary has to be reachable through PATH.
  if ! hash BeamformIt; then
    echo "Missing BeamformIt, run 'cd ../../../tools/; make beamformit;'"
    exit 1
  fi
  local/ami_beamform.sh --cmd "$train_cmd" --nj 20 $nmics $AMI_DIR $MDM_DIR
fi
# Stage 2: prepare the mdm data directories: the main data prep first,
# then the scoring data prep for 'dev' and 'eval'.
if [ "$stage" -le 2 ]; then
  local/ami_mdm_data_prep.sh $MDM_DIR $mic
  for dset in dev eval; do
    local/ami_mdm_scoring_data_prep.sh $MDM_DIR $mic $dset
  done
fi
# From here on the recipe is mostly shared across the mic scenarios:
#  - for ihm we adapt to the speaker by fMLLR,
#  - for sdm and mdm we do not adapt to the speaker, only to the
#    environment (cmn).

# Stage 3: feature extraction (MFCC + CMVN statistics) for all three sets,
# followed by fixing of the data directories.
if [ "$stage" -le 3 ]; then
  for dset in train dev eval; do
    dset_dir=data/${mic}/${dset}
    steps/make_mfcc.sh --nj 15 --cmd "$train_cmd" \
      $dset_dir $dset_dir/log $dset_dir/data
    steps/compute_cmvn_stats.sh $dset_dir $dset_dir/log $dset_dir/data
  done
  for dset in train eval dev; do
    utils/fix_data_dir.sh data/${mic}/${dset}
  done
fi
# Stage 4: take a 15000-utterance subset of the training data.
# It is unused at the moment, but can be handy for quick experiments
# (full set 77h, reduced set 10.8h).
if [ "$stage" -le 4 ]; then
  train_dir=data/${mic}/train
  utils/subset_data_dir.sh $train_dir 15000 ${train_dir}_15k
fi
# Training of the systems starts here.
# Numbers of parallel jobs: a fixed one for training, and for decoding the
# number of lines in the spk2utt file of the respective set.
nj=30
nj_dev=$(cat data/${mic}/dev/spk2utt | wc -l)
nj_eval=$(cat data/${mic}/eval/spk2utt | wc -l)

# Stage 5: monophone system, then the first delta system (tri1).
if [ "$stage" -le 5 ]; then
  train_data=data/${mic}/train
  cmvn_opts="--norm-means=true --norm-vars=false"

  # Mono: train, then align the training data with it.
  steps/train_mono.sh --nj $nj --cmd "$train_cmd" --cmvn-opts "$cmvn_opts" \
    $train_data data/lang exp/${mic}/mono
  steps/align_si.sh --nj $nj --cmd "$train_cmd" \
    $train_data data/lang exp/${mic}/mono exp/${mic}/mono_ali

  # Deltas: train on the mono alignments, then align again.
  steps/train_deltas.sh --cmd "$train_cmd" --cmvn-opts "$cmvn_opts" \
    5000 80000 $train_data data/lang exp/${mic}/mono_ali exp/${mic}/tri1
  steps/align_si.sh --nj $nj --cmd "$train_cmd" \
    $train_data data/lang exp/${mic}/tri1 exp/${mic}/tri1_ali
fi
# Stage 6: deltas again (tri2a, full train-set) on the tri1 alignments,
# new alignments, and decoding of dev and eval.
if [ "$stage" -le 6 ]; then
  train_data=data/${mic}/train
  model_dir=exp/${mic}/tri2a

  steps/train_deltas.sh --cmd "$train_cmd" \
    --cmvn-opts "--norm-means=true --norm-vars=false" \
    5000 80000 $train_data data/lang exp/${mic}/tri1_ali $model_dir
  steps/align_si.sh --nj $nj --cmd "$train_cmd" \
    $train_data data/lang $model_dir exp/${mic}/tri2_ali

  # Build the decoding graph; its log goes into the graph directory.
  graph_dir=$model_dir/graph_${LM}
  $highmem_cmd $graph_dir/mkgraph.log \
    utils/mkgraph.sh data/lang_${LM} $model_dir $graph_dir

  # Decode dev, then eval; the job count comes from nj_dev / nj_eval.
  for dset in dev eval; do
    nj_name=nj_${dset}
    steps/decode.sh --nj ${!nj_name} --cmd "$decode_cmd" --config conf/decode.conf \
      $graph_dir data/${mic}/${dset} $model_dir/decode_${dset}_${LM}
  done
fi
# The target LDA+MLLT+SAT+BMMI part of the recipe goes here.

# Stage 7: train tri3a, which is LDA+MLLT, and decode dev and eval with it.
if [ "$stage" -le 7 ]; then
  model_dir=exp/${mic}/tri3a

  steps/train_lda_mllt.sh --cmd "$train_cmd" \
    --splice-opts "--left-context=3 --right-context=3" \
    5000 80000 data/${mic}/train data/lang exp/${mic}/tri2_ali $model_dir

  # Build the decoding graph; its log goes into the graph directory.
  graph_dir=$model_dir/graph_${LM}
  $highmem_cmd $graph_dir/mkgraph.log \
    utils/mkgraph.sh data/lang_${LM} $model_dir $graph_dir

  # Decode dev, then eval; the job count comes from nj_dev / nj_eval.
  for dset in dev eval; do
    nj_name=nj_${dset}
    steps/decode.sh --nj ${!nj_name} --cmd "$decode_cmd" --config conf/decode.conf \
      $graph_dir data/${mic}/${dset} $model_dir/decode_${dset}_${LM}
  done
fi
# SAT is skipped; the MMI models are built on top of tri3a.
# Number of parallel jobs used for the alignments and denominator lattices.
nj_mmi=80

# Stage 8: align the training data with tri3a.
if [ "$stage" -le 8 ]; then
  train_data=data/${mic}/train
  steps/align_si.sh --nj $nj_mmi --cmd "$train_cmd" \
    $train_data data/lang exp/${mic}/tri3a exp/${mic}/tri3a_ali
fi

# At this point you can already run the DNN script:
# local/nnet/run_dnn_lda_mllt.sh $mic
# exit 0

# Stage 9: generate the denominator lattices with tri3a.
if [ "$stage" -le 9 ]; then
  train_data=data/${mic}/train
  steps/make_denlats.sh --nj $nj_mmi --cmd "$decode_cmd" --config conf/decode.conf \
    $train_data data/lang exp/${mic}/tri3a exp/${mic}/tri3a_denlats
fi
# Stage 10: MMI training with boosting factor 0.1.
# 4 iterations of MMI seem to work well overall. The number of iterations is
# passed explicitly even though train_mmi.sh uses 4 iterations by default.
if [ "$stage" -le 10 ]; then
  num_mmi_iters=4
  steps/train_mmi.sh --cmd "$train_cmd" --boost 0.1 --num-iters $num_mmi_iters \
    data/${mic}/train data/lang exp/${mic}/tri3a_ali exp/${mic}/tri3a_denlats \
    exp/${mic}/tri3a_mmi_b0.1
fi

# Stage 11: decode dev and eval with the model of every MMI iteration,
# from the last one down to the first, re-using the tri3a graph.
if [ "$stage" -le 11 ]; then
  graph_dir=exp/${mic}/tri3a/graph_${LM}
  mmi_dir=exp/${mic}/tri3a_mmi_b0.1
  for i in {4..1}; do
    for dset in dev eval; do
      nj_name=nj_${dset}   # job count comes from nj_dev / nj_eval
      decode_dir=$mmi_dir/decode_${dset}_${i}.mdl_${LM}
      steps/decode.sh --nj ${!nj_name} --cmd "$decode_cmd" --config conf/decode.conf \
        --iter $i $graph_dir data/${mic}/${dset} $decode_dir
    done
  done
fi
# Stage 12: DNN training.
# The script is based on egs/swbd/s5b/local/run_dnn.sh,
# some parts of it may be out of date.
if [ "$stage" -le 12 ]; then
  local/nnet/run_dnn_lda_mllt.sh $mic
fi
# Stage 13: nnet3 systems, all trained for the microphone setup in $mic.
if [ "$stage" -le 13 ]; then
  # Slightly better WERs can be obtained by using --use-sat-alignments true,
  # however the SAT systems have to be built before that.

  # TDNN model + xent training
  local/nnet3/run_tdnn.sh \
    --mic $mic \
    --use-sat-alignments false

  # LSTM model + xent training
  # NOTE(review): '--stage 10' presumably skips work already done by the
  # TDNN run above; confirm against local/nnet3/run_lstm.sh.
  local/nnet3/run_lstm.sh \
    --mic $mic \
    --stage 10 \
    --train-stage -5 \
    --use-sat-alignments false

  # BLSTM model + xent training
  local/nnet3/run_blstm.sh \
    --mic $mic \
    --stage 10 \
    --train-stage -5 \
    --use-sat-alignments false

  # TDNN model + chain training.
  # The microphone setup is taken from $mic like in all the other calls;
  # it used to be hard-coded to 'sdm1', which belongs to the sdm recipe.
  local/chain/run_tdnn_ami_5.sh --mic $mic --affix msl1.5_45wer
fi

# Older nnet2 scripts. They are still kept here (commented out),
# as the sMBR training scripts for AMI in nnet3 are not committed yet.
#if [ $stage -le 13 ]; then
#  local/online/run_nnet2_ms_perturbed.sh \
#    --mic $mic \
#    --hidden-dim 850 \
#    --splice-indexes "layer0/-2:-1:0:1:2 layer1/-1:2 layer2/-3:3 layer3/-7:2 layer4/-3:3" \
#    --use-sat-alignments false
#
#  local/online/run_nnet2_ms_sp_disc.sh \
#    --mic $mic \
#    --gmm-dir exp/$mic/tri3a \
#    --srcdir exp/$mic/nnet2_online/nnet_ms_sp
#fi

# Printed once, at the very end of the script.
echo "Done."