#!/bin/bash

. ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
           ## This relates to the queue.

# This is a shell script, but it's recommended that you run the commands one by
# one by copying and pasting into the shell.

#wsj0=/ais/gobi2/speech/WSJ/csr_?_senn_d?
#wsj1=/ais/gobi2/speech/WSJ/csr_senn_d?

#wsj0=/mnt/matylda2/data/WSJ0
#wsj1=/mnt/matylda2/data/WSJ1

#wsj0=/data/corpora0/LDC93S6B
#wsj1=/data/corpora0/LDC94S13B

wsj0=/export/corpora5/LDC/LDC93S6B
wsj1=/export/corpora5/LDC/LDC94S13B

local/wsj_data_prep.sh $wsj0/??-{?,??}.? $wsj1/??-{?,??}.? || exit 1;

# Sometimes, we have seen WSJ distributions that do not have subdirectories
# like '11-13.1', but instead have 'doc', 'si_et_05', etc. directly under the
# wsj0 or wsj1 directories. In such cases, try the following:
#
# corpus=/exports/work/inf_hcrc_cstr_general/corpora/wsj
# local/cstr_wsj_data_prep.sh $corpus
# rm data/local/dict/lexiconp.txt
# $corpus must contain a 'wsj0' and a 'wsj1' subdirectory for this to work.
#
# "nosp" refers to the dictionary before silence probabilities and pronunciation
# probabilities are added.
local/wsj_prepare_dict.sh --dict-suffix "_nosp" || exit 1;

utils/prepare_lang.sh data/local/dict_nosp \
  "<SPOKEN_NOISE>" data/local/lang_tmp_nosp data/lang_nosp || exit 1;
local/wsj_format_data.sh --lang-suffix "_nosp" || exit 1;

# We suggest running the next few commands in the background,
# as they are not a precondition for the system building and
# most of the tests: these commands build a dictionary
# containing many of the OOVs in the WSJ LM training data,
# and an LM trained directly on that data (i.e. not just
# copying the arpa files from the disks from LDC).
# Caution: the commands below will only work if $decode_cmd
# is set up to use qsub; otherwise, just remove the --cmd option.
# NOTE: If you have a setup corresponding to the older cstr_wsj_data_prep.sh style,
# use local/cstr_wsj_extend_dict.sh --dict-suffix "_nosp" $corpus/wsj1/doc/ instead.
(
  local/wsj_extend_dict.sh --dict-suffix "_nosp" $wsj1/13-32.1 && \
  utils/prepare_lang.sh data/local/dict_nosp_larger \
    "<SPOKEN_NOISE>" data/local/lang_tmp_nosp_larger data/lang_nosp_bd && \
  local/wsj_train_lms.sh --dict-suffix "_nosp" && \
  local/wsj_format_local_lms.sh --lang-suffix "_nosp"
) &
# Now make MFCC features.
# By default the MFCC features are written under the data directory itself;
# steps/make_mfcc.sh also accepts optional <log-dir> and <mfcc-dir> arguments if
# you want to store the features somewhere with a largish disk.

for x in test_eval92 test_eval93 test_dev93 train_si284; do
  steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 data/$x || exit 1;
  steps/compute_cmvn_stats.sh data/$x || exit 1;
done
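
# A quick sanity check of the feature extraction (both tools ship with Kaldi):
# utils/validate_data_dir.sh data/train_si284
# feat-to-dim scp:data/train_si284/feats.scp -   # should print 13 for the default MFCC config
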
utils/subset_data_dir.sh --first data/train_si284 7138 data/train_si84 || exit 1

# Now make subset with the shortest 2k utterances from si-84.
utils/subset_data_dir.sh --shortest data/train_si84 2000 data/train_si84_2kshort || exit 1;

# Now make subset with half of the data from si-84.
utils/subset_data_dir.sh data/train_si84 3500 data/train_si84_half || exit 1;
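
# Each subset is a regular data directory, so the sizes are easy to verify, e.g.:
# for d in train_si84 train_si84_2kshort train_si84_half; do
#   echo -n "$d: "; wc -l < data/$d/utt2spk
# done
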
# Note: the --boost-silence option should probably be omitted by default
# for normal setups. It doesn't always help. [it's to discourage non-silence
# models from modeling silence.]
steps/train_mono.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
  data/train_si84_2kshort data/lang_nosp exp/mono0a || exit 1;

(
  utils/mkgraph.sh data/lang_nosp_test_tgpr \
    exp/mono0a exp/mono0a/graph_nosp_tgpr && \
  steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/mono0a/graph_nosp_tgpr \
    data/test_dev93 exp/mono0a/decode_nosp_tgpr_dev93 && \
  steps/decode.sh --nj 8 --cmd "$decode_cmd" exp/mono0a/graph_nosp_tgpr \
    data/test_eval92 exp/mono0a/decode_nosp_tgpr_eval92
) &

steps/align_si.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
  data/train_si84_half data/lang_nosp exp/mono0a exp/mono0a_ali || exit 1;

steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" 2000 10000 \
  data/train_si84_half data/lang_nosp exp/mono0a_ali exp/tri1 || exit 1;

# Wait until the background mono mkgraph.sh job above has finished writing
# data/lang_nosp_test_tgpr/tmp/LG.fst; otherwise the tri1 mkgraph.sh below could
# pick up a partially written file and fail.
while [ ! -s data/lang_nosp_test_tgpr/tmp/LG.fst ]; do
  sleep 20;
done
sleep 30;

utils/mkgraph.sh data/lang_nosp_test_tgpr \
  exp/tri1 exp/tri1/graph_nosp_tgpr || exit 1;

steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/tri1/graph_nosp_tgpr \
  data/test_dev93 exp/tri1/decode_nosp_tgpr_dev93 || exit 1;
steps/decode.sh --nj 8 --cmd "$decode_cmd" exp/tri1/graph_nosp_tgpr \
  data/test_eval92 exp/tri1/decode_nosp_tgpr_eval92 || exit 1;

# Test the various modes of LM rescoring (mode 4 is the default).
# This is just confirming that they give equivalent results.
for mode in 1 2 3 4; do
  steps/lmrescore.sh --mode $mode --cmd "$decode_cmd" \
    data/lang_nosp_test_{tgpr,tg} data/test_dev93 \
    exp/tri1/decode_nosp_tgpr_dev93 \
    exp/tri1/decode_nosp_tgpr_dev93_tg$mode || exit 1;
done
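
# To compare the rescoring modes, it should be enough to look at the best WER in
# each output directory, e.g.:
# for mode in 1 2 3 4; do
#   grep WER exp/tri1/decode_nosp_tgpr_dev93_tg$mode/wer_* | utils/best_wer.sh
# done
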
## The following command demonstrates how to get lattices that are
## "word-aligned" (arcs coincide with words, with boundaries in the right
## place).
#sil_label=`grep '!SIL' data/lang_nosp_test_tgpr/words.txt | awk '{print $2}'`
#steps/word_align_lattices.sh --cmd "$train_cmd" --silence-label $sil_label \
#  data/lang_nosp_test_tgpr exp/tri1/decode_nosp_tgpr_dev93 \
#  exp/tri1/decode_nosp_tgpr_dev93_aligned || exit 1;
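
# To eyeball a lattice (word-aligned or not), lattice-copy can dump it in text
# form; the integer word labels can be looked up in words.txt:
# lattice-copy "ark:gunzip -c exp/tri1/decode_nosp_tgpr_dev93/lat.1.gz |" ark,t:- | head
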
steps/align_si.sh --nj 10 --cmd "$train_cmd" \
  data/train_si84 data/lang_nosp exp/tri1 exp/tri1_ali_si84 || exit 1;

steps/train_lda_mllt.sh --cmd "$train_cmd" \
  --splice-opts "--left-context=3 --right-context=3" 2500 15000 \
  data/train_si84 data/lang_nosp exp/tri1_ali_si84 exp/tri2b || exit 1;

utils/mkgraph.sh data/lang_nosp_test_tgpr \
  exp/tri2b exp/tri2b/graph_nosp_tgpr || exit 1;
steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgpr \
  data/test_dev93 exp/tri2b/decode_nosp_tgpr_dev93 || exit 1;
steps/decode.sh --nj 8 --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgpr \
  data/test_eval92 exp/tri2b/decode_nosp_tgpr_eval92 || exit 1;

# At this point, you could run the example scripts that show how VTLN works.
# We haven't included this in the default recipes yet.
# local/run_vtln.sh --lang-suffix "_nosp"
# local/run_vtln2.sh --lang-suffix "_nosp"

# Now, with dev93, compare lattice rescoring with biglm decoding,
# going from tgpr to tg. Note: the results are not the same, even though they
# should be; I believe this is because the beams are not wide enough. The pruning
# seems to be a bit too narrow in the current scripts (we got at least 0.7% absolute
# improvement from loosening the beams from their current values).

steps/decode_biglm.sh --nj 10 --cmd "$decode_cmd" \
  exp/tri2b/graph_nosp_tgpr data/lang_nosp_test_{tgpr,tg}/G.fst \
  data/test_dev93 exp/tri2b/decode_nosp_tgpr_dev93_tg_biglm

# Baseline via LM rescoring of lattices.
steps/lmrescore.sh --cmd "$decode_cmd" \
  data/lang_nosp_test_tgpr/ data/lang_nosp_test_tg/ \
  data/test_dev93 exp/tri2b/decode_nosp_tgpr_dev93 \
  exp/tri2b/decode_nosp_tgpr_dev93_tg || exit 1;
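
# The biglm decode and the lattice-rescoring baseline can then be compared
# directly on WER:
# for d in exp/tri2b/decode_nosp_tgpr_dev93_tg{,_biglm}; do
#   grep WER $d/wer_* | utils/best_wer.sh
# done
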
# Trying Minimum Bayes Risk decoding (like Confusion Network decoding):
mkdir -p exp/tri2b/decode_nosp_tgpr_dev93_tg_mbr
cp exp/tri2b/decode_nosp_tgpr_dev93_tg/lat.*.gz \
  exp/tri2b/decode_nosp_tgpr_dev93_tg_mbr
local/score_mbr.sh --cmd "$decode_cmd" \
  data/test_dev93/ data/lang_nosp_test_tgpr/ \
  exp/tri2b/decode_nosp_tgpr_dev93_tg_mbr
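
# MBR scoring writes its own scoring files into the _mbr directory, so (assuming
# the usual wer_* naming) the comparison against standard Viterbi scoring is:
# grep WER exp/tri2b/decode_nosp_tgpr_dev93_tg/wer_* | utils/best_wer.sh
# grep WER exp/tri2b/decode_nosp_tgpr_dev93_tg_mbr/wer_* | utils/best_wer.sh
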
# This script trains a delta+delta-delta system. It's not really recommended or
# necessary, but it does contain a demonstration of the decode_fromlats.sh
# script which isn't used elsewhere.
# local/run_deltas.sh

# Align tri2b system with si84 data.
steps/align_si.sh --nj 10 --cmd "$train_cmd" \
  --use-graphs true data/train_si84 \
  data/lang_nosp exp/tri2b exp/tri2b_ali_si84 || exit 1;

local/run_mmi_tri2b.sh --lang-suffix "_nosp"

# From the 2b system, train 3b, which is LDA + MLLT + SAT.
steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
  data/train_si84 data/lang_nosp exp/tri2b_ali_si84 exp/tri3b || exit 1;
utils/mkgraph.sh data/lang_nosp_test_tgpr \
  exp/tri3b exp/tri3b/graph_nosp_tgpr || exit 1;
steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
  exp/tri3b/graph_nosp_tgpr data/test_dev93 \
  exp/tri3b/decode_nosp_tgpr_dev93 || exit 1;
steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
  exp/tri3b/graph_nosp_tgpr data/test_eval92 \
  exp/tri3b/decode_nosp_tgpr_eval92 || exit 1;

# At this point you could run the command below; it demonstrates
# basis-fMLLR adaptation (adaptation using small amounts of adaptation data).
local/run_basis_fmllr.sh --lang-suffix "_nosp"

steps/lmrescore.sh --cmd "$decode_cmd" \
  data/lang_nosp_test_tgpr data/lang_nosp_test_tg \
  data/test_dev93 exp/tri3b/decode_nosp_tgpr_dev93 \
  exp/tri3b/decode_nosp_tgpr_dev93_tg || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" \
  data/lang_nosp_test_tgpr data/lang_nosp_test_tg \
  data/test_eval92 exp/tri3b/decode_nosp_tgpr_eval92 \
  exp/tri3b/decode_nosp_tgpr_eval92_tg || exit 1;

# Trying the larger dictionary ("big-dict"/bd) + locally produced LM.
utils/mkgraph.sh data/lang_nosp_test_bd_tgpr \
  exp/tri3b exp/tri3b/graph_nosp_bd_tgpr || exit 1;

steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 8 \
  exp/tri3b/graph_nosp_bd_tgpr data/test_eval92 \
  exp/tri3b/decode_nosp_bd_tgpr_eval92 || exit 1;
steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 10 \
  exp/tri3b/graph_nosp_bd_tgpr data/test_dev93 \
  exp/tri3b/decode_nosp_bd_tgpr_dev93 || exit 1;

# Example of rescoring with ConstArpaLm.
steps/lmrescore_const_arpa.sh \
  --cmd "$decode_cmd" data/lang_nosp_test_bd_{tgpr,fgconst} \
  data/test_eval92 exp/tri3b/decode_nosp_bd_tgpr_eval92{,_fgconst} || exit 1;
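
# The *_fgconst lang directory is prepared by local/wsj_format_local_lms.sh. If
# you ever need to build a ConstArpaLm lang dir by hand from an ARPA LM, the
# general pattern is the following (the ARPA path here is illustrative, not a
# path this recipe guarantees to exist):
# utils/build_const_arpa_lm.sh data/local/local_lm/4gram-mincount/lm_unpruned.gz \
#   data/lang_nosp_bd data/lang_nosp_test_bd_fgconst
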
steps/lmrescore.sh --cmd "$decode_cmd" \
  data/lang_nosp_test_bd_tgpr data/lang_nosp_test_bd_fg \
  data/test_eval92 exp/tri3b/decode_nosp_bd_tgpr_eval92 \
  exp/tri3b/decode_nosp_bd_tgpr_eval92_fg || exit 1;
steps/lmrescore.sh --cmd "$decode_cmd" \
  data/lang_nosp_test_bd_tgpr data/lang_nosp_test_bd_tg \
  data/test_eval92 exp/tri3b/decode_nosp_bd_tgpr_eval92 \
  exp/tri3b/decode_nosp_bd_tgpr_eval92_tg || exit 1;

# The following two steps, which are a kind of side branch, try mixing up from
# the 3b system. This is to demonstrate that script.
(
  steps/mixup.sh --cmd "$train_cmd" \
    20000 data/train_si84 data/lang_nosp exp/tri3b exp/tri3b_20k || exit 1;
  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 10 \
    exp/tri3b/graph_nosp_tgpr data/test_dev93 \
    exp/tri3b_20k/decode_nosp_tgpr_dev93 || exit 1;
)

# From the 3b system, align all si284 data.
steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \
  data/train_si284 data/lang_nosp exp/tri3b exp/tri3b_ali_si284 || exit 1;

# From the 3b system, train another SAT system (tri4a) with all the si284 data.
steps/train_sat.sh --cmd "$train_cmd" 4200 40000 \
  data/train_si284 data/lang_nosp exp/tri3b_ali_si284 exp/tri4a || exit 1;
(
  utils/mkgraph.sh data/lang_nosp_test_tgpr \
    exp/tri4a exp/tri4a/graph_nosp_tgpr || exit 1;
  steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
    exp/tri4a/graph_nosp_tgpr data/test_dev93 \
    exp/tri4a/decode_nosp_tgpr_dev93 || exit 1;
  steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
    exp/tri4a/graph_nosp_tgpr data/test_eval92 \
    exp/tri4a/decode_nosp_tgpr_eval92 || exit 1;
) &

# This step is just to demonstrate the train_quick.sh script, in which we
# initialize the GMMs from the old system's GMMs.
steps/train_quick.sh --cmd "$train_cmd" 4200 40000 \
  data/train_si284 data/lang_nosp exp/tri3b_ali_si284 exp/tri4b || exit 1;

(
  utils/mkgraph.sh data/lang_nosp_test_tgpr \
    exp/tri4b exp/tri4b/graph_nosp_tgpr || exit 1;
  steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
    exp/tri4b/graph_nosp_tgpr data/test_dev93 \
    exp/tri4b/decode_nosp_tgpr_dev93 || exit 1;
  steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
    exp/tri4b/graph_nosp_tgpr data/test_eval92 \
    exp/tri4b/decode_nosp_tgpr_eval92 || exit 1;

  utils/mkgraph.sh data/lang_nosp_test_bd_tgpr \
    exp/tri4b exp/tri4b/graph_nosp_bd_tgpr || exit 1;
  steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
    exp/tri4b/graph_nosp_bd_tgpr data/test_dev93 \
    exp/tri4b/decode_nosp_bd_tgpr_dev93 || exit 1;
  steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
    exp/tri4b/graph_nosp_bd_tgpr data/test_eval92 \
    exp/tri4b/decode_nosp_bd_tgpr_eval92 || exit 1;
) &

# Silprob for normal lexicon.
steps/get_prons.sh --cmd "$train_cmd" \
  data/train_si284 data/lang_nosp exp/tri4b || exit 1;
utils/dict_dir_add_pronprobs.sh --max-normalize true \
  data/local/dict_nosp \
  exp/tri4b/pron_counts_nowb.txt exp/tri4b/sil_counts_nowb.txt \
  exp/tri4b/pron_bigram_counts_nowb.txt data/local/dict || exit 1
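
# The new dict dir now carries pronunciation and silence probabilities; a quick
# way to see what was added (file names as produced by dict_dir_add_pronprobs.sh,
# if present):
# head -n 3 data/local/dict/lexiconp.txt data/local/dict/lexiconp_silprob.txt
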
utils/prepare_lang.sh data/local/dict \
  "<SPOKEN_NOISE>" data/local/lang_tmp data/lang || exit 1;

for lm_suffix in bg bg_5k tg tg_5k tgpr tgpr_5k; do
  mkdir -p data/lang_test_${lm_suffix}
  cp -r data/lang/* data/lang_test_${lm_suffix}/ || exit 1;
  rm -rf data/lang_test_${lm_suffix}/tmp
  cp data/lang_nosp_test_${lm_suffix}/G.* data/lang_test_${lm_suffix}/
done

# Silprob for larger lexicon.
utils/dict_dir_add_pronprobs.sh --max-normalize true \
  data/local/dict_nosp_larger \
  exp/tri4b/pron_counts_nowb.txt exp/tri4b/sil_counts_nowb.txt \
  exp/tri4b/pron_bigram_counts_nowb.txt data/local/dict_larger || exit 1

utils/prepare_lang.sh data/local/dict_larger \
  "<SPOKEN_NOISE>" data/local/lang_tmp_larger data/lang_bd || exit 1;

for lm_suffix in tgpr tgconst tg fgpr fgconst fg; do
  mkdir -p data/lang_test_bd_${lm_suffix}
  cp -r data/lang_bd/* data/lang_test_bd_${lm_suffix}/ || exit 1;
  rm -rf data/lang_test_bd_${lm_suffix}/tmp
  cp data/lang_nosp_test_bd_${lm_suffix}/G.* data/lang_test_bd_${lm_suffix}/
done

(
  utils/mkgraph.sh data/lang_test_tgpr exp/tri4b exp/tri4b/graph_tgpr || exit 1;
  steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
    exp/tri4b/graph_tgpr data/test_dev93 exp/tri4b/decode_tgpr_dev93 || exit 1;
  steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
    exp/tri4b/graph_tgpr data/test_eval92 exp/tri4b/decode_tgpr_eval92 || exit 1;

  utils/mkgraph.sh data/lang_test_bd_tgpr \
    exp/tri4b exp/tri4b/graph_bd_tgpr || exit 1;
  steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
    exp/tri4b/graph_bd_tgpr data/test_dev93 \
    exp/tri4b/decode_bd_tgpr_dev93 || exit 1;
  steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
    exp/tri4b/graph_bd_tgpr data/test_eval92 \
    exp/tri4b/decode_bd_tgpr_eval92 || exit 1;
) &

# Train and test MMI, and boosted MMI, on tri4b (LDA+MLLT+SAT on
# all the data). Use 30 jobs.
steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
  data/train_si284 data/lang exp/tri4b exp/tri4b_ali_si284 || exit 1;

# These demonstrate how to build a system usable for online decoding with the
# nnet2 setup. (See local/run_nnet2.sh for other, non-online nnet2 setups.)
local/online/run_nnet2.sh
local/online/run_nnet2_baseline.sh
local/online/run_nnet2_discriminative.sh

# Demonstration of RNNLM rescoring on TDNN models. We comment this out by
# default.
# local/run_rnnlms.sh

local/run_mmi_tri4b.sh

#local/run_nnet2.sh

## Segregated some SGMM builds into a separate file.
#local/run_sgmm.sh

# You probably want to run the sgmm2 recipe as it's generally a bit better:
local/run_sgmm2.sh

# We demonstrate MAP adaptation of GMMs to gender-dependent systems here. This also serves
# as a generic way to demonstrate MAP adaptation to different domains.
# local/run_gender_dep.sh

# You probably want to run the hybrid recipe as it is complementary:
local/nnet/run_dnn.sh

# The following demonstrates how to re-segment long audio recordings.
# local/run_segmentation.sh

# The next two commands show how to train a bottleneck network based on the nnet2 setup,
# and build an SGMM system on top of it.
#local/run_bnf.sh
#local/run_bnf_sgmm.sh

# You may also want to try KL-HMM:
#local/run_kl_hmm.sh

# Getting results [see RESULTS file]:
# for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done

# KWS setup. We leave it commented out by default.

# $duration is the length of the search collection, in seconds.
#duration=`feat-to-len scp:data/test_eval92/feats.scp ark,t:- | awk '{x+=$2} END{print x/100;}'`
#local/generate_example_kws.sh data/test_eval92/ data/kws/
#local/kws_data_prep.sh data/lang_test_bd_tgpr/ data/test_eval92/ data/kws/
#
#steps/make_index.sh --cmd "$decode_cmd" --acwt 0.1 \
#  data/kws/ data/lang_test_bd_tgpr/ \
#  exp/tri4b/decode_bd_tgpr_eval92/ \
#  exp/tri4b/decode_bd_tgpr_eval92/kws
#
#steps/search_index.sh --cmd "$decode_cmd" \
#  data/kws \
#  exp/tri4b/decode_bd_tgpr_eval92/kws
#
# If you want to provide the start time for each utterance, you can use the --segments
# option. In WSJ each file is an utterance, so we don't have to set the start time.
#cat exp/tri4b/decode_bd_tgpr_eval92/kws/result.* | \
#  utils/write_kwslist.pl --flen=0.01 --duration=$duration \
#    --normalize=true --map-utter=data/kws/utter_map \
#    - exp/tri4b/decode_bd_tgpr_eval92/kws/kwslist.xml

# A couple of nnet3 recipes:
# local/nnet3/run_tdnn_baseline.sh  # designed for exact comparison with nnet2 recipe
# local/nnet3/run_tdnn.sh           # better absolute results
# local/nnet3/run_lstm.sh           # LSTM recipe
# Bidirectional LSTM recipe:
# local/nnet3/run_lstm.sh --affix bidirectional \
#   --lstm-delay " [-1,1] [-2,2] [-3,3] " \
#   --label-delay 0 \
#   --cell-dim 640 \
#   --recurrent-projection-dim 128 \
#   --non-recurrent-projection-dim 128 \
#   --chunk-left-context 40 \
#   --chunk-right-context 40