egs/wsj/s5/run.sh

   1 #!/bin/bash
   2
   3 . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
   4            ## This relates to the queue.
   5
   6 # This is a shell script, but it's recommended that you run the commands one by
   7 # one by copying and pasting into the shell.
   8
   9 #wsj0=/ais/gobi2/speech/WSJ/csr_?_senn_d?
  10 #wsj1=/ais/gobi2/speech/WSJ/csr_senn_d?
  11
  12 #wsj0=/mnt/matylda2/data/WSJ0
  13 #wsj1=/mnt/matylda2/data/WSJ1
  14
  15 #wsj0=/data/corpora0/LDC93S6B
  16 #wsj1=/data/corpora0/LDC94S13B
  17
  18 wsj0=/export/corpora5/LDC/LDC93S6B
  19 wsj1=/export/corpora5/LDC/LDC94S13B
  20
  21 local/wsj_data_prep.sh $wsj0/??-{?,??}.? $wsj1/??-{?,??}.?  || exit 1;
  22
  23 # Sometimes, we have seen WSJ distributions that do not have subdirectories
  24 # like '11-13.1', but instead have 'doc', 'si_et_05', etc. directly under the
  25 # wsj0 or wsj1 directories. In such cases, try the following:
  26 #
  27 # corpus=/exports/work/inf_hcrc_cstr_general/corpora/wsj
  28 # local/cstr_wsj_data_prep.sh $corpus
  29 # rm data/local/dict/lexiconp.txt
  30 # $corpus must contain a 'wsj0' and a 'wsj1' subdirectory for this to work.
  31 #
  32 # "nosp" refers to the dictionary before silence probabilities and pronunciation
  33 # probabilities are added.
  34 local/wsj_prepare_dict.sh --dict-suffix "_nosp" || exit 1;
  35
  36 utils/prepare_lang.sh data/local/dict_nosp \
  37   "<SPOKEN_NOISE>" data/local/lang_tmp_nosp data/lang_nosp || exit 1;
  38
  39 local/wsj_format_data.sh --lang-suffix "_nosp" || exit 1;
  40
  41  # We suggest running the following commands in the background,
  42  # as they are not a precondition for the system building and
  43  # most of the tests: these commands build a dictionary
  44  # containing many of the OOVs in the WSJ LM training data,
  45  # and an LM trained directly on that data (i.e. not just
  46  # copying the arpa files from the disks from LDC).
  47  # Caution: the commands below will only work if $decode_cmd
  48  # is set up to use qsub.  Else, just remove the --cmd option.
  49  # NOTE: If you have a setup corresponding to the cstr_wsj_data_prep.sh style,
  50  # use local/cstr_wsj_extend_dict.sh $corpus/wsj1/doc/ instead.
  51   (
  52    local/wsj_extend_dict.sh --dict-suffix "_nosp" $wsj1/13-32.1  && \
  53    utils/prepare_lang.sh data/local/dict_nosp_larger \
  54      "<SPOKEN_NOISE>" data/local/lang_tmp_nosp_larger data/lang_nosp_bd && \
  55    local/wsj_train_lms.sh --dict-suffix "_nosp" &&
  56    local/wsj_format_local_lms.sh --lang-suffix "_nosp" # &&
  57   ) &
  58
  59 # Now make MFCC features.
  60 # mfccdir should be some place with a largish disk where you
  61 # want to store MFCC features.
  62 mfccdir=mfcc
  63 for x in test_eval92 test_eval93 test_dev93 train_si284; do
  64  steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 \
  65    data/$x exp/make_mfcc/$x $mfccdir || exit 1;
  66  steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
  67 done
  68
  69 utils/subset_data_dir.sh --first data/train_si284 7138 data/train_si84 || exit 1
  70
  71 # Now make subset with the shortest 2k utterances from si-84.
  72 utils/subset_data_dir.sh --shortest data/train_si84 2000 data/train_si84_2kshort || exit 1;
  73
  74 # Now make subset with half of the data from si-84.
  75 utils/subset_data_dir.sh data/train_si84 3500 data/train_si84_half || exit 1;
  76
  77
  78 # Note: the --boost-silence option should probably be omitted by default
  79 # for normal setups.  It doesn't always help. [it's to discourage non-silence
  80 # models from modeling silence.]
  81 steps/train_mono.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
  82   data/train_si84_2kshort data/lang_nosp exp/mono0a || exit 1;
  83
  84 (
  85  utils/mkgraph.sh --mono data/lang_nosp_test_tgpr \
  86    exp/mono0a exp/mono0a/graph_nosp_tgpr && \
  87  steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/mono0a/graph_nosp_tgpr \
  88    data/test_dev93 exp/mono0a/decode_nosp_tgpr_dev93 && \
  89  steps/decode.sh --nj 8 --cmd "$decode_cmd" exp/mono0a/graph_nosp_tgpr \
  90    data/test_eval92 exp/mono0a/decode_nosp_tgpr_eval92
  91 ) &
  92
  93 steps/align_si.sh --boost-silence 1.25 --nj 10 --cmd "$train_cmd" \
  94   data/train_si84_half data/lang_nosp exp/mono0a exp/mono0a_ali || exit 1;
  95
  96 steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" 2000 10000 \
  97   data/train_si84_half data/lang_nosp exp/mono0a_ali exp/tri1 || exit 1;
  98
  99 while [ ! -f data/lang_nosp_test_tgpr/tmp/LG.fst ] || \
 100    [ -z data/lang_nosp_test_tgpr/tmp/LG.fst ]; do
 101   sleep 20;
 102 done
 103 sleep 30;
 104 # or the mono mkgraph.sh might be writing
 105 # data/lang_test_tgpr/tmp/LG.fst which will cause this to fail.
 106
 107 utils/mkgraph.sh data/lang_nosp_test_tgpr \
 108   exp/tri1 exp/tri1/graph_nosp_tgpr || exit 1;
 109
 110 steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/tri1/graph_nosp_tgpr \
 111   data/test_dev93 exp/tri1/decode_nosp_tgpr_dev93 || exit 1;
 112 steps/decode.sh --nj 8 --cmd "$decode_cmd" exp/tri1/graph_nosp_tgpr \
 113   data/test_eval92 exp/tri1/decode_nosp_tgpr_eval92 || exit 1;
 114
 115 # test various modes of LM rescoring (4 is the default one).
 116 # This is just confirming they're equivalent.
 117 for mode in 1 2 3 4; do
 118   steps/lmrescore.sh --mode $mode --cmd "$decode_cmd" \
 119     data/lang_nosp_test_{tgpr,tg} data/test_dev93 \
 120     exp/tri1/decode_nosp_tgpr_dev93 \
 121     exp/tri1/decode_nosp_tgpr_dev93_tg$mode  || exit 1;
 122 done
 123
 124 # Demonstrate how to get lattices that are "word-aligned" (arcs coincide with words, with
 125 # boundaries in the right place).  sil_label is column 2 of the exact "!SIL" row of words.txt.
 126 sil_label=$(awk '$1 == "!SIL" {print $2}' data/lang_nosp_test_tgpr/words.txt)
 127 steps/word_align_lattices.sh --cmd "$train_cmd" --silence-label "$sil_label" \
 128   data/lang_nosp_test_tgpr exp/tri1/decode_nosp_tgpr_dev93 \
 129   exp/tri1/decode_nosp_tgpr_dev93_aligned || exit 1;
 130
 131 steps/align_si.sh --nj 10 --cmd "$train_cmd" \
 132   data/train_si84 data/lang_nosp exp/tri1 exp/tri1_ali_si84 || exit 1;
 133
 134 # Train tri2a, which is deltas + delta-deltas, on si84 data.
 135 steps/train_deltas.sh --cmd "$train_cmd" 2500 15000 \
 136   data/train_si84 data/lang_nosp exp/tri1_ali_si84 exp/tri2a || exit 1;
 137
 138 utils/mkgraph.sh data/lang_nosp_test_tgpr \
 139   exp/tri2a exp/tri2a/graph_nosp_tgpr || exit 1;
 140
 141 steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/tri2a/graph_nosp_tgpr \
 142   data/test_dev93 exp/tri2a/decode_nosp_tgpr_dev93 || exit 1;
 143 steps/decode.sh --nj 8 --cmd "$decode_cmd" exp/tri2a/graph_nosp_tgpr \
 144   data/test_eval92 exp/tri2a/decode_nosp_tgpr_eval92 || exit 1;
 145 # Decode eval92 with tri2a again, this time through the bg_5k graph; stop if the graph fails.
 146 utils/mkgraph.sh data/lang_nosp_test_bg_5k exp/tri2a exp/tri2a/graph_nosp_bg5k || exit 1;
 147 steps/decode.sh --nj 8 --cmd "$decode_cmd" exp/tri2a/graph_nosp_bg5k \
 148   data/test_eval92 exp/tri2a/decode_nosp_eval92_bg5k || exit 1;
 149
 150 steps/train_lda_mllt.sh --cmd "$train_cmd" \
 151   --splice-opts "--left-context=3 --right-context=3" 2500 15000 \
 152   data/train_si84 data/lang_nosp exp/tri1_ali_si84 exp/tri2b || exit 1;
 153
 154 utils/mkgraph.sh data/lang_nosp_test_tgpr \
 155   exp/tri2b exp/tri2b/graph_nosp_tgpr || exit 1;
 156 steps/decode.sh --nj 10 --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgpr \
 157   data/test_dev93 exp/tri2b/decode_nosp_tgpr_dev93 || exit 1;
 158 steps/decode.sh --nj 8 --cmd "$decode_cmd" exp/tri2b/graph_nosp_tgpr \
 159   data/test_eval92 exp/tri2b/decode_nosp_tgpr_eval92 || exit 1;
 160
 161 # At this point, you could run the example scripts that show how VTLN works.
 162 # We haven't included this in the default recipes yet.
 163 # local/run_vtln.sh --lang-suffix "_nosp"
 164 # local/run_vtln2.sh --lang-suffix "_nosp"
 165
 166 # Now, with dev93, compare lattice rescoring with biglm decoding,
 167 # going from tgpr to tg.  Note: results are not the same, even though they should
 168 # be, and I believe this is due to the beams not being wide enough.  The pruning
 169 # seems to be a bit too narrow in the current scripts (got at least 0.7% absolute
 170 # improvement from loosening beams from their current values).
 171 # Take both G.fst files from the "_nosp" lang dirs: data/lang_test_* is only created further down.
 172 steps/decode_biglm.sh --nj 10 --cmd "$decode_cmd" \
 173   exp/tri2b/graph_nosp_tgpr data/lang_nosp_test_{tgpr,tg}/G.fst \
 174   data/test_dev93 exp/tri2b/decode_nosp_tgpr_dev93_tg_biglm || exit 1;
 175
 176 # baseline via LM rescoring of lattices.
 177 steps/lmrescore.sh --cmd "$decode_cmd" \
 178   data/lang_nosp_test_tgpr/ data/lang_nosp_test_tg/ \
 179   data/test_dev93 exp/tri2b/decode_nosp_tgpr_dev93 \
 180   exp/tri2b/decode_nosp_tgpr_dev93_tg || exit 1;
 181
 182 # Trying Minimum Bayes Risk decoding (like Confusion Network decoding) on a copy of the tg lattices:
 183 mkdir -p exp/tri2b/decode_nosp_tgpr_dev93_tg_mbr || exit 1;
 184 cp exp/tri2b/decode_nosp_tgpr_dev93_tg/lat.*.gz \
 185   exp/tri2b/decode_nosp_tgpr_dev93_tg_mbr || exit 1;
 186 local/score_mbr.sh --cmd "$decode_cmd" \
 187   data/test_dev93/ data/lang_nosp_test_tgpr/ \
 188   exp/tri2b/decode_nosp_tgpr_dev93_tg_mbr || exit 1;
 189
 190 steps/decode_fromlats.sh --cmd "$decode_cmd" \
 191   data/test_dev93 data/lang_nosp_test_tgpr exp/tri2b/decode_nosp_tgpr_dev93 \
 192   exp/tri2a/decode_nosp_tgpr_dev93_fromlats || exit 1
 193
 194 # Align tri2b system with si84 data.
 195 steps/align_si.sh  --nj 10 --cmd "$train_cmd" \
 196   --use-graphs true data/train_si84 \
 197   data/lang_nosp exp/tri2b exp/tri2b_ali_si84  || exit 1;
 198
 199 local/run_mmi_tri2b.sh --lang-suffix "_nosp"
 200
 201 # From 2b system, train 3b which is LDA + MLLT + SAT.
 202 steps/train_sat.sh --cmd "$train_cmd" 2500 15000 \
 203   data/train_si84 data/lang_nosp exp/tri2b_ali_si84 exp/tri3b || exit 1;
 204 utils/mkgraph.sh data/lang_nosp_test_tgpr \
 205   exp/tri3b exp/tri3b/graph_nosp_tgpr || exit 1;
 206 steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
 207   exp/tri3b/graph_nosp_tgpr data/test_dev93 \
 208   exp/tri3b/decode_nosp_tgpr_dev93 || exit 1;
 209 steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
 210   exp/tri3b/graph_nosp_tgpr data/test_eval92 \
 211   exp/tri3b/decode_nosp_tgpr_eval92 || exit 1;
 212
 213 # At this point you could run the command below; this gets
 214 # results that demonstrate the basis-fMLLR adaptation (adaptation
 215 # on small amounts of adaptation data).
 216 local/run_basis_fmllr.sh --lang-suffix "_nosp"
 217
 218 steps/lmrescore.sh --cmd "$decode_cmd" \
 219   data/lang_nosp_test_tgpr data/lang_nosp_test_tg \
 220   data/test_dev93 exp/tri3b/decode_nosp_tgpr_dev93 \
 221   exp/tri3b/decode_nosp_tgpr_dev93_tg || exit 1;
 222 steps/lmrescore.sh --cmd "$decode_cmd" \
 223   data/lang_nosp_test_tgpr data/lang_nosp_test_tg \
 224   data/test_eval92 exp/tri3b/decode_nosp_tgpr_eval92 \
 225   exp/tri3b/decode_nosp_tgpr_eval92_tg || exit 1;
 226
 227 # Trying the larger dictionary ("big-dict"/bd) + locally produced LM.  The bd lang dirs are
 228 # expected to come from the background LM job started earlier, so let background jobs finish.
 229 wait
 230 utils/mkgraph.sh data/lang_nosp_test_bd_tgpr exp/tri3b exp/tri3b/graph_nosp_bd_tgpr || exit 1;
 231 steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 8 \
 232   exp/tri3b/graph_nosp_bd_tgpr data/test_eval92 \
 233   exp/tri3b/decode_nosp_bd_tgpr_eval92 || exit 1;
 234 steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 10 \
 235   exp/tri3b/graph_nosp_bd_tgpr data/test_dev93 \
 236   exp/tri3b/decode_nosp_bd_tgpr_dev93 || exit 1;
 237
 238 # Example of rescoring with ConstArpaLm.
 239 steps/lmrescore_const_arpa.sh \
 240   --cmd "$decode_cmd" data/lang_nosp_test_bd_{tgpr,fgconst} \
 241   data/test_eval92 exp/tri3b/decode_nosp_bd_tgpr_eval92{,_fgconst} || exit 1;
 242
 243 steps/lmrescore.sh --cmd "$decode_cmd" \
 244   data/lang_nosp_test_bd_tgpr data/lang_nosp_test_bd_fg \
 245   data/test_eval92 exp/tri3b/decode_nosp_bd_tgpr_eval92 \
 246   exp/tri3b/decode_nosp_bd_tgpr_eval92_fg || exit 1;
 247 steps/lmrescore.sh --cmd "$decode_cmd" \
 248   data/lang_nosp_test_bd_tgpr data/lang_nosp_test_bd_tg \
 249   data/test_eval92 exp/tri3b/decode_nosp_bd_tgpr_eval92 \
 250   exp/tri3b/decode_nosp_bd_tgpr_eval92_tg || exit 1;
 251
 252 # Side-branch: mix up from the 3b system and decode dev93 with the result.
 253 ( # This only demonstrates steps/mixup.sh; "exit 1" in here leaves just this subshell.
 254  steps/mixup.sh --cmd "$train_cmd" \
 255    20000 data/train_si84 data/lang_nosp exp/tri3b exp/tri3b_20k || exit 1;
 256  steps/decode_fmllr.sh --cmd "$decode_cmd" --nj 10 \
 257    exp/tri3b/graph_nosp_tgpr data/test_dev93 \
 258    exp/tri3b_20k/decode_nosp_tgpr_dev93  || exit 1;
 259 )
 260
 261 # From 3b system, align all si284 data.
 262 steps/align_fmllr.sh --nj 20 --cmd "$train_cmd" \
 263   data/train_si284 data/lang_nosp exp/tri3b exp/tri3b_ali_si284 || exit 1;
 264
 265
 266 # From 3b system, train another SAT system (tri4a) with all the si284 data.
 267
 268 steps/train_sat.sh  --cmd "$train_cmd" 4200 40000 \
 269   data/train_si284 data/lang_nosp exp/tri3b_ali_si284 exp/tri4a || exit 1;
 270 (
 271  utils/mkgraph.sh data/lang_nosp_test_tgpr \
 272    exp/tri4a exp/tri4a/graph_nosp_tgpr || exit 1;
 273  steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
 274    exp/tri4a/graph_nosp_tgpr data/test_dev93 \
 275    exp/tri4a/decode_nosp_tgpr_dev93 || exit 1;
 276  steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
 277    exp/tri4a/graph_nosp_tgpr data/test_eval92 \
 278    exp/tri4a/decode_nosp_tgpr_eval92 || exit 1;
 279 ) &
 280
 281
 282 # This step is just to demonstrate the train_quick.sh script, in which we
 283 # initialize the GMMs from the old system's GMMs.
 284 steps/train_quick.sh --cmd "$train_cmd" 4200 40000 \
 285   data/train_si284 data/lang_nosp exp/tri3b_ali_si284 exp/tri4b || exit 1;
 286
 287 (
 288  utils/mkgraph.sh data/lang_nosp_test_tgpr \
 289    exp/tri4b exp/tri4b/graph_nosp_tgpr || exit 1;
 290  steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
 291    exp/tri4b/graph_nosp_tgpr data/test_dev93 \
 292    exp/tri4b/decode_nosp_tgpr_dev93 || exit 1;
 293  steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
 294   exp/tri4b/graph_nosp_tgpr data/test_eval92 \
 295   exp/tri4b/decode_nosp_tgpr_eval92 || exit 1;
 296
 297  utils/mkgraph.sh data/lang_nosp_test_bd_tgpr \
 298    exp/tri4b exp/tri4b/graph_nosp_bd_tgpr || exit 1;
 299  steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
 300    exp/tri4b/graph_nosp_bd_tgpr data/test_dev93 \
 301    exp/tri4b/decode_nosp_bd_tgpr_dev93 || exit 1;
 302  steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
 303   exp/tri4b/graph_nosp_bd_tgpr data/test_eval92 \
 304   exp/tri4b/decode_nosp_bd_tgpr_eval92 || exit 1;
 305 ) &
 306
 307 # Silprob for normal lexicon.
 308 steps/get_prons.sh --cmd "$train_cmd" \
 309   data/train_si284 data/lang_nosp exp/tri4b || exit 1;
 310 utils/dict_dir_add_pronprobs.sh --max-normalize true \
 311   data/local/dict_nosp \
 312   exp/tri4b/pron_counts_nowb.txt exp/tri4b/sil_counts_nowb.txt \
 313   exp/tri4b/pron_bigram_counts_nowb.txt data/local/dict || exit 1
 314
 315 utils/prepare_lang.sh data/local/dict \
 316   "<SPOKEN_NOISE>" data/local/lang_tmp data/lang || exit 1;
 317
 318 for lm_suffix in bg bg_5k tg tg_5k tgpr tgpr_5k; do
 319   mkdir -p data/lang_test_${lm_suffix}
 320   cp -r data/lang/* data/lang_test_${lm_suffix}/ || exit 1;
 321   rm -rf data/lang_test_${lm_suffix}/tmp
 322   cp data/lang_nosp_test_${lm_suffix}/G.* data/lang_test_${lm_suffix}/
 323 done
 324
 325 # Silprob for larger lexicon.
 326 utils/dict_dir_add_pronprobs.sh --max-normalize true \
 327   data/local/dict_nosp_larger \
 328   exp/tri4b/pron_counts_nowb.txt exp/tri4b/sil_counts_nowb.txt \
 329   exp/tri4b/pron_bigram_counts_nowb.txt data/local/dict_larger || exit 1
 330
 331 utils/prepare_lang.sh data/local/dict_larger \
 332   "<SPOKEN_NOISE>" data/local/lang_tmp_larger data/lang_bd || exit 1;
 333
 334 for lm_suffix in tgpr tgconst tg fgpr fgconst fg; do
 335   mkdir -p data/lang_test_bd_${lm_suffix}
 336   cp -r data/lang_bd/* data/lang_test_bd_${lm_suffix}/ || exit 1;
 337   rm -rf data/lang_test_bd_${lm_suffix}/tmp
 338   cp data/lang_nosp_test_bd_${lm_suffix}/G.* data/lang_test_bd_${lm_suffix}/
 339 done
 340
 341 (
 342  utils/mkgraph.sh data/lang_test_tgpr exp/tri4b exp/tri4b/graph_tgpr || exit 1;
 343  steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
 344    exp/tri4b/graph_tgpr data/test_dev93 exp/tri4b/decode_tgpr_dev93 || exit 1;
 345  steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
 346   exp/tri4b/graph_tgpr data/test_eval92 exp/tri4b/decode_tgpr_eval92 || exit 1;
 347
 348  utils/mkgraph.sh data/lang_test_bd_tgpr \
 349    exp/tri4b exp/tri4b/graph_bd_tgpr || exit 1;
 350  steps/decode_fmllr.sh --nj 10 --cmd "$decode_cmd" \
 351    exp/tri4b/graph_bd_tgpr data/test_dev93 \
 352    exp/tri4b/decode_bd_tgpr_dev93 || exit 1;
 353  steps/decode_fmllr.sh --nj 8 --cmd "$decode_cmd" \
 354   exp/tri4b/graph_bd_tgpr data/test_eval92 \
 355   exp/tri4b/decode_bd_tgpr_eval92 || exit 1;
 356 ) &
 357
 358
 359 # Train and test MMI, and boosted MMI, on tri4b (LDA+MLLT+SAT on
 360 # all the data).  Use 30 jobs.
 361 steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \
 362   data/train_si284 data/lang exp/tri4b exp/tri4b_ali_si284 || exit 1;
 363
 364 # These demonstrate how to build a system usable for online-decoding with the nnet2 setup.
 365 # (see local/run_nnet2.sh for other, non-online nnet2 setups).
 366 local/online/run_nnet2.sh
 367 local/online/run_nnet2_baseline.sh
 368 local/online/run_nnet2_discriminative.sh
 369
 370 # Demonstration of RNNLM rescoring on TDNN models. We comment this out by
 371 # default.
 372 # local/run_rnnlms.sh
 373
 374 local/run_mmi_tri4b.sh
 375
 376 #local/run_nnet2.sh
 377
 378 ## Segregated some SGMM builds into a separate file.
 379 #local/run_sgmm.sh
 380
 381 # You probably want to run the sgmm2 recipe as it's generally a bit better:
 382 local/run_sgmm2.sh
 383
 384 # We demonstrate MAP adaptation of GMMs to gender-dependent systems here.  This also serves
 385 # as a generic way to demonstrate MAP adaptation to different domains.
 386 # local/run_gender_dep.sh
 387
 388 # You probably want to run the hybrid recipe as it is complementary:
 389 local/nnet/run_dnn.sh
 390
 391 # The following demonstrate how to re-segment long audios.
 392 # local/run_segmentation.sh
 393
 394 # The next two commands show how to train a bottleneck network based on the nnet2 setup,
 395 # and build an SGMM system on top of it.
 396 #local/run_bnf.sh
 397 #local/run_bnf_sgmm.sh
 398
 399
 400 # You probably want to try KL-HMM
 401 #local/run_kl_hmm.sh
 402
 403 # Getting results [see RESULTS file]
 404 # for x in exp/*/decode*; do [ -d $x ] && grep WER $x/wer_* | utils/best_wer.sh; done
 405
 406
 407 # KWS setup. We leave it commented out by default
 408
 409 # $duration is the length of the search collection, in seconds
 410 #duration=`feat-to-len scp:data/test_eval92/feats.scp  ark,t:- | awk '{x+=$2} END{print x/100;}'`
 411 #local/generate_example_kws.sh data/test_eval92/ data/kws/
 412 #local/kws_data_prep.sh data/lang_test_bd_tgpr/ data/test_eval92/ data/kws/
 413 #
 414 #steps/make_index.sh --cmd "$decode_cmd" --acwt 0.1 \
 415 #  data/kws/ data/lang_test_bd_tgpr/ \
 416 #  exp/tri4b/decode_bd_tgpr_eval92/ \
 417 #  exp/tri4b/decode_bd_tgpr_eval92/kws
 418 #
 419 #steps/search_index.sh --cmd "$decode_cmd" \
 420 #  data/kws \
 421 #  exp/tri4b/decode_bd_tgpr_eval92/kws
 422 #
 423 # If you want to provide the start time for each utterance, you can use the --segments
 424 # option. In WSJ each file is an utterance, so we don't have to set the start time.
 425 #cat exp/tri4b/decode_bd_tgpr_eval92/kws/result.* | \
 426 #  utils/write_kwslist.pl --flen=0.01 --duration=$duration \
 427 #  --normalize=true --map-utter=data/kws/utter_map \
 428 #  - exp/tri4b/decode_bd_tgpr_eval92/kws/kwslist.xml
 429
 430 # # forward-backward decoding example [way to speed up decoding by decoding forward
 431 # # and backward in time]
 432 # local/run_fwdbwd.sh
 433
 434 # # A couple of nnet3 recipes:
 435 # local/nnet3/run_tdnn_baseline.sh  # designed for exact comparison with nnet2 recipe
 436 # local/nnet3/run_tdnn.sh  # better absolute results