author    Hossein Hadian <hn.hadian@gmail.com>
          Fri, 4 Aug 2017 23:38:02 +0000 (18:38 -0500)
committer Daniel Povey <dpovey@gmail.com>
          Fri, 4 Aug 2017 23:38:02 +0000 (16:38 -0700)
diff --git a/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh b/egs/aspire/s5/local/chain/tuning/run_blstm_7b.sh
index e54b5f4312811f8972872b84508d17a970125257..9f89799e292c4170220f41ee46e1c4a04acaf646 100755 (executable)
fi
if [ $stage -le 11 ]; then
- echo "$0: creating neural net configs";
+ echo "$0: creating neural net configs using the xconfig parser";
- steps/nnet3/lstm/make_configs.py \
- --feat-dir data/train_rvb_hires \
- --ivector-dir exp/nnet3/ivectors_train_min${min_seg_len} \
- --tree-dir $treedir \
- --splice-indexes="-2,-1,0,1,2 0 0" \
- --lstm-delay=" [-3,3] [-3,3] [-3,3] " \
- --xent-regularize 0.1 \
- --include-log-softmax false \
- --num-lstm-layers 3 \
- --cell-dim 1024 \
- --hidden-dim 1024 \
- --recurrent-projection-dim 256 \
- --non-recurrent-projection-dim 256 \
- --label-delay 0 \
- --self-repair-scale-nonlinearity 0.00001 \
- --self-repair-scale-clipgradient 1.0 \
- $dir/configs || exit 1;
+ num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}')
+ [ -z "$num_targets" ] && { echo "$0: error getting num-targets"; exit 1; }
+ learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
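+ # (note: "print 0.5/..." is python2 syntax; under python3 this line would need print(0.5/$xent_regularize))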
+
+ lstm_opts="decay-time=20"
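+ # decay-time=20 roughly bounds how far back the LSTM can remember, which is believed to help generalization to utterances longer than the training chunks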
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have an input layer with name=input
+ # as the layer immediately preceding the fixed-affine-layer, to enable
+ # the use of the short notation for the descriptor
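+ # (e.g. below, Append(-2,-1,0,1,2,...) is short for Append(Offset(input,-2),...,Offset(input,2),...))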
+ fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # the first splicing is moved before the lda layer, so no splicing here
+
+ # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults
+ fast-lstmp-layer name=blstm1-forward input=lda cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts
+ fast-lstmp-layer name=blstm1-backward input=lda cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts
+ fast-lstmp-layer name=blstm2-forward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts
+ fast-lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts
+
+ fast-lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=-3 $lstm_opts
+ fast-lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) cell-dim=1024 recurrent-projection-dim=256 non-recurrent-projection-dim=256 delay=3 $lstm_opts
+
+ # adding the layers for the chain branch
+ output-layer name=output input=Append(blstm3-forward, blstm3-backward) output-delay=0 include-log-softmax=false dim=$num_targets max-change=1.5
+
+ # adding the layers for the xent branch
+ # This block prints the configs for a separate output that will be
+ # trained with a cross-entropy objective in the 'chain' models; this
+ # has the effect of regularizing the hidden parts of the model. We use
+ # 0.5 / xent_regularize as the learning-rate factor; that factor is
+ # suitable because it means the xent final layer learns at a rate
+ # independent of the regularization constant, and the 0.5 was tuned
+ # so as to make the relative progress similar in the xent and regular
+ # final layers.
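+ # for example, with xent_regularize=0.1 this gives learning_rate_factor = 0.5 / 0.1 = 5.0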
+ output-layer name=output-xent input=Append(blstm3-forward, blstm3-backward) output-delay=0 dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
+
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi
if [ $stage -le 12 ]; then
--egs.chunk-width $chunk_width \
--egs.chunk-left-context $chunk_left_context \
--egs.chunk-right-context $chunk_right_context \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
--egs.dir "$common_egs_dir" \
--trainer.frames-per-iter 1500000 \
--trainer.num-epochs $num_epochs \
local/nnet3/prep_test_aspire.sh --stage 4 --decode-num-jobs 30 --affix "v7" \
--extra-left-context $extra_left_context \
--extra-right-context $extra_right_context \
+ --extra-left-context-initial 0 \
+ --extra-right-context-final 0 \
--frames-per-chunk $chunk_width \
--acwt 1.0 --post-decode-acwt 10.0 \
--window 10 --overlap 5 \
diff --git a/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh b/egs/aspire/s5/local/chain/tuning/run_tdnn_7b.sh
index 4bddb3e5955cfb153786ff55e52e26b561c7a834..42104c577da2364df218ceb833f2c5b419ac1bb0 100755 (executable)
fi
if [ $stage -le 11 ]; then
- echo "$0: creating neural net configs";
+ echo "$0: creating neural net configs using the xconfig parser";
+
+ num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}')
+ [ -z "$num_targets" ] && { echo "$0: error getting num-targets"; exit 1; }
+ learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have an input layer with name=input
+ # as the layer immediately preceding the fixed-affine-layer, to enable
+ # the use of the short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-batchnorm-layer name=tdnn1 dim=1024
+ relu-batchnorm-layer name=tdnn2 input=Append(-1,0,1,2) dim=1024
+ relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=1024
+ relu-batchnorm-layer name=tdnn4 input=Append(-3,0,3) dim=1024
+ relu-batchnorm-layer name=tdnn5 input=Append(-3,0,3) dim=1024
+ relu-batchnorm-layer name=tdnn6 input=Append(-6,-3,0) dim=1024
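+ # (together with the -1,0,1 splicing in the lda layer above, these offsets give the network a total context of 17 frames to the left and 12 to the right)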
+
+ # adding the layers for the chain branch
+ relu-batchnorm-layer name=prefinal-chain input=tdnn6 dim=1024 target-rms=0.5
+ output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5
+
+ # adding the layers for the xent branch
+ # This block prints the configs for a separate output that will be
+ # trained with a cross-entropy objective in the 'chain' models; this
+ # has the effect of regularizing the hidden parts of the model. We use
+ # 0.5 / xent_regularize as the learning-rate factor; that factor is
+ # suitable because it means the xent final layer learns at a rate
+ # independent of the regularization constant, and the 0.5 was tuned
+ # so as to make the relative progress similar in the xent and regular
+ # final layers.
+ relu-batchnorm-layer name=prefinal-xent input=tdnn6 dim=1024 target-rms=0.5
+ output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5
- # create the config files for nnet initialization
- steps/nnet3/tdnn/make_configs.py \
- --self-repair-scale-nonlinearity 0.00001 \
- --feat-dir data/train_rvb_hires \
- --ivector-dir exp/nnet3/ivectors_train_min${min_seg_len} \
- --tree-dir $treedir \
- --relu-dim 1024 \
- --splice-indexes "-1,0,1 -1,0,1,2 -3,0,3 -3,0,3 -3,0,3 -6,-3,0 0" \
- --use-presoftmax-prior-scale false \
- --xent-regularize 0.1 \
- --xent-separate-forward-affine true \
- --include-log-softmax false \
- --final-layer-normalize-target 0.5 \
- $dir/configs || exit 1;
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi
if [ $stage -le 12 ]; then
diff --git a/egs/aspire/s5/local/nnet3/prep_test_aspire.sh b/egs/aspire/s5/local/nnet3/prep_test_aspire.sh
index bc4559183b5af48cb6073df8633483a27c9a332b..cb69aaff10b597c0c107a45f8d17bf51075502f0 100755 (executable)
frames_per_chunk=50 # change for (B)LSTM
acwt=0.1 # important to change this when using chain models
post_decode_acwt=1.0 # important to change this when using chain models
+extra_left_context_initial=-1
+extra_right_context_final=-1
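+# for these two options, -1 means "not set", in which case the values of extra_left_context / extra_right_context are used at the utterance boundaries too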
. ./cmd.sh
[ -f ./path.sh ] && . ./path.sh
--acwt $acwt --post-decode-acwt $post_decode_acwt \
--extra-left-context $extra_left_context \
--extra-right-context $extra_right_context \
+ --extra-left-context-initial $extra_left_context_initial \
+ --extra-right-context-final $extra_right_context_final \
--frames-per-chunk "$frames_per_chunk" \
--online-ivector-dir $ivector_dir/ivectors_${segmented_data_set}${ivector_affix}${ivector_scale_affix}_stage1 \
--skip-scoring true --iter $iter \
diff --git a/egs/aspire/s5/local/nnet3/run_blstm.sh b/egs/aspire/s5/local/nnet3/run_blstm.sh
index 71b515aa1126b319e0ff07c5e5a33478e812ad11..8fe53fa4db179c1affac21d7c09a8c6f6cdc00e5 100755 (executable)
#!/bin/bash
-# this is a script to train the nnet3 blstm acoustic model
-# it is based on blstm used in fisher_swbd recipe
+# based on egs/fisher_swbd/s5/local/nnet3/run_lstm.sh
-stage=7 # assuming you already ran the TDNN system ; local/nnet3/run_tdnn.sh
-affix=bidirectional
+stage=7
train_stage=-10
-egs_stage=0
+egs_stage=
+affix=
common_egs_dir=
+reporting_email=
+
+# LSTM options
+label_delay=0
+cell_dim=1024
+hidden_dim=1024
+recurrent_projection_dim=128
+non_recurrent_projection_dim=128
+chunk_width=20
+chunk_left_context=40
+chunk_right_context=40
+
+
+# training options
+num_epochs=6
+initial_effective_lrate=0.0003
+final_effective_lrate=0.00003
+num_jobs_initial=4
+num_jobs_final=22
+momentum=0.5
+num_chunk_per_minibatch=100
+samples_per_iter=20000
remove_egs=true
+# decode options
+extra_left_context=50
+extra_right_context=50
+
+# End configuration section.
+
+echo "$0 $@" # Print the command line for logging
+
. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh
-local/nnet3/run_lstm.sh --stage $stage --train-stage $train_stage --egs-stage "$egs_stage" \
- --affix $affix --lstm-delay " [-1,1] [-2,2] [-3,3] " --label-delay 0 \
- --cell-dim 1024 --recurrent-projection-dim 128 --non-recurrent-projection-dim 128 \
- --chunk-left-context 40 --chunk-right-context 40 \
- --extra-left-context 50 --extra-right-context 50 \
- --common-egs-dir "$common_egs_dir" --remove-egs "$remove_egs"
+if ! cuda-compiled; then
+ cat <<EOF && exit 1
+This script is intended to be used with GPUs, but you have not compiled Kaldi with CUDA.
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.
+EOF
+fi
+
+if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
+ cmd_opts=" --config conf/queue_only_k80.conf --only-k80 false"
+fi
+
+
+dir=exp/nnet3/lstm_bidirectional
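+# ${affix:+_$affix} expands to "_$affix" when affix is non-empty, and to nothing otherwise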
+dir=$dir${affix:+_$affix}
+if [ $label_delay -gt 0 ]; then dir=${dir}_ld$label_delay; fi
+ali_dir=exp/tri5a_rvb_ali
+
+local/nnet3/run_ivector_common.sh --stage $stage || exit 1;
+if [ $stage -le 7 ]; then
+ num_targets=$(tree-info $ali_dir/tree | grep num-pdfs | awk '{print $2}')
+ [ -z "$num_targets" ] && { echo "$0: error getting num-targets"; exit 1; }
+
+ lstm_opts="decay-time=20 cell-dim=$cell_dim"
+ lstm_opts+=" recurrent-projection-dim=$recurrent_projection_dim"
+ lstm_opts+=" non-recurrent-projection-dim=$non_recurrent_projection_dim"
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have an input layer with name=input
+ # as the layer immediately preceding the fixed-affine-layer, to enable
+ # the use of the short notation for the descriptor
+ fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults
+ fast-lstmp-layer name=blstm1-forward input=lda delay=-1 $lstm_opts
+ fast-lstmp-layer name=blstm1-backward input=lda delay=1 $lstm_opts
+
+ fast-lstmp-layer name=blstm2-forward input=Append(blstm1-forward, blstm1-backward) delay=-2 $lstm_opts
+ fast-lstmp-layer name=blstm2-backward input=Append(blstm1-forward, blstm1-backward) delay=2 $lstm_opts
+
+ fast-lstmp-layer name=blstm3-forward input=Append(blstm2-forward, blstm2-backward) delay=-3 $lstm_opts
+ fast-lstmp-layer name=blstm3-backward input=Append(blstm2-forward, blstm2-backward) delay=3 $lstm_opts
+
+ output-layer name=output output-delay=$label_delay dim=$num_targets max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs || exit 1
+fi
+
+if [ $stage -le 8 ]; then
+ if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
+ utils/create_split_dir.pl \
+ /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
+ fi
+
+ steps/nnet3/train_rnn.py --stage=$train_stage \
+ --cmd="$decode_cmd $cmd_opts" \
+ --feat.online-ivector-dir=exp/nnet3/ivectors_train \
+ --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
+ --trainer.num-epochs=$num_epochs \
+ --trainer.samples-per-iter=$samples_per_iter \
+ --trainer.optimization.num-jobs-initial=$num_jobs_initial \
+ --trainer.optimization.num-jobs-final=$num_jobs_final \
+ --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
+ --trainer.optimization.final-effective-lrate=$final_effective_lrate \
+ --trainer.optimization.shrink-value 0.99 \
+ --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
+ --trainer.optimization.momentum=$momentum \
+ --egs.opts " --nj 12 " \
+ --egs.chunk-width=$chunk_width \
+ --egs.chunk-left-context=$chunk_left_context \
+ --egs.chunk-right-context=$chunk_right_context \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
+ --egs.dir="$common_egs_dir" \
+ --egs.stage "$egs_stage" \
+ --cleanup.remove-egs=$remove_egs \
+ --cleanup.preserve-model-interval=100 \
+ --use-gpu=true \
+ --feat-dir=data/train_rvb_hires \
+ --ali-dir=$ali_dir \
+ --lang=data/lang \
+ --reporting.email="$reporting_email" \
+ --dir=$dir || exit 1;
+fi
# ASpIRE decodes
+if [ $stage -le 14 ]; then
local/nnet3/prep_test_aspire.sh --stage 1 --decode-num-jobs 30 --affix "v7" \
--extra-left-context 40 --extra-right-context 40 --frames-per-chunk 20 \
+ --extra-left-context-initial 0 \
+ --extra-right-context-final 0 \
--sub-speaker-frames 6000 --window 10 --overlap 5 --max-count 75 --pass2-decode-opts "--min-active 1000" \
--ivector-scale 0.75 dev_aspire data/lang exp/tri5a/graph_pp exp/nnet3/lstm_bidirectional
-
+fi
exit 0;
# final result
diff --git a/egs/aspire/s5/local/nnet3/run_lstm.sh b/egs/aspire/s5/local/nnet3/run_lstm.sh
+++ /dev/null
@@ -1,132 +0,0 @@
-#!/bin/bash
-
-
-# this is a basic lstm script
-# it is also used to run blstm experiments, so it uses a lot more variables
-# than a typical top level script.
-
-# based on egs/fisher_swbd/s5/local/nnet3/run_lstm.sh
-
-stage=7
-train_stage=-10
-egs_stage=
-affix=
-common_egs_dir=
-reporting_email=
-
-# LSTM options
-splice_indexes="-2,-1,0,1,2 0 0"
-lstm_delay=" -1 -2 -3 "
-label_delay=5
-num_lstm_layers=3
-cell_dim=1024
-hidden_dim=1024
-recurrent_projection_dim=256
-non_recurrent_projection_dim=256
-chunk_width=20
-chunk_left_context=40
-chunk_right_context=0
-
-
-# training options
-num_epochs=6
-initial_effective_lrate=0.0003
-final_effective_lrate=0.00003
-num_jobs_initial=4
-num_jobs_final=22
-momentum=0.5
-num_chunk_per_minibatch=100
-samples_per_iter=20000
-remove_egs=true
-
-#decode options
-extra_left_context=
-extra_right_context=
-frames_per_chunk=
-
-# End configuration section.
-
-echo "$0 $@" # Print the command line for logging
-
-. ./cmd.sh
-. ./path.sh
-. ./utils/parse_options.sh
-
-if ! cuda-compiled; then
- cat <<EOF && exit 1
-This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
-If you want to use GPUs (and have them), go to src/, and configure and make on a machine
-where "nvcc" is installed.
-EOF
-fi
-
-if [[ $(hostname -f) == *.clsp.jhu.edu ]]; then
- cmd_opts=" --config conf/queue_only_k80.conf --only-k80 false"
-fi
-
-
-dir=exp/nnet3/lstm
-dir=$dir${affix:+_$affix}
-if [ $label_delay -gt 0 ]; then dir=${dir}_ld$label_delay; fi
-ali_dir=exp/tri5a_rvb_ali
-
-local/nnet3/run_ivector_common.sh --stage $stage || exit 1;
-
-if [ $stage -le 7 ]; then
- echo "$0: creating neural net configs";
- config_extra_opts=()
- [ ! -z "$lstm_delay" ] && config_extra_opts+=(--lstm-delay "$lstm_delay")
- steps/nnet3/lstm/make_configs.py "${config_extra_opts[@]}" \
- --feat-dir data/train_rvb_hires \
- --ivector-dir exp/nnet3/ivectors_train \
- --ali-dir $ali_dir \
- --num-lstm-layers $num_lstm_layers \
- --splice-indexes "$splice_indexes " \
- --cell-dim $cell_dim \
- --hidden-dim $hidden_dim \
- --recurrent-projection-dim $recurrent_projection_dim \
- --non-recurrent-projection-dim $non_recurrent_projection_dim \
- --label-delay $label_delay \
- --self-repair-scale-nonlinearity 0.00001 \
- --self-repair-scale-clipgradient 1.0 \
- $dir/configs || exit 1;
-
-fi
-
-if [ $stage -le 8 ]; then
- if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
- utils/create_split_dir.pl \
- /export/b0{3,4,5,6}/$USER/kaldi-data/egs/aspire-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
- fi
-
- steps/nnet3/train_rnn.py --stage=$train_stage \
- --cmd="$decode_cmd $cmd_opts" \
- --feat.online-ivector-dir=exp/nnet3/ivectors_train \
- --feat.cmvn-opts="--norm-means=false --norm-vars=false" \
- --trainer.num-epochs=$num_epochs \
- --trainer.samples-per-iter=$samples_per_iter \
- --trainer.optimization.num-jobs-initial=$num_jobs_initial \
- --trainer.optimization.num-jobs-final=$num_jobs_final \
- --trainer.optimization.initial-effective-lrate=$initial_effective_lrate \
- --trainer.optimization.final-effective-lrate=$final_effective_lrate \
- --trainer.optimization.shrink-value 0.99 \
- --trainer.rnn.num-chunk-per-minibatch=$num_chunk_per_minibatch \
- --trainer.optimization.momentum=$momentum \
- --egs.opts " --nj 12 " \
- --egs.chunk-width=$chunk_width \
- --egs.chunk-left-context=$chunk_left_context \
- --egs.chunk-right-context=$chunk_right_context \
- --egs.dir="$common_egs_dir" \
- --egs.stage "$egs_stage" \
- --cleanup.remove-egs=$remove_egs \
- --cleanup.preserve-model-interval=100 \
- --use-gpu=true \
- --feat-dir=data/train_rvb_hires \
- --ali-dir=$ali_dir \
- --lang=data/lang \
- --reporting.email="$reporting_email" \
- --dir=$dir || exit 1;
-fi
-
-wait;
-exit 0;
diff --git a/egs/aspire/s5/local/nnet3/run_tdnn.sh b/egs/aspire/s5/local/nnet3/run_tdnn.sh
index f0d66371b112d345c62bdbc2a61354617962b818..6dffe45e04f4a2720c572b9894592fba9c668adb 100755 (executable)
dir=$dir${affix:+_$affix}
if [ $stage -le 7 ]; then
- echo "$0: creating neural net configs";
-
- # create the config files for nnet initialization
- python steps/nnet3/tdnn/make_configs.py \
- --feat-dir data/train_rvb_hires \
- --ivector-dir exp/nnet3/ivectors_train \
- --ali-dir $ali_dir \
- --relu-dim 1248 \
- --splice-indexes "-2,-1,0,1,2 -1,2 -3,3 -3,3 -7,2 0" \
- --use-presoftmax-prior-scale true \
- $dir/configs || exit 1;
+ echo "$0: creating neural net configs using the xconfig parser";
+ num_targets=$(tree-info $ali_dir/tree | grep num-pdfs | awk '{print $2}')
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have an input layer with name=input
+ # as the layer immediately preceding the fixed-affine-layer, to enable
+ # the use of the short notation for the descriptor
+ fixed-affine-layer name=lda input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # the first splicing is moved before the lda layer, so no splicing here
+ relu-renorm-layer name=tdnn1 dim=1248
+ relu-renorm-layer name=tdnn2 dim=1248 input=Append(-1,2)
+ relu-renorm-layer name=tdnn3 dim=1248 input=Append(-3,3)
+ relu-renorm-layer name=tdnn4 dim=1248 input=Append(-3,3)
+ relu-renorm-layer name=tdnn5 dim=1248 input=Append(-7,2)
+ relu-renorm-layer name=tdnn6 dim=1248
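+ # the presoftmax-scale-file option below takes over the role of the old --use-presoftmax-prior-scale true flag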
+ output-layer name=output dim=$num_targets max-change=1.5 presoftmax-scale-file=$dir/configs/presoftmax_prior_scale.vec
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi
if [ $stage -le 8 ]; then
diff --git a/egs/tedlium/s5_r2/local/nnet3/tuning/run_lstm_1a.sh b/egs/tedlium/s5_r2/local/nnet3/tuning/run_lstm_1a.sh
index 5fbeb79991ce02f0ca8af4513ff6a70925ff8580..0f8dc6304f06d3c2d09bb2d48547711786ab32c9 100755 (executable)
# LSTM options
train_stage=-10
-splice_indexes="-2,-1,0,1,2 0 0"
-lstm_delay=" -1 -2 -3 "
label_delay=5
-num_lstm_layers=3
cell_dim=1024
-hidden_dim=1024
recurrent_projection_dim=256
non_recurrent_projection_dim=256
chunk_width=20
num_jobs_initial=3
num_jobs_final=15
momentum=0.5
-num_chunk_per_minibatch=100
+num_chunk_per_minibatch=128,64
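+# a comma-separated value like 128,64 lets the trainer vary the minibatch size, using the smaller size for leftover chunks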
samples_per_iter=20000
remove_egs=true
done
if [ $stage -le 12 ]; then
- echo "$0: creating neural net configs"
- config_extra_opts=()
- [ ! -z "$lstm_delay" ] && config_extra_opts+=(--lstm-delay "$lstm_delay")
- steps/nnet3/lstm/make_configs.py "${config_extra_opts[@]}" \
- --feat-dir $train_data_dir \
- --ivector-dir $train_ivector_dir \
- --ali-dir $ali_dir \
- --num-lstm-layers $num_lstm_layers \
- --splice-indexes "$splice_indexes " \
- --cell-dim $cell_dim \
- --hidden-dim $hidden_dim \
- --recurrent-projection-dim $recurrent_projection_dim \
- --non-recurrent-projection-dim $non_recurrent_projection_dim \
- --label-delay $label_delay \
- --self-repair-scale-nonlinearity 0.00001 \
- $dir/configs || exit 1;
+ echo "$0: creating neural net configs using the xconfig parser";
+
+ num_targets=$(tree-info $gmm_dir/tree | grep num-pdfs | awk '{print $2}')
+ [ -z "$num_targets" ] && { echo "$0: error getting num-targets"; exit 1; }
+
+ lstm_opts="decay-time=20 cell-dim=$cell_dim"
+ lstm_opts+=" recurrent-projection-dim=$recurrent_projection_dim"
+ lstm_opts+=" non-recurrent-projection-dim=$non_recurrent_projection_dim"
+
+ mkdir -p $dir/configs
+ cat <<EOF > $dir/configs/network.xconfig
+ input dim=100 name=ivector
+ input dim=40 name=input
+
+ # please note that it is important to have an input layer with name=input
+ # as the layer immediately preceding the fixed-affine-layer, to enable
+ # the use of the short notation for the descriptor
+ fixed-affine-layer name=lda delay=$label_delay input=Append(-2,-1,0,1,2,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat
+
+ # check steps/libs/nnet3/xconfig/lstm.py for the other options and defaults
+ fast-lstmp-layer name=fastlstm1 delay=-1 $lstm_opts
+ fast-lstmp-layer name=fastlstm2 delay=-2 $lstm_opts
+ fast-lstmp-layer name=fastlstm3 delay=-3 $lstm_opts
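+ # the increasing per-layer delays -1, -2, -3 mirror the old --lstm-delay " -1 -2 -3 " option that this xconfig setup replaces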
+ output-layer name=output output-delay=$label_delay dim=$num_targets max-change=1.5
+EOF
+ steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs || exit 1
fi
if [ $stage -le 13 ]; then
--egs.chunk-width=$chunk_width \
--egs.chunk-left-context=$chunk_left_context \
--egs.chunk-right-context=$chunk_right_context \
+ --egs.chunk-left-context-initial=0 \
+ --egs.chunk-right-context-final=0 \
--egs.dir="$common_egs_dir" \
--cleanup.remove-egs=$remove_egs \
--cleanup.preserve-model-interval=1 \
steps/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" --num-threads 4 \
--extra-left-context $extra_left_context \
--extra-right-context $extra_right_context \
+ --extra-left-context-initial 0 \
+ --extra-right-context-final 0 \
--online-ivector-dir exp/nnet3${nnet3_affix}/ivectors_${dset}_hires \
${graph_dir} data/${dset}_hires ${dir}/decode_${dset} || exit 1
steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" data/lang data/lang_rescore \