[egs] Fixed some issues in the multilingual BABEL example scripts (#1850)
[processor-sdk/kaldi.git] / egs / babel_multilang / s5 / local / nnet3 / run_decode_lang.sh
#!/bin/bash

# Copyright 2016 Pegah Ghahremani

# Decodes with a multilingual model; this script is called from
# local/nnet3/run_tdnn_multilingual.sh.
# It needs a decoding data dir, which is prepared using the
# egs/babel/s5d scripts (i.e. run-4-anydecode.sh).
# If --use-pitch is true, pitch features are added to the high-resolution
# MFCC features.
# If --use-bnf is true, --bnf-nnet-dir (the nnet3 model used for bottleneck
# feature extraction) should be provided.

# Stop on the first failing command, including a failure inside a pipeline.
set -eo pipefail

# Which dataset to decode and how to treat it.
dir=dev10h.pem
kind=

# Feature and iVector switches.
use_pitch=true
use_pitch_ivector=false  # If true, pitch feature is used in ivector extraction.
use_ivector=false

# Naming affixes/suffixes used when composing directory names.
nnet3_affix=
feat_suffix=
ivector_suffix=

# Decoding controls.
decode_stage=-1
iter=final
nj=30

# Parameters for extracting bottleneck (bn) features.
use_bnf=false                        # If true, bottleneck features are extracted
                                     # and appended to the input of the nnet3 model.
bnf_nnet_dir=exp/nnet3/multi_bnf_sp  # Dir of the bottleneck nnet3 model
                                     # (used for bottleneck feature extraction).
use_ivector_bnf=false                # If true, ivectors are used when extracting
                                     # bottleneck features.
# Load the shared recipe variables, then let command-line options be parsed.
. conf/common_vars.sh || exit 1
. utils/parse_options.sh

# Exactly two positional arguments are expected: <lang> <multilingual-nnet3-dir>.
if [ $# -ne 2 ]; then
  echo "Usage: $(basename "$0") --dir <dir-type> <lang> <multilingual-nnet3-dir>"
  echo " e.g.: $(basename "$0") --dir dev2h.pem ASM exp/nnet3/tdnn_multi_sp"
  exit 1
fi

lang=$1
nnet3_dir=$2

# The per-language configuration is mandatory.
langconf=conf/$lang/lang.conf
[ -f "$langconf" ] || {
  echo "$0: Language configuration $langconf does not exist! Use the "
  echo "configurations in ../../babel/s5d/conf/lang/$lang-* as a startup."
  exit 1
}
. "$langconf" || exit 1

# Optional overrides from the current directory.
if [ -f local.conf ]; then
  . local.conf
fi
# Per-language locations.
mfcc=mfcc/$lang
data=data/$lang
# NOTE(review): vector_suffix is not read anywhere in the visible script;
# possibly ivector_suffix was meant -- confirm before changing it.
vector_suffix=_gb

# Names derived from --dir.
dataset_dir=$data/$dir
dataset_id=$dir
dataset_type=${dir%%.*}  # the part of --dir before its first '.'

# By default the dataset kind is inferred from the dataset type
# (dev2h/dev10h are supervised, anything else is unsupervised);
# a non-empty --kind overrides the inference.
if [ -z "${kind}" ]; then
  if [ "$dataset_type" == "dev2h" ] || [ "$dataset_type" == "dev10h" ]; then
    dataset_kind=supervised
  else
    dataset_kind=unsupervised
  fi
else
  dataset_kind=$kind
fi

# Settings for plain high-resolution MFCC features.
# NOTE(review): feat_suffix is assigned unconditionally here, so a value
# supplied through the --feat-suffix option would not survive -- confirm intent.
dataset=$(basename "$dataset_dir")
mfccdir=mfcc_hires/$lang
mfcc_affix=""
hires_config="--mfcc-config conf/mfcc_hires.conf"
nnet3_data_dir=${dataset_dir}_hires
feat_suffix=_hires
ivec_feat_suffix=_hires
log_dir=exp/$lang/make_hires/$dataset

if $use_pitch_ivector; then
  ivec_feat_suffix=_hires_pitch
fi

# With pitch enabled, switch every setting above to its "_pitch" variant.
if $use_pitch; then
  mfcc_affix="_pitch_online"
  hires_config="$hires_config --online-pitch-config conf/pitch.conf"
  # Keep the output directory per language, as in the non-pitch case.
  mfccdir=mfcc_hires_pitch/$lang
  nnet3_data_dir=${dataset_dir}_hires_pitch
  feat_suffix="_hires_pitch"
  log_dir=exp/$lang/make_hires_pitch/$dataset
fi
####################################################################
##
##  Feature extraction for decoding
##
####################################################################
# $(date) is deliberately left unquoted in the echo lines below: word
# splitting collapses the padding spaces in its output.
echo ---------------------------------------------------------------------
echo "Preparing ${dataset_kind} data files in ${dataset_dir} on" $(date)
echo ---------------------------------------------------------------------
# NOTE(review): the outer marker $dataset_dir/.done also gates the hires
# extraction; when it already exists nothing below runs, even if
# ${nnet3_data_dir}/.mfcc.done is missing -- confirm this is intended.
if [ ! -f "$dataset_dir/.done" ]; then
  if [ ! -f "${nnet3_data_dir}/.mfcc.done" ]; then
    echo ---------------------------------------------------------------------
    echo "Preparing ${dataset_kind} MFCC features in  ${nnet3_data_dir} and corresponding "
    echo "iVectors in exp/$lang/nnet3${nnet3_affix}/ivectors_${dataset}${feat_suffix}${ivector_suffix} on" $(date)
    echo ---------------------------------------------------------------------

    # Start from a copy of the source data dir unless the target already exists.
    [ -d "${nnet3_data_dir}" ] || utils/copy_data_dir.sh "$data/$dataset" "${nnet3_data_dir}"

    # $hires_config holds several options and must be word-split, so it
    # stays unquoted.
    steps/make_mfcc${mfcc_affix}.sh --nj $nj $hires_config \
      --cmd "$train_cmd" "${nnet3_data_dir}" "$log_dir" "$mfccdir"
    steps/compute_cmvn_stats.sh "${nnet3_data_dir}" "$log_dir" "$mfccdir"
    utils/fix_data_dir.sh "${nnet3_data_dir}"
    touch "${nnet3_data_dir}/.mfcc.done"
  fi
  touch "$dataset_dir/.done"
fi
# iVector extraction; skipped unless --use-ivector is true, and skipped when
# the .ivector.done marker already exists.
ivector_dir=exp/$lang/nnet3${nnet3_affix}/ivectors_${dataset}${ivec_feat_suffix}${ivector_suffix}
if $use_ivector && [ ! -f "$ivector_dir/.ivector.done" ]; then
  extractor=exp/multi/nnet3${nnet3_affix}/extractor
  ivec_feat_suffix=$feat_suffix

  # Pitch is in the decoding features but not wanted for the iVectors:
  # derive an MFCC-only data dir from the pitch-augmented one.
  if $use_pitch && ! $use_pitch_ivector; then
    ivec_feat_suffix=_hires
    featdir=${dataset_dir}${feat_suffix}
    # Feature dimension minus 3 (presumably the pitch dimensions -- confirm).
    mfcc_only_dim=$(feat-to-dim "scp:$featdir/feats.scp" - | awk '{print $1-3}')
    steps/select_feats.sh --cmd "$train_cmd" --nj $nj "0-$((mfcc_only_dim - 1))" \
      "$featdir" "${dataset_dir}${ivec_feat_suffix}" || exit 1
    steps/compute_cmvn_stats.sh "${dataset_dir}${ivec_feat_suffix}" || exit 1
  fi

  steps/online/nnet2/extract_ivectors_online.sh --cmd "$train_cmd" --nj $nj \
    "${dataset_dir}${ivec_feat_suffix}" "$extractor" "$ivector_dir" || exit 1
  touch "$ivector_dir/.ivector.done"
fi
# Bottleneck features: extract them with the model in $bnf_nnet_dir, then
# append them to the regular features. Each half has its own .done marker.
if $use_bnf; then
  multi_ivector_dir=exp/$lang/nnet3${nnet3_affix}/ivectors_${dataset}${ivec_feat_suffix}${ivector_suffix}

  # Pass the iVector dir to the extractor only when requested.
  ivector_for_bnf_opt=
  if $use_ivector_bnf; then
    ivector_for_bnf_opt="--ivector-dir $multi_ivector_dir"
  fi

  bnf_data_dir=${dataset_dir}_bnf/$lang
  if [ -f "$bnf_data_dir/.done" ]; then
    echo "$0: Skip Bottleneck feature extraction; You can force to run this step deleting $bnf_data_dir/.done."
  else
    # $ivector_for_bnf_opt is empty or two words, so it stays unquoted.
    # The job count here is fixed and does not follow --nj.
    steps/nnet3/make_bottleneck_features.sh --use-gpu true --nj 100 --cmd "$train_cmd" \
      $ivector_for_bnf_opt tdnn_bn.renorm \
      "${dataset_dir}${feat_suffix}" "$bnf_data_dir" \
      "$bnf_nnet_dir" "bnf/$lang" "exp/$lang/make_${dataset}_bnf" || exit 1
    touch "$bnf_data_dir/.done"
  fi

  appended_bnf=${dataset_dir}${feat_suffix}_bnf
  if [ ! -f "$appended_bnf/.done" ]; then
    steps/append_feats.sh  --nj 16 --cmd "$train_cmd" \
      "$bnf_data_dir" "${dataset_dir}${feat_suffix}" \
      "$appended_bnf" "exp/$lang/append${feat_suffix}_bnf" \
      "mfcc${feat_suffix}_bnf/$lang" || exit 1

    steps/compute_cmvn_stats.sh "$appended_bnf" "exp/$lang/make_cmvn${feat_suffix}_bnf" \
      "mfcc${feat_suffix}_bnf/$lang" || exit 1
    touch "$appended_bnf/.done"
  fi
  feat_suffix=${feat_suffix}_bnf
fi
####################################################################
##
## nnet3 model decoding
##
####################################################################
# Build the decoding graph from the language's tri5 system if it is missing.
if [ ! -f "exp/$lang/tri5/graph/HCLG.fst" ]; then
  utils/mkgraph.sh \
    "data/$lang/lang" "exp/$lang/tri5" "exp/$lang/tri5/graph" | tee "exp/$lang/tri5/mkgraph.log"
fi

# Decode only when the language-specific model exists under $nnet3_dir.
if [ -f "$nnet3_dir/$lang/final.mdl" ]; then
  decode=$nnet3_dir/$lang/decode_${dataset_id}

  # Recompute the feature suffixes from the switches so that the data dir
  # name is <dataset_dir>_hires[_pitch][_bnf].
  feat_suffix=_hires
  ivec_feat_suffix=_hires

  # suffix for using other features such as pitch
  if $use_pitch; then
    feat_suffix=${feat_suffix}_pitch
  fi
  if $use_pitch_ivector; then
    ivec_feat_suffix=_hires_pitch
  fi
  if $use_bnf; then
    feat_suffix=${feat_suffix}_bnf
  fi

  ivector_opts=
  if $use_ivector; then
    ivector_opts="--online-ivector-dir exp/$lang/nnet3${nnet3_affix}/ivectors_${dataset_id}${ivec_feat_suffix}${ivector_suffix}"
  fi

  if [ ! -f "$decode/.done" ]; then
    mkdir -p "$decode"
    score_opts="--skip-scoring false"
    # Start from an empty value so nothing stale or inherited is passed
    # along when --iter is empty.
    iter_opt=
    if [ -n "$iter" ]; then
      iter_opt="--iter $iter"
    fi
    # $iter_opt, $score_opts and $ivector_opts each hold zero or more
    # words and must be word-split, so they stay unquoted.
    # $dnn_beam and $dnn_lat_beam are not set in this script; presumably
    # they come from the sourced conf files -- confirm.
    steps/nnet3/decode.sh --nj $nj --cmd "$decode_cmd" $iter_opt \
      --stage $decode_stage \
      --beam $dnn_beam --lattice-beam $dnn_lat_beam \
      $score_opts $ivector_opts \
      "exp/$lang/tri5/graph" "${dataset_dir}${feat_suffix}" "$decode" | tee "$decode/decode.log"

    touch "$decode/.done"
  fi
else
  # Decoding is skipped in this case; say so instead of doing it silently.
  # The exit status is intentionally left unchanged.
  echo "$0: warning: $nnet3_dir/$lang/final.mdl not found; skipping decoding." >&2
fi

echo "Everything looking good...."
exit 0