#!/bin/bash
# Copyright      2017   David Snyder
#                2017   Johns Hopkins University (Author: Daniel Garcia-Romero)
#                2017   Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
#
# See README.txt for more info on data required.
# Results (mostly EERs) are inline in comments below.
#
# This example demonstrates a "bare bones" NIST SRE 2016 recipe using i-vectors.
# In the future, we will add score normalization and a more effective form of
# PLDA domain adaptation.

. cmd.sh
. path.sh
set -e
mfccdir=`pwd`/mfcc
vaddir=`pwd`/mfcc

# SRE16 trials
sre16_trials=data/sre16_eval_test/trials
sre16_trials_tgl=data/sre16_eval_test/trials_tgl
sre16_trials_yue=data/sre16_eval_test/trials_yue
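# Each trials line has the form "<enrollment-speaker> <test-segment> target|nontarget";
# field 3 is the ground-truth label, which the scoring stage strips off and the
# EER computation reads back.  An illustrative (made-up) line:
#   1001_sre16 sre16_eval_test_seg_0001 target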

stage=0
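# Note: this script does not parse command-line options; to resume from a
# later stage, edit the value of stage above.  Each block below runs only
# when its index is greater than or equal to $stage.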
if [ $stage -le 0 ]; then
  # Path to some, but not all, of the training corpora
  data_root=/export/corpora/LDC

  # Prepare telephone and microphone speech from Mixer6.
  local/make_mx6.sh $data_root/LDC2013S03 data/

  # Prepare SRE10 test and enroll. Includes microphone interview speech.
  # NOTE: This corpus is now available through the LDC as LDC2017S06.
  local/make_sre10.pl /export/corpora5/SRE/SRE2010/eval/ data/

  # Prepare SRE08 test and enroll. Includes some microphone speech.
  local/make_sre08.pl $data_root/LDC2011S08 $data_root/LDC2011S05 data/

  # This prepares the older NIST SREs from 2004-2006.
  local/make_sre.sh $data_root data/

  # Combine all SREs prior to 2016 and Mixer6 into one dataset.
  utils/combine_data.sh data/sre \
    data/sre2004 data/sre2005_train \
    data/sre2005_test data/sre2006_train \
    data/sre2006_test_1 data/sre2006_test_2 \
    data/sre08 data/mx6 data/sre10
  utils/validate_data_dir.sh --no-text --no-feats data/sre
  utils/fix_data_dir.sh data/sre
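  # (validate_data_dir.sh sanity-checks the combined directory;
  # fix_data_dir.sh sorts its files and filters out utterances that are
  # missing from any of them.)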

  # Prepare SWBD corpora.
  local/make_swbd_cellular1.pl $data_root/LDC2001S13 \
    data/swbd_cellular1_train
  local/make_swbd_cellular2.pl /export/corpora5/LDC/LDC2004S07 \
    data/swbd_cellular2_train
  local/make_swbd2_phase1.pl $data_root/LDC98S75 \
    data/swbd2_phase1_train
  local/make_swbd2_phase2.pl /export/corpora5/LDC/LDC99S79 \
    data/swbd2_phase2_train
  local/make_swbd2_phase3.pl /export/corpora5/LDC/LDC2002S06 \
    data/swbd2_phase3_train

  # Combine all SWB corpora into one dataset.
  utils/combine_data.sh data/swbd \
    data/swbd_cellular1_train data/swbd_cellular2_train \
    data/swbd2_phase1_train data/swbd2_phase2_train data/swbd2_phase3_train

  # Prepare NIST SRE 2016 evaluation data.
  local/make_sre16_eval.pl /export/corpora5/SRE/R149_0_1 data

  # Prepare unlabeled Cantonese and Tagalog development data. This dataset
  # was distributed to SRE participants.
  local/make_sre16_unlabeled.pl /export/corpora5/SRE/LDC2016E46_SRE16_Call_My_Net_Training_Data data
fi

if [ $stage -le 1 ]; then
  # Make MFCCs and compute the energy-based VAD for each dataset.
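  # (The MFCC settings live in conf/mfcc.conf.  The energy-based VAD needs no
  # transcripts; it writes per-frame speech/nonspeech decisions to vad.scp,
  # which later stages use to select voiced frames.)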
  for name in sre swbd sre16_eval_enroll sre16_eval_test sre16_major; do
    steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
      data/${name} exp/make_mfcc $mfccdir
    utils/fix_data_dir.sh data/${name}
    sid/compute_vad_decision.sh --nj 40 --cmd "$train_cmd" \
      data/${name} exp/make_vad $vaddir
    utils/fix_data_dir.sh data/${name}
  done
fi

if [ $stage -le 2 ]; then
  # Train the UBM.
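  # First a 2048-component diagonal-covariance GMM is trained on the
  # unlabeled in-domain SRE16 major data; it then initializes the
  # full-covariance UBM trained below on the same data.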
  sid/train_diag_ubm.sh --cmd "$train_cmd --mem 20G" \
    --nj 40 --num-threads 8 --subsample 1 \
    data/sre16_major 2048 \
    exp/diag_ubm

  sid/train_full_ubm.sh --cmd "$train_cmd --mem 25G" \
    --nj 40 --remove-low-count-gaussians false --subsample 1 \
    data/sre16_major \
    exp/diag_ubm exp/full_ubm
fi

if [ $stage -le 3 ]; then
  # Train the i-vector extractor.
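  # The extractor learns a 600-dimensional total-variability subspace
  # (--ivector-dim 600) on the pooled SWBD and SRE data combined below.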
  utils/combine_data.sh data/swbd_sre data/swbd data/sre
  sid/train_ivector_extractor.sh --cmd "$train_cmd --mem 35G" \
    --ivector-dim 600 \
    --num-iters 5 \
    exp/full_ubm/final.ubm data/swbd_sre \
    exp/extractor
fi

# In this section, we augment the SRE data with reverberation,
# noise, music, and babble, and combine it with the clean SRE
# data.  The combined list will be used to train the PLDA model.
if [ $stage -le 4 ]; then
  utils/data/get_utt2num_frames.sh --nj 40 --cmd "$train_cmd" data/sre
  frame_shift=0.01
  awk -v frame_shift=$frame_shift '{print $1, $2*frame_shift;}' data/sre/utt2num_frames > data/sre/reco2dur
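  # (This converts frame counts to durations in seconds: with a 10 ms frame
  # shift, a hypothetical utt2num_frames line "utt1 30000" becomes the
  # reco2dur line "utt1 300".)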

  if [ ! -d "RIRS_NOISES" ]; then
    # Download the package that includes the real RIRs, simulated RIRs,
    # isotropic noises and point-source noises.
    wget --no-check-certificate http://www.openslr.org/resources/28/rirs_noises.zip
    unzip rirs_noises.zip
  fi

  # Make a version with reverberated speech.
  rvb_opts=()
  rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/smallroom/rir_list")
  rvb_opts+=(--rir-set-parameters "0.5, RIRS_NOISES/simulated_rirs/mediumroom/rir_list")
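  # (In each --rir-set-parameters value, the leading number is the sampling
  # weight for that RIR list, so small-room and medium-room simulated RIRs
  # are drawn with equal probability here.)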

  # Make a reverberated version of the SRE list.  Note that we don't add any
  # additive noise here.
  python steps/data/reverberate_data_dir.py \
    "${rvb_opts[@]}" \
    --speech-rvb-probability 1 \
    --pointsource-noise-addition-probability 0 \
    --isotropic-noise-addition-probability 0 \
    --num-replications 1 \
    --source-sampling-rate 8000 \
    data/sre data/sre_reverb
  cp data/sre/vad.scp data/sre_reverb/
  utils/copy_data_dir.sh --utt-suffix "-reverb" data/sre_reverb data/sre_reverb.new
  rm -rf data/sre_reverb
  mv data/sre_reverb.new data/sre_reverb

  # Prepare the MUSAN corpus, which consists of music, speech, and noise
  # suitable for augmentation.
  local/make_musan.sh /export/corpora/JHU/musan data

  # Get the duration of the MUSAN recordings.  This will be used by the
  # script augment_data_dir.py.
  for name in speech noise music; do
    utils/data/get_utt2dur.sh data/musan_${name}
    mv data/musan_${name}/utt2dur data/musan_${name}/reco2dur
  done

  # Augment with musan_noise.
  python steps/data/augment_data_dir.py --utt-suffix "noise" --fg-interval 1 --fg-snrs "15:10:5:0" --fg-noise-dir "data/musan_noise" data/sre data/sre_noise
  # Augment with musan_music.
  python steps/data/augment_data_dir.py --utt-suffix "music" --bg-snrs "15:10:8:5" --num-bg-noises "1" --bg-noise-dir "data/musan_music" data/sre data/sre_music
  # Augment with musan_speech.
  python steps/data/augment_data_dir.py --utt-suffix "babble" --bg-snrs "20:17:15:13" --num-bg-noises "3:4:5:6:7" --bg-noise-dir "data/musan_speech" data/sre data/sre_babble
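  # (The colon-separated SNR strings list the candidate SNRs in dB, one of
  # which is picked per utterance; for babble, --num-bg-noises "3:4:5:6:7"
  # mixes in between three and seven background speech streams.)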

  # Combine reverb, noise, music, and babble into one directory.
  utils/combine_data.sh data/sre_aug data/sre_reverb data/sre_noise data/sre_music data/sre_babble

  # Take a random subset of the augmentations (64k is roughly the size of
  # the SRE dataset).
  utils/subset_data_dir.sh data/sre_aug 64000 data/sre_aug_64k
  utils/fix_data_dir.sh data/sre_aug_64k

  # Make MFCCs for the augmented data.  Note that we should already have the
  # vad.scp for this data at this point; it is identical to the clean
  # version's, since the augmented utterances have the same durations.
  steps/make_mfcc.sh --mfcc-config conf/mfcc.conf --nj 40 --cmd "$train_cmd" \
    data/sre_aug_64k exp/make_mfcc $mfccdir

  # Combine the clean and augmented SRE list.  This is now roughly
  # double the size of the original clean list.
  utils/combine_data.sh data/sre_combined data/sre_aug_64k data/sre
fi

if [ $stage -le 5 ]; then
  # Extract i-vectors for SRE data (includes Mixer 6).  We'll use this for
  # things like LDA or PLDA.
  sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 40 \
    exp/extractor data/sre_combined \
    exp/ivectors_sre_combined

  # SRE16 major is an unlabeled dataset consisting of Cantonese and
  # Tagalog.  This is useful for things like centering, whitening, and
  # score normalization.
  sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 40 \
    exp/extractor data/sre16_major \
    exp/ivectors_sre16_major

  # The SRE16 test data
  sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 40 \
    exp/extractor data/sre16_eval_test \
    exp/ivectors_sre16_eval_test

  # The SRE16 enroll data
  sid/extract_ivectors.sh --cmd "$train_cmd --mem 6G" --nj 40 \
    exp/extractor data/sre16_eval_enroll \
    exp/ivectors_sre16_eval_enroll
fi

if [ $stage -le 6 ]; then
  # Compute the mean vector for centering the evaluation i-vectors.
  $train_cmd exp/ivectors_sre16_major/log/compute_mean.log \
    ivector-mean scp:exp/ivectors_sre16_major/ivector.scp \
    exp/ivectors_sre16_major/mean.vec || exit 1;

  # This script uses LDA to decrease the dimensionality prior to PLDA.
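  # (The transform is estimated from the mean-subtracted training i-vectors,
  # with speaker labels read from utt2spk, and projects the 600-dimensional
  # i-vectors down to lda_dim dimensions.)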
  lda_dim=200
  $train_cmd exp/ivectors_sre_combined/log/lda.log \
    ivector-compute-lda --total-covariance-factor=0.0 --dim=$lda_dim \
    "ark:ivector-subtract-global-mean scp:exp/ivectors_sre_combined/ivector.scp ark:- |" \
    ark:data/sre_combined/utt2spk exp/ivectors_sre_combined/transform.mat || exit 1;

  # Train the PLDA model.
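  # (The i-vectors are mean-subtracted, LDA-projected, and length-normalized
  # before PLDA estimation; speaker labels come from spk2utt.)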
  $train_cmd exp/ivectors_sre_combined/log/plda.log \
    ivector-compute-plda ark:data/sre_combined/spk2utt \
    "ark:ivector-subtract-global-mean scp:exp/ivectors_sre_combined/ivector.scp ark:- | transform-vec exp/ivectors_sre_combined/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
    exp/ivectors_sre_combined/plda || exit 1;

  # Here we adapt the out-of-domain PLDA model to SRE16 major, a pile
  # of unlabeled in-domain data.  In the future, we will include a
  # clustering-based approach for domain adaptation.
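  # (A rough intuition, not a full description of ivector-adapt-plda: the
  # scale options control how much of the unlabeled in-domain data's excess
  # variability is added to the within-class and between-class covariances
  # of the out-of-domain model, respectively.)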
  $train_cmd exp/ivectors_sre16_major/log/plda_adapt.log \
    ivector-adapt-plda --within-covar-scale=0.75 --between-covar-scale=0.25 \
    exp/ivectors_sre_combined/plda \
    "ark:ivector-subtract-global-mean scp:exp/ivectors_sre16_major/ivector.scp ark:- | transform-vec exp/ivectors_sre_combined/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
    exp/ivectors_sre16_major/plda_adapt || exit 1;
fi

if [ $stage -le 7 ]; then
  # Get results using the out-of-domain PLDA model.
  $train_cmd exp/scores/log/sre16_eval_scoring.log \
    ivector-plda-scoring --normalize-length=true \
    --num-utts=ark:exp/ivectors_sre16_eval_enroll/num_utts.ark \
    "ivector-copy-plda --smoothing=0.0 exp/ivectors_sre_combined/plda - |" \
    "ark:ivector-mean ark:data/sre16_eval_enroll/spk2utt scp:exp/ivectors_sre16_eval_enroll/ivector.scp ark:- | ivector-subtract-global-mean exp/ivectors_sre16_major/mean.vec ark:- ark:- | transform-vec exp/ivectors_sre_combined/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
    "ark:ivector-subtract-global-mean exp/ivectors_sre16_major/mean.vec scp:exp/ivectors_sre16_eval_test/ivector.scp ark:- | transform-vec exp/ivectors_sre_combined/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
    "cat '$sre16_trials' | cut -d\  --fields=1,2 |" exp/scores/sre16_eval_scores || exit 1;

  utils/filter_scp.pl $sre16_trials_tgl exp/scores/sre16_eval_scores > exp/scores/sre16_eval_tgl_scores
  utils/filter_scp.pl $sre16_trials_yue exp/scores/sre16_eval_scores > exp/scores/sre16_eval_yue_scores
  pooled_eer=$(paste $sre16_trials exp/scores/sre16_eval_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
  tgl_eer=$(paste $sre16_trials_tgl exp/scores/sre16_eval_tgl_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
  yue_eer=$(paste $sre16_trials_yue exp/scores/sre16_eval_yue_scores | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
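  # (In the lines above, paste joins each trials line with its scores line,
  # so $3 is the target/nontarget label and $6 is the PLDA score; compute-eer
  # reads "<score> <label>" pairs from stdin.)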
  echo "Using Out-of-Domain PLDA, EER: Pooled ${pooled_eer}%, Tagalog ${tgl_eer}%, Cantonese ${yue_eer}%"
  # EER: Pooled 13.65%, Tagalog 17.73%, Cantonese 9.612%
fi

if [ $stage -le 8 ]; then
  # Get results using an adapted PLDA model.  In the future we'll replace
  # this (or add to this) with a clustering-based approach to PLDA adaptation.
  $train_cmd exp/scores/log/sre16_eval_scoring_adapt.log \
    ivector-plda-scoring --normalize-length=true \
    --num-utts=ark:exp/ivectors_sre16_eval_enroll/num_utts.ark \
    "ivector-copy-plda --smoothing=0.0 exp/ivectors_sre16_major/plda_adapt - |" \
    "ark:ivector-mean ark:data/sre16_eval_enroll/spk2utt scp:exp/ivectors_sre16_eval_enroll/ivector.scp ark:- | ivector-subtract-global-mean exp/ivectors_sre16_major/mean.vec ark:- ark:- | transform-vec exp/ivectors_sre_combined/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
    "ark:ivector-subtract-global-mean exp/ivectors_sre16_major/mean.vec scp:exp/ivectors_sre16_eval_test/ivector.scp ark:- | transform-vec exp/ivectors_sre_combined/transform.mat ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
    "cat '$sre16_trials' | cut -d\  --fields=1,2 |" exp/scores/sre16_eval_scores_adapt || exit 1;

  utils/filter_scp.pl $sre16_trials_tgl exp/scores/sre16_eval_scores_adapt > exp/scores/sre16_eval_tgl_scores_adapt
  utils/filter_scp.pl $sre16_trials_yue exp/scores/sre16_eval_scores_adapt > exp/scores/sre16_eval_yue_scores_adapt
  pooled_eer=$(paste $sre16_trials exp/scores/sre16_eval_scores_adapt | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
  tgl_eer=$(paste $sre16_trials_tgl exp/scores/sre16_eval_tgl_scores_adapt | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
  yue_eer=$(paste $sre16_trials_yue exp/scores/sre16_eval_yue_scores_adapt | awk '{print $6, $3}' | compute-eer - 2>/dev/null)
  echo "Using Adapted PLDA, EER: Pooled ${pooled_eer}%, Tagalog ${tgl_eer}%, Cantonese ${yue_eer}%"
  # EER: Pooled 12.98%, Tagalog 17.8%, Cantonese 8.35%
  #
  # Using the official SRE16 scoring software, we obtain the following equalized results:
  #
  # -- Pooled --
  # EER:          13.08
  # min_Cprimary:  0.72
  # act_Cprimary:  0.73
  #
  # -- Cantonese --
  # EER:           8.23
  # min_Cprimary:  0.59
  # act_Cprimary:  0.59
  #
  # -- Tagalog --
  # EER:          17.87
  # min_Cprimary:  0.84
  # act_Cprimary:  0.87
fi