egs/sre08/v1/sid/nnet3/xvector/extract_xvectors.sh

   1 #!/bin/bash
   2
   3 # Copyright     2017  David Snyder
   4 #               2017  Johns Hopkins University (Author: Daniel Povey)
   5 #               2017  Johns Hopkins University (Author: Daniel Garcia Romero)
   6 # Apache 2.0.
   7
   8 # This script extracts embeddings (called "xvectors" here) from a set of
   9 # utterances, given features and a trained DNN.  The purpose of this script
  10 # is analogous to sid/extract_ivectors.sh: it creates archives of
  11 # vectors that are used in speaker recognition.  Like ivectors, xvectors can
  12 # be used in PLDA or a similar backend for scoring.
  13
  14 # Begin configuration section.
  15 nj=30
  16 cmd="run.pl"
  17 chunk_size=-1 # The chunk size over which the embedding is extracted.
  18               # If left unspecified, it uses the max_chunk_size in the nnet
  19               # directory.
  20 use_gpu=false
  21 stage=0
  22
  23 echo "$0 $@"  # Print the command line for logging
  24
  25 if [ -f path.sh ]; then . ./path.sh; fi
  26 . parse_options.sh || exit 1;
  27
  28 if [ $# != 3 ]; then
  29   echo "Usage: $0 <nnet-dir> <data> <xvector-dir>"
  30   echo " e.g.: $0 exp/xvector_nnet data/train exp/xvectors_train"
  31   echo "main options (for others, see top of script file)"
  32   echo "  --config <config-file>                           # config containing options"
  33   echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  34   echo "  --use-gpu <bool|false>                           # If true, use GPU."
  35   echo "  --nj <n|30>                                      # Number of jobs"
  36   echo "  --stage <stage|0>                                # To control partial reruns"
  37   echo "  --chunk-size <n|-1>                              # If provided, extracts embeddings with specified"
  38   echo "                                                   # chunk size, and averages to produce final embedding"
  39 fi
  40
  41 srcdir=$1
  42 data=$2
  43 dir=$3
  44
  45 for f in $srcdir/final.raw $srcdir/min_chunk_size $srcdir/max_chunk_size $data/feats.scp $data/vad.scp ; do
  46   [ ! -f $f ] && echo "No such file $f" && exit 1;
  47 done
  48
  49 min_chunk_size=`cat $srcdir/min_chunk_size 2>/dev/null`
  50 max_chunk_size=`cat $srcdir/max_chunk_size 2>/dev/null`
  51
  52 nnet=$srcdir/final.raw
  53 if [ -f $srcdir/extract.config ] ; then
  54   echo "$0: using $srcdir/extract.config to extract xvectors"
  55   nnet="nnet3-copy --nnet-config=$srcdir/extract.config $srcdir/final.raw - |"
  56 fi
  57
  58 if [ $chunk_size -le 0 ]; then
  59   chunk_size=$max_chunk_size
  60 fi
  61
  62 if [ $max_chunk_size -lt $chunk_size ]; then
  63   echo "$0: specified chunk size of $chunk_size is larger than the maximum chunk size, $max_chunk_size" && exit 1;
  64 fi
  65
  66 mkdir -p $dir/log
  67
  68 utils/split_data.sh $data $nj
  69 echo "$0: extracting xvectors for $data"
  70 sdata=$data/split$nj/JOB
  71
  72 # Set up the features
  73 feat="ark:apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 scp:${sdata}/feats.scp ark:- | select-voiced-frames ark:- scp,s,cs:${sdata}/vad.scp ark:- |"
  74
  75 if [ $stage -le 0 ]; then
  76   echo "$0: extracting xvectors from nnet"
  77   if $use_gpu; then
  78     for g in $(seq $nj); do
  79       $cmd --gpu 1 ${dir}/log/extract.$g.log \
  80         nnet3-xvector-compute --use-gpu=yes --min-chunk-size=$min_chunk_size --chunk-size=$chunk_size \
  81         "$nnet" "`echo $feat | sed s/JOB/$g/g`" ark,scp:${dir}/xvector.$g.ark,${dir}/xvector.$g.scp || exit 1 &
  82     done
  83     wait
  84   else
  85     $cmd JOB=1:$nj ${dir}/log/extract.JOB.log \
  86       nnet3-xvector-compute --use-gpu=no --min-chunk-size=$min_chunk_size --chunk-size=$chunk_size \
  87       "$nnet" "$feat" ark,scp:${dir}/xvector.JOB.ark,${dir}/xvector.JOB.scp || exit 1;
  88   fi
  89 fi
  90
  91 if [ $stage -le 1 ]; then
  92   echo "$0: combining xvectors across jobs"
  93   for j in $(seq $nj); do cat $dir/xvector.$j.scp; done >$dir/xvector.scp || exit 1;
  94 fi
  95
  96 if [ $stage -le 2 ]; then
  97   # Average the utterance-level xvectors to get speaker-level xvectors.
  98   echo "$0: computing mean of xvectors for each speaker"
  99   $cmd $dir/log/speaker_mean.log \
 100     ivector-mean ark:$data/spk2utt scp:$dir/xvector.scp \
 101     ark,scp:$dir/spk_xvector.ark,$dir/spk_xvector.scp ark,t:$dir/num_utts.ark || exit 1;
 102 fi