Gitweb @ Texas Instruments - Open Source Git Repositories - git.TI.com/gitweb - processor-sdk/kaldi.git/blob - egs/sre08/v1/sid/nnet3/xvector/extract_xvectors.sh
[src,scripts,egs] Xvectors: DNN Embeddings for Speaker Recognition (#1896)
[processor-sdk/kaldi.git] / egs / sre08 / v1 / sid / nnet3 / xvector / extract_xvectors.sh
1 #!/bin/bash
3 # Copyright     2017  David Snyder
4 #               2017  Johns Hopkins University (Author: Daniel Povey)
5 #               2017  Johns Hopkins University (Author: Daniel Garcia Romero)
6 # Apache 2.0.
8 # This script extracts embeddings (called "xvectors" here) from a set of
9 # utterances, given features and a trained DNN.  The purpose of this script
10 # is analogous to sid/extract_ivectors.sh: it creates archives of
11 # vectors that are used in speaker recognition.  Like ivectors, xvectors can
12 # be used in PLDA or a similar backend for scoring.
# Begin configuration section.
# Defaults for the options advertised in the usage message
# (--nj, --cmd, --chunk-size, --use-gpu, --stage).
nj=30          # Number of jobs.
cmd=run.pl     # How to run jobs.
chunk_size=-1  # The chunk size over which the embedding is extracted.
               # If left unspecified (<= 0), the max_chunk_size stored in
               # the nnet directory is used instead.
use_gpu=false  # If true, use GPU.
stage=0        # To control partial reruns.

# Print the command line for logging.
echo "$0 $*"

# Pick up the environment from path.sh when the working directory has one.
if [ -f path.sh ]; then
  . ./path.sh
fi

# Command-line option handling; give up if it reports an error.
. parse_options.sh || exit 1
# Exactly three positional arguments are required: <nnet-dir> <data> <xvector-dir>.
# On any other count, print the usage message and stop; without the exit the
# script would carry on with empty paths.
if [ $# -ne 3 ]; then
  echo "Usage: $0 <nnet-dir> <data> <xvector-dir>"
  echo " e.g.: $0 exp/xvector_nnet data/train exp/xvectors_train"
  echo "main options (for others, see top of script file)"
  echo "  --config <config-file>                           # config containing options"
  echo "  --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
  echo "  --use-gpu <bool|false>                           # If true, use GPU."
  echo "  --nj <n|30>                                      # Number of jobs"
  echo "  --stage <stage|0>                                # To control partial reruns"
  echo "  --chunk-size <n|-1>                              # If provided, extracts embeddings with specified"
  echo "                                                   # chunk size, and averages to produce final embedding"
  exit 1
fi
# Positional arguments.
srcdir=$1  # <nnet-dir>
data=$2    # <data>
dir=$3     # <xvector-dir>

# Refuse to start unless the model, its chunk-size files, the features and
# the VAD decisions are all present.
for f in $srcdir/final.raw \
         $srcdir/min_chunk_size $srcdir/max_chunk_size \
         $data/feats.scp $data/vad.scp; do
  if [ ! -f $f ]; then
    echo "No such file $f" && exit 1
  fi
done

# Chunk-size limits stored alongside the model.
min_chunk_size=$(cat $srcdir/min_chunk_size 2>/dev/null)
max_chunk_size=$(cat $srcdir/max_chunk_size 2>/dev/null)
# Model argument handed to the extraction binary: the raw network by default,
# or, when the nnet directory has an extract.config, an nnet3-copy command
# string (ending in '|') that applies that config to final.raw.
if [ -f $srcdir/extract.config ]; then
  echo "$0: using $srcdir/extract.config to extract xvectors"
  nnet="nnet3-copy --nnet-config=$srcdir/extract.config $srcdir/final.raw - |"
else
  nnet=$srcdir/final.raw
fi

# A non-positive chunk size means "not specified": fall back to the maximum
# chunk size stored with the model.
[ $chunk_size -le 0 ] && chunk_size=$max_chunk_size

# An explicitly requested chunk size may not exceed that maximum.
if [ $chunk_size -gt $max_chunk_size ]; then
  echo "$0: specified chunk size of $chunk_size is larger than the maximum chunk size, $max_chunk_size" && exit 1
fi
# Output/log directory. Stop here if it cannot be created, since every later
# step writes into it.
mkdir -p $dir/log || exit 1

# Split the data directory into $nj parts. A failed split must abort the
# run; otherwise the jobs below would read a missing or stale split.
utils/split_data.sh $data $nj || exit 1
echo "$0: extracting xvectors for $data"

# Per-job data directory. The literal JOB is a placeholder that is replaced
# by the job number when the extraction jobs are launched.
sdata=$data/split$nj/JOB

# Set up the features: apply-cmvn-sliding over the job's feats.scp
# (--norm-vars=false, --center=true, --cmn-window=300), then
# select-voiced-frames using the job's vad.scp.
feat="ark:apply-cmvn-sliding --norm-vars=false --center=true --cmn-window=300 scp:${sdata}/feats.scp ark:- | select-voiced-frames ark:- scp,s,cs:${sdata}/vad.scp ark:- |"
# Stage 0: run nnet3-xvector-compute once per data split, writing
# ${dir}/xvector.<job>.ark and ${dir}/xvector.<job>.scp.
if [ $stage -le 0 ]; then
  echo "$0: extracting xvectors from nnet"
  if $use_gpu; then
    # GPU mode: one "$cmd --gpu 1" invocation per split, launched in the
    # background. Each PID is recorded and waited on individually: a bare
    # `wait` always returns 0, and an `|| exit 1` placed before `&` would
    # only exit the background subshell, so failures would go unnoticed.
    pids=()
    for g in $(seq $nj); do
      # Replace the JOB placeholder in the feature pipeline with this job's number.
      $cmd --gpu 1 ${dir}/log/extract.$g.log \
        nnet3-xvector-compute --use-gpu=yes --min-chunk-size=$min_chunk_size --chunk-size=$chunk_size \
        "$nnet" "${feat//JOB/$g}" ark,scp:${dir}/xvector.$g.ark,${dir}/xvector.$g.scp &
      pids+=($!)
    done
    num_failed=0
    for pid in "${pids[@]}"; do
      wait $pid || num_failed=$((num_failed + 1))
    done
    if [ $num_failed -gt 0 ]; then
      echo "$0: $num_failed of $nj extraction jobs failed; see ${dir}/log/extract.*.log"
      exit 1
    fi
  else
    # CPU mode: a single job-array invocation; JOB is passed through
    # unexpanded in the log name, feature pipeline and output names.
    $cmd JOB=1:$nj ${dir}/log/extract.JOB.log \
      nnet3-xvector-compute --use-gpu=no --min-chunk-size=$min_chunk_size --chunk-size=$chunk_size \
      "$nnet" "$feat" ark,scp:${dir}/xvector.JOB.ark,${dir}/xvector.JOB.scp || exit 1
  fi
fi
# Stage 1: concatenate the per-job scp files into a single $dir/xvector.scp.
if [ $stage -le 1 ]; then
  echo "$0: combining xvectors across jobs"
  # Check every cat: the loop's exit status is only that of its last
  # command, so a missing earlier per-job file would otherwise leave a
  # truncated xvector.scp without any error. The trailing check still covers
  # a failure to open the output file.
  for j in $(seq $nj); do
    cat $dir/xvector.$j.scp || exit 1
  done > $dir/xvector.scp || exit 1
fi
# Stage 2: speaker-level xvectors, obtained by averaging the utterance-level
# xvectors of each speaker listed in $data/spk2utt. Writes spk_xvector.ark,
# spk_xvector.scp and a text-format num_utts.ark into $dir.
if [ $stage -le 2 ]; then
  echo "$0: computing mean of xvectors for each speaker"
  $cmd $dir/log/speaker_mean.log \
    ivector-mean ark:$data/spk2utt scp:$dir/xvector.scp \
      ark,scp:$dir/spk_xvector.ark,$dir/spk_xvector.scp \
      ark,t:$dir/num_utts.ark \
    || exit 1
fi
102 fi