summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 654ee7e)
raw | patch | inline | side by side (parent: 654ee7e)
author | Daniel Povey <dpovey@gmail.com> | |
Wed, 20 Jul 2016 00:11:13 +0000 (20:11 -0400) | ||
committer | Daniel Povey <dpovey@gmail.com> | |
Wed, 20 Jul 2016 01:08:16 +0000 (21:08 -0400) |
diff --git a/egs/wsj/s5/run.sh b/egs/wsj/s5/run.sh
index a9af8c00feb11523a4d984aa6ba3f85787a53c63..7be85cb5f2ee12e844396a6c29bc3f08e7ce3903 100755 (executable)
--- a/egs/wsj/s5/run.sh
+++ b/egs/wsj/s5/run.sh
# Now make MFCC features.
# mfccdir should be some place with a largish disk where you
# want to store MFCC features.
-mfccdir=mfcc
+
for x in test_eval92 test_eval93 test_dev93 train_si284; do
- steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 \
- data/$x exp/make_mfcc/$x $mfccdir || exit 1;
- steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x $mfccdir || exit 1;
+ steps/make_mfcc.sh --cmd "$train_cmd" --nj 20 data/$x || exit 1;
+ steps/compute_cmvn_stats.sh data/$x || exit 1;
done
utils/subset_data_dir.sh --first data/train_si284 7138 data/train_si84 || exit 1
index 938609df6892caa9bd74eca67552a124a1db374a..9056d88691ce0a2cd75947fdfa61ee514ba32f70 100755 (executable)
-#!/bin/bash
+#!/bin/bash
-# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Copyright 2012-2016 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# To be run from .. (one directory up from here)
# see ../run.sh for example
-# Compute cepstral mean and variance statistics per speaker.
+# Compute cepstral mean and variance statistics per speaker.
# We do this in just one job; it's fast.
# This script takes no options.
#
shift
fi
-if [ $# != 3 ]; then
- echo "Usage: $0 [options] <data-dir> <log-dir> <path-to-cmvn-dir>";
+if [ $# -lt 1 ] || [ $# -gt 3 ]; then
+ echo "Usage: $0 [options] <data-dir> [<log-dir> [<cmvn-dir>] ]";
echo "e.g.: $0 data/train exp/make_mfcc/train mfcc"
+ echo "Note: <log-dir> defaults to <data-dir>/log, and <cmvn-dir> defaults to <data-dir>/data"
echo "Options:"
echo " --fake gives you fake cmvn stats that do no normalization."
echo " --two-channel is for two-channel telephone data, there must be no segments "
if [ -f path.sh ]; then . ./path.sh; fi
data=$1
-logdir=$2
-cmvndir=$3
+if [ $# -ge 2 ]; then
+ logdir=$2
+else
+ logdir=$data/log
+fi
+if [ $# -ge 3 ]; then
+ cmvndir=$3
+else
+ cmvndir=$data/data
+fi
# make $cmvndir an absolute pathname.
cmvndir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $cmvndir ${PWD}`
cp $cmvndir/cmvn_$name.scp $data/cmvn.scp || exit 1;
-nc=`cat $data/cmvn.scp | wc -l`
-nu=`cat $data/spk2utt | wc -l`
+nc=`cat $data/cmvn.scp | wc -l`
+nu=`cat $data/spk2utt | wc -l`
if [ $nc -ne $nu ]; then
echo "$0: warning: it seems not all of the speakers got cmvn stats ($nc != $nu);"
[ $nc -eq 0 ] && exit 1;
index ea355126c5d53ad246ceba5c3dcaed03e2779f6c..1baecb3939ad58db100df26792759ba7a78ac0a3 100755 (executable)
-#!/bin/bash
+#!/bin/bash
-# Copyright 2012 Karel Vesely Johns Hopkins University (Author: Daniel Povey)
+# Copyright 2012-2016 Karel Vesely Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# To be run from .. (one directory up from here)
# see ../run.sh for example
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
-if [ $# != 3 ]; then
- echo "usage: make_fbank.sh [options] <data-dir> <log-dir> <path-to-fbankdir>";
- echo "options: "
- echo " --fbank-config <config-file> # config passed to compute-fbank-feats "
+if [ $# -lt 1 ] || [ $# -gt 3 ]; then
+ echo "Usage: $0 [options] <data-dir> [<log-dir> [<fbank-dir>] ]";
+ echo "e.g.: $0 data/train exp/make_fbank/train fbank"
+ echo "Note: <log-dir> defaults to <data-dir>/log, and <fbank-dir> defaults to <data-dir>/data"
+ echo "Options: "
+ echo " --fbank-config <config-file> # config passed to compute-fbank-feats "
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
exit 1;
fi
data=$1
-logdir=$2
-fbankdir=$3
+if [ $# -ge 2 ]; then
+ logdir=$2
+else
+ logdir=$data/log
+fi
+if [ $# -ge 3 ]; then
+ fbankdir=$3
+else
+ fbankdir=$data/data
+fi
# make $fbankdir an absolute pathname.
for n in $(seq $nj); do
# the next command does nothing unless $fbankdir/storage/ exists, see
# utils/create_data_link.pl for more info.
- utils/create_data_link.pl $fbankdir/raw_fbank_$name.$n.ark
+ utils/create_data_link.pl $fbankdir/raw_fbank_$name.$n.ark
done
if [ -f $data/segments ]; then
done
utils/split_scp.pl $scp $split_scps || exit 1;
-
+
$cmd JOB=1:$nj $logdir/make_fbank_${name}.JOB.log \
compute-fbank-feats $vtln_opts --verbose=2 --config=$fbank_config scp,p:$logdir/wav.JOB.scp ark:- \| \
copy-feats --compress=$compress ark:- \
rm $logdir/wav.*.scp $logdir/segments.* 2>/dev/null
-nf=`cat $data/feats.scp | wc -l`
-nu=`cat $data/utt2spk | wc -l`
+nf=`cat $data/feats.scp | wc -l`
+nu=`cat $data/utt2spk | wc -l`
if [ $nf -ne $nu ]; then
echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
echo "consider using utils/fix_data_dir.sh $data"
index ec0c8794bc4766d47f08052967cc784e953d3cc6..4dbd00e09bd39a43c909ce1502865120c35e6afc 100755 (executable)
-#!/bin/bash
+#!/bin/bash
-# Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech,
-# PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi)
+# Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech,
+# PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi)
+# 2016 Johns Hopkins University (author: Daniel Povey)
# Apache 2.0
-# Combine filterbank and pitch features together
+# Combine filterbank and pitch features together
# Note: This file is based on make_fbank.sh and make_pitch_kaldi.sh
# Begin configuration section.
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
-if [ $# != 3 ]; then
- echo "usage: make_fbank_pitch.sh [options] <data-dir> <log-dir> <path-to-fbank-pitch-dir>";
- echo "options: "
+if [ $# -lt 1 ] || [ $# -gt 3 ]; then
+ echo "Usage: $0 [options] <data-dir> [<log-dir> [<fbank-dir>] ]";
+ echo "e.g.: $0 data/train exp/make_fbank/train fbank"
+ echo "Note: <log-dir> defaults to <data-dir>/log, and <fbank-dir> defaults to <data-dir>/data"
+ echo "Options: "
echo " --fbank-config <config-file> # config passed to compute-fbank-feats "
echo " --pitch-config <pitch-config-file> # config passed to compute-kaldi-pitch-feats "
echo " --pitch-postprocess-config <postprocess-config-file> # config passed to process-kaldi-pitch-feats "
fi
data=$1
-logdir=$2
-fbank_pitch_dir=$3
+if [ $# -ge 2 ]; then
+ logdir=$2
+else
+ logdir=$data/log
+fi
+if [ $# -ge 3 ]; then
+ fbank_pitch_dir=$3
+else
+ fbank_pitch_dir=$data/data
+fi
# make $fbank_pitch_dir an absolute pathname.
for n in $(seq $nj); do
# the next command does nothing unless $fbank_pitch_dir/storage/ exists, see
# utils/create_data_link.pl for more info.
- utils/create_data_link.pl $fbank_pitch_dir/raw_fbank_pitch_$name.$n.ark
+ utils/create_data_link.pl $fbank_pitch_dir/raw_fbank_pitch_$name.$n.ark
done
if [ -f $data/segments ]; then
utils/split_scp.pl $data/segments $split_segments || exit 1;
rm $logdir/.error 2>/dev/null
-
+
fbank_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-fbank-feats $vtln_opts --verbose=2 --config=$fbank_config ark:- ark:- |"
pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
done
utils/split_scp.pl $scp $split_scps || exit 1;
-
+
fbank_feats="ark:compute-fbank-feats $vtln_opts --verbose=2 --config=$fbank_config scp,p:$logdir/wav.JOB.scp ark:- |"
pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config scp,p:$logdir/wav.JOB.scp ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
-
+
$cmd JOB=1:$nj $logdir/make_fbank_pitch_${name}.JOB.log \
paste-feats --length-tolerance=$paste_length_tolerance "$fbank_feats" "$pitch_feats" ark:- \| \
copy-feats --compress=$compress ark:- \
rm $logdir/wav.*.scp $logdir/segments.* 2>/dev/null
-nf=`cat $data/feats.scp | wc -l`
-nu=`cat $data/utt2spk | wc -l`
+nf=`cat $data/feats.scp | wc -l`
+nu=`cat $data/utt2spk | wc -l`
if [ $nf -ne $nu ]; then
echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
echo "consider using utils/fix_data_dir.sh $data"
index 09c34d40b240c459772ca175b458a41c491192fa..e8ff4bc8cd7b2d65581e074e8f04e695c37b47c1 100755 (executable)
#!/bin/bash
-# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Copyright 2012-2016 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# To be run from .. (one directory up from here)
# see ../run.sh for example
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
-if [ $# != 3 ]; then
- echo "Usage: $0 [options] <data-dir> <log-dir> <path-to-mfccdir>";
+if [ $# -lt 1 ] || [ $# -gt 3 ]; then
+ echo "Usage: $0 [options] <data-dir> [<log-dir> [<mfcc-dir>] ]";
echo "e.g.: $0 data/train exp/make_mfcc/train mfcc"
- echo "options: "
+ echo "Note: <log-dir> defaults to <data-dir>/log, and <mfcc-dir> defaults to <data-dir>/data"
+ echo "Options: "
echo " --mfcc-config <config-file> # config passed to compute-mfcc-feats "
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
fi
data=$1
-logdir=$2
-mfccdir=$3
-
+if [ $# -ge 2 ]; then
+ logdir=$2
+else
+ logdir=$data/log
+fi
+if [ $# -ge 3 ]; then
+ mfccdir=$3
+else
+ mfccdir=$data/data
+fi
# make $mfccdir an absolute pathname.
mfccdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $mfccdir ${PWD}`
index 385d262fd5987c3ec1e5cf1097d51dbd0b0f56f3..ff9a7d2f5f3d103d35d8837d54728c96d550d074 100755 (executable)
-#!/bin/bash
+#!/bin/bash
# Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech,
# PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi)
+# 2016 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
-# Combine MFCC and pitch features together
+# Combine MFCC and pitch features together
# Note: This file is based on make_mfcc.sh and make_pitch_kaldi.sh
# Begin configuration section.
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
-if [ $# != 3 ]; then
- echo "usage: make_mfcc_pitch.sh [options] <data-dir> <log-dir> <path-to-mfcc-pitch-dir>";
- echo "options: "
+if [ $# -lt 1 ] || [ $# -gt 3 ]; then
+ echo "Usage: $0 [options] <data-dir> [<log-dir> [<mfcc-dir>] ]";
+ echo "e.g.: $0 data/train exp/make_mfcc/train mfcc"
+ echo "Note: <log-dir> defaults to <data-dir>/log, and <mfcc-dir> defaults to <data-dir>/data"
+ echo "Options: "
echo " --mfcc-config <mfcc-config-file> # config passed to compute-mfcc-feats "
echo " --pitch-config <pitch-config-file> # config passed to compute-kaldi-pitch-feats "
echo " --pitch-postprocess-config <postprocess-config-file> # config passed to process-kaldi-pitch-feats "
fi
data=$1
-logdir=$2
-mfcc_pitch_dir=$3
+if [ $# -ge 2 ]; then
+ logdir=$2
+else
+ logdir=$data/log
+fi
+if [ $# -ge 3 ]; then
+ mfcc_pitch_dir=$3
+else
+ mfcc_pitch_dir=$data/data
+fi
# make $mfcc_pitch_dir an absolute pathname.
for n in $(seq $nj); do
# the next command does nothing unless $mfcc_pitch_dir/storage/ exists, see
# utils/create_data_link.pl for more info.
- utils/create_data_link.pl $mfcc_pitch_dir/raw_mfcc_pitch_$name.$n.ark
+ utils/create_data_link.pl $mfcc_pitch_dir/raw_mfcc_pitch_$name.$n.ark
done
if [ -f $data/segments ]; then
utils/split_scp.pl $data/segments $split_segments || exit 1;
rm $logdir/.error 2>/dev/null
-
+
mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config ark:- ark:- |"
pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
done
utils/split_scp.pl $scp $split_scps || exit 1;
-
+
mfcc_feats="ark:compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |"
pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
-
+
$cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \
paste-feats --length-tolerance=$paste_length_tolerance "$mfcc_feats" "$pitch_feats" ark:- \| \
copy-feats --compress=$compress ark:- \
rm $logdir/wav_${name}.*.scp $logdir/segments.* 2>/dev/null
-nf=`cat $data/feats.scp | wc -l`
-nu=`cat $data/utt2spk | wc -l`
+nf=`cat $data/feats.scp | wc -l`
+nu=`cat $data/utt2spk | wc -l`
if [ $nf -ne $nu ]; then
echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
echo "consider using utils/fix_data_dir.sh $data"
diff --git a/egs/wsj/s5/steps/make_mfcc_pitch_online.sh b/egs/wsj/s5/steps/make_mfcc_pitch_online.sh
index beb13615a0b52d7200d3385d942b25f9258b56da..2658850605310f41d7a72cb7ffb287044ea489f0 100755 (executable)
-#!/bin/bash
+#!/bin/bash
# Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech,
# PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi)
-# 2014 Johns Hopkins University (Author: Daniel Povey)
+# 2014-2016 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
-# Combine MFCC and online-pitch features together
+# Combine MFCC and online-pitch features together
# Note: This file is based on make_mfcc_pitch.sh
# Begin configuration section.
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
-if [ $# != 3 ]; then
- echo "usage: make_mfcc_pitch.sh [options] <data-dir> <log-dir> <path-to-mfcc-pitch-dir>";
- echo "options: "
+if [ $# -lt 1 ] || [ $# -gt 3 ]; then
+ echo "Usage: $0 [options] <data-dir> [<log-dir> [<mfcc-dir>] ]";
+ echo "e.g.: $0 data/train exp/make_mfcc/train mfcc"
+ echo "Note: <log-dir> defaults to <data-dir>/log, and <mfcc-dir> defaults to <data-dir>/data"
+ echo "Options: "
echo " --mfcc-config <mfcc-config-file> # config passed to compute-mfcc-feats, default "
echo " # is conf/mfcc.conf"
echo " --online-pitch-config <online-pitch-config-file> # config passed to compute-and-process-kaldi-pitch-feats, "
fi
data=$1
-logdir=$2
-mfcc_pitch_dir=$3
+if [ $# -ge 2 ]; then
+ logdir=$2
+else
+ logdir=$data/log
+fi
+if [ $# -ge 3 ]; then
+ mfcc_pitch_dir=$3
+else
+ mfcc_pitch_dir=$data/data
+fi
# make $mfcc_pitch_dir an absolute pathname.
for n in $(seq $nj); do
# the next command does nothing unless $mfcc_pitch_dir/storage/ exists, see
# utils/create_data_link.pl for more info.
- utils/create_data_link.pl $mfcc_pitch_dir/raw_mfcc_online_pitch_$name.$n.ark
+ utils/create_data_link.pl $mfcc_pitch_dir/raw_mfcc_online_pitch_$name.$n.ark
done
if [ -f $data/segments ]; then
utils/split_scp.pl $data/segments $split_segments || exit 1;
rm $logdir/.error 2>/dev/null
-
+
mfcc_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config ark:- ark:- |"
pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-and-process-kaldi-pitch-feats --verbose=2 --config=$online_pitch_config ark:- ark:- |"
done
utils/split_scp.pl $scp $split_scps || exit 1;
-
+
mfcc_feats="ark:compute-mfcc-feats $vtln_opts --verbose=2 --config=$mfcc_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |"
pitch_feats="ark,s,cs:compute-and-process-kaldi-pitch-feats --verbose=2 --config=$online_pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |"
-
+
$cmd JOB=1:$nj $logdir/make_mfcc_pitch_${name}.JOB.log \
paste-feats --length-tolerance=$paste_length_tolerance "$mfcc_feats" "$pitch_feats" ark:- \| \
copy-feats --compress=$compress ark:- \
rm $logdir/wav_${name}.*.scp $logdir/segments.* 2>/dev/null
-nf=`cat $data/feats.scp | wc -l`
-nu=`cat $data/utt2spk | wc -l`
+nf=`cat $data/feats.scp | wc -l`
+nu=`cat $data/utt2spk | wc -l`
if [ $nf -ne $nu ]; then
echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
echo "consider using utils/fix_data_dir.sh $data"
index 477dbaa657fd64d8d9264388cde2a216fa4c11bd..e2fdfe5e280f49e4bb75989980105f2a3a4ff158 100755 (executable)
-#!/bin/bash
+#!/bin/bash
-# Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
+# Copyright 2012-2016 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
# To be run from .. (one directory up from here)
# see ../run.sh for example
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
-if [ $# != 3 ]; then
- echo "Usage: $0 [options] <data-dir> <log-dir> <path-to-plpdir>";
- echo "e.g.: $0 data/train exp/make_plp/train plp"
- echo "options: "
+if [ $# -lt 1 ] || [ $# -gt 3 ]; then
+ echo "Usage: $0 [options] <data-dir> [<log-dir> [<plp-dir>] ]";
+ echo "e.g.: $0 data/train exp/make_plp/train plp"
+ echo "Note: <log-dir> defaults to <data-dir>/log, and <plp-dir> defaults to <data-dir>/data"
+ echo "Options: "
echo " --plp-config <config-file> # config passed to compute-plp-feats "
echo " --nj <nj> # number of parallel jobs"
echo " --cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs."
fi
data=$1
-logdir=$2
-plpdir=$3
-
+if [ $# -ge 2 ]; then
+ logdir=$2
+else
+ logdir=$data/log
+fi
+if [ $# -ge 3 ]; then
+ plpdir=$3
+else
+ plpdir=$data/data
+fi
# make $plpdir an absolute pathname.
plpdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $plpdir ${PWD}`
for n in $(seq $nj); do
# the next command does nothing unless $plpdir/storage/ exists, see
# utils/create_data_link.pl for more info.
- utils/create_data_link.pl $plpdir/raw_plp_$name.$n.ark
+ utils/create_data_link.pl $plpdir/raw_plp_$name.$n.ark
done
if [ -f $data/segments ]; then
done
utils/split_scp.pl $scp $split_scps || exit 1;
-
+
$cmd JOB=1:$nj $logdir/make_plp_${name}.JOB.log \
compute-plp-feats $vtln_opts --verbose=2 --config=$plp_config scp,p:$logdir/wav_${name}.JOB.scp ark:- \| \
copy-feats --compress=$compress ark:- \
rm $logdir/wav_${name}.*.scp $logdir/segments.* 2>/dev/null
-nf=`cat $data/feats.scp | wc -l`
-nu=`cat $data/utt2spk | wc -l`
+nf=`cat $data/feats.scp | wc -l`
+nu=`cat $data/utt2spk | wc -l`
if [ $nf -ne $nu ]; then
echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
echo "consider using utils/fix_data_dir.sh $data"
index c8de73cdb27911f5208084aa6d710b7d0045d90c..ff6e83ef577973a8548c68b781567134ca824f9d 100755 (executable)
-#!/bin/bash
+#!/bin/bash
# Copyright 2013 The Shenzhen Key Laboratory of Intelligent Media and Speech,
# PKU-HKUST Shenzhen Hong Kong Institution (Author: Wei Shi)
+# 2016 Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0
-# Combine PLP and pitch features together
+# Combine PLP and pitch features together
# Note: This file is based on make_plp.sh and make_pitch_kaldi.sh
# Begin configuration section.
if [ -f path.sh ]; then . ./path.sh; fi
. parse_options.sh || exit 1;
-if [ $# != 3 ]; then
- echo "usage: make_plp_pitch.sh [options] <data-dir> <log-dir> <path-to-plp-pitch-dir>";
- echo "options: "
+if [ $# -lt 1 ] || [ $# -gt 3 ]; then
+ echo "Usage: $0 [options] <data-dir> [<log-dir> [<plp-dir>] ]";
+ echo "e.g.: $0 data/train exp/make_plp/train plp"
+ echo "Note: <log-dir> defaults to <data-dir>/log, and <plp-dir> defaults to <data-dir>/data"
+ echo "Options: "
echo " --plp-config <config-file> # config passed to compute-plp-feats "
echo " --pitch-config <pitch-config-file> # config passed to compute-kaldi-pitch-feats "
echo " --pitch-postprocess-config <postprocess-config-file> # config passed to process-kaldi-pitch-feats "
fi
data=$1
-logdir=$2
-plp_pitch_dir=$3
-
+if [ $# -ge 2 ]; then
+ logdir=$2
+else
+ logdir=$data/log
+fi
+if [ $# -ge 3 ]; then
+ plp_pitch_dir=$3
+else
+ plp_pitch_dir=$data/data
+fi
# make $plp_pitch_dir an absolute pathname.
plp_pitch_dir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $plp_pitch_dir ${PWD}`
for n in $(seq $nj); do
# the next command does nothing unless $plp_pitch_dir/storage/ exists, see
# utils/create_data_link.pl for more info.
- utils/create_data_link.pl $plp_pitch_dir/raw_plp_pitch_$name.$n.ark
+ utils/create_data_link.pl $plp_pitch_dir/raw_plp_pitch_$name.$n.ark
done
utils/split_scp.pl $data/segments $split_segments || exit 1;
rm $logdir/.error 2>/dev/null
-
+
plp_feats="ark:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-plp-feats $vtln_opts --verbose=2 --config=$plp_config ark:- ark:- |"
pitch_feats="ark,s,cs:extract-segments scp,p:$scp $logdir/segments.JOB ark:- | compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config ark:- ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
done
utils/split_scp.pl $scp $split_scps || exit 1;
-
+
plp_feats="ark:compute-plp-feats $vtln_opts --verbose=2 --config=$plp_config scp,p:$logdir/wav_${name}.JOB.scp ark:- |"
pitch_feats="ark,s,cs:compute-kaldi-pitch-feats --verbose=2 --config=$pitch_config scp,p:$logdir/wav_${name}.JOB.scp ark:- | process-kaldi-pitch-feats $postprocess_config_opt ark:- ark:- |"
-
+
$cmd JOB=1:$nj $logdir/make_plp_pitch_${name}.JOB.log \
paste-feats --length-tolerance=$paste_length_tolerance "$plp_feats" "$pitch_feats" ark:- \| \
copy-feats --compress=$compress ark:- \
rm $logdir/wav_${name}.*.scp $logdir/segments.* 2>/dev/null
-nf=`cat $data/feats.scp | wc -l`
-nu=`cat $data/utt2spk | wc -l`
+nf=`cat $data/feats.scp | wc -l`
+nu=`cat $data/utt2spk | wc -l`
if [ $nf -ne $nu ]; then
echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
echo "consider using utils/fix_data_dir.sh $data"