summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 71eb5c6)
raw | patch | inline | side by side (parent: 71eb5c6)
author | Karel Vesely <vesis84@gmail.com> | |
Fri, 10 Jul 2015 08:43:50 +0000 (08:43 +0000) | ||
committer | vesis84 <vesis84@gmail.com> | |
Wed, 30 Sep 2015 13:06:51 +0000 (15:06 +0200) |
- the transcripts are now prepared in local directory,
- the default 'AMI_DIR' points to the shared-dir with wavs,
- beamforming is skipped if already done,
- imported 'cmd.sh' config from Vijay,
- the default 'AMI_DIR' points to the shared-dir with wavs,
- beamforming is skipped if already done,
- imported 'cmd.sh' config from Vijay,
diff --git a/egs/ami/s5/cmd.sh b/egs/ami/s5/cmd.sh
index 1f04585609b34dcc150df3851545688207bfd9a4..e47440c93e895e39cec57bc184f8295578b3f00a 100644 (file)
--- a/egs/ami/s5/cmd.sh
+++ b/egs/ami/s5/cmd.sh
#export highmem_cmd="queue.pl -P inf_hcrc_cstr_nst -l h_rt=05:00:00 -pe memory-2G 4"
#export scoring_cmd="queue.pl -P inf_hcrc_cstr_nst -l h_rt=00:20:00"
+# JSALT2015 workshop, cluster AWS-EC2, (setup from Vijay)
+export train_cmd="queue.pl -l arch=*64*"
+export decode_cmd="queue.pl -l arch=*64* --mem 4G"
+export highmem_cmd="queue.pl -l arch=*64* --mem 4G"
+export scoring_cmd="queue.pl -l arch=*64*"
+export cuda_cmd="queue.pl --gpu 1"
+export cntk_decode_cmd="queue.pl -l arch=*64* --mem 12G"
+
# To run locally, use:
-export train_cmd=run.pl
-export decode_cmd=run.pl
-export highmem_cmd=run.pl
-export cuda_cmd=run.pl
+#export train_cmd=run.pl
+#export decode_cmd=run.pl
+#export highmem_cmd=run.pl
+#export cuda_cmd=run.pl
-host=$(hostname -f)
-if [ ${host#*.} == "fit.vutbr.cz" ]; then
+if [ "$(hostname -d)" == "fit.vutbr.cz" ]; then
# BUT cluster:
queue="all.q@@blade,all.q@@speech"
gpu_queue="long.q@supergpu*,long.q@dellgpu*,long.q@pcspeech-gpu,long.q@pcgpu*"
index 419e67c74d2d7fe4abbcef22e29b0fbf5c745040..b5ff8c23ba8a1e1e6f11825533e6c67a7fec7f37 100755 (executable)
mkdir -p $odir
mkdir -p $wdir/log
+[ -e $odir/.done_beamforming ] && echo "Beamforming already done, skipping..." && exit 0
+
meetings=$wdir/meetings.list
cat local/split_train.orig local/split_dev.orig local/split_eval.orig | sort > $meetings
$cmd JOB=1:$nj $wdir/log/beamform.JOB.log \
local/beamformit.sh $nj JOB $numch $meetings $sdir $odir
+touch $odir/.done_beamforming
index 3a2a0c5c0fe7c642bf33ecf3bfd6e95df55630b9..e078b977e23992dc83b75926098de15d218b0a90 100755 (executable)
wgetfile=$wdir/wget_$mic.sh
# TODO fix this with Pawel, files don't exist anymore,
-manifest="wget -O $adir/MANIFEST.TXT http://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-04237-Sun-Jun-15-2014.manifest.txt"
-license="wget -O $adir/LICENCE.TXT http://groups.inf.ed.ac.uk/ami/download/temp/Creative-Commons-Attribution-NonCommercial-ShareAlike-2.5.txt"
+manifest="wget -O --continue $adir/MANIFEST.TXT http://groups.inf.ed.ac.uk/ami/download/temp/amiBuild-04237-Sun-Jun-15-2014.manifest.txt"
+license="wget -O --continue $adir/LICENCE.TXT http://groups.inf.ed.ac.uk/ami/download/temp/Creative-Commons-Attribution-NonCommercial-ShareAlike-2.5.txt"
echo "#!/bin/bash" > $wgetfile
echo $manifest >> $wgetfile
index 0b87d10e4dea45914c900f94461148e7a048ec73..05fc2dbee9b322f75624db3c16cbc2e74bcb06c4 100755 (executable)
set -e
set -u
-amidir=$1
-mkdir -p $amidir
+dir=$1
+mkdir -p $dir
-echo "Downloading annotiations..."
+echo "Downloading annotations..."
amiurl=http://groups.inf.ed.ac.uk/ami
annotver=ami_public_manual_1.6.1
-annot="$amidir/$annotver"
+annot="$dir/$annotver"
logdir=data/local/downloads; mkdir -p $logdir/log
[ ! -f $annot.zip ] && wget -nv -O $annot.zip $amiurl/AMICorpusAnnotations/$annotver.zip &> $logdir/log/download_ami_annot.log
-mkdir -p $amidir/annotations
-unzip -o -d $amidir/annotations $annot.zip &> /dev/null
+mkdir -p $dir/annotations
+unzip -o -d $dir/annotations $annot.zip &> /dev/null
-[ ! -f "$amidir/annotations/AMI-metadata.xml" ] && echo "$0: File AMI-Metadata.xml not found under $amidir/annotations." && exit 1;
+[ ! -f "$dir/annotations/AMI-metadata.xml" ] && echo "$0: File AMI-Metadata.xml not found under $dir/annotations." && exit 1;
# extract text from AMI XML annotations,
-local/ami_xml2text.sh $amidir
+local/ami_xml2text.sh $dir
wdir=data/local/annotations
[ ! -f $wdir/transcripts1 ] && echo "$0: File $wdir/transcripts1 not found." && exit 1;
echo "Preprocessing transcripts..."
local/ami_split_segments.pl $wdir/transcripts1 $wdir/transcripts2 &> $wdir/log/split_segments.log
-#make final train/dev/eval splits
+# make final train/dev/eval splits
for dset in train eval dev; do
[ ! -f local/split_$dset.final ] && cp local/split_$dset.orig local/split_$dset.final
grep -f local/split_$dset.final $wdir/transcripts2 > $wdir/$dset.txt
diff --git a/egs/ami/s5/run_ihm.sh b/egs/ami/s5/run_ihm.sh
index be047f8f369d48fa71c3a042b7ab55a8d1760eab..a69358e6cb71acb27ab9e50579d7a9e1e775c963 100755 (executable)
--- a/egs/ami/s5/run_ihm.sh
+++ b/egs/ami/s5/run_ihm.sh
set -x
# Path where AMI gets downloaded (or where locally available):
-[ ! -r conf/ami_dir ] && echo "Please, run 'run_prepare_shared.sh' first!" && exit 1
-AMI_DIR=$(cat conf/ami_dir)
+#AMI_DIR=$PWD/DOWNLOAD/ami # Default,
+AMI_DIR=/export/ws15-ffs-data/corpora/ami # JSALT2015 workshop, cluster AWS-EC2,
+[ ! -r data/local/lm/final_lm ] && echo "Please, run 'run_prepare_shared.sh' first!" && exit 1
final_lm=`cat data/local/lm/final_lm`
LM=$final_lm.pr1-7
local/online/run_nnet2_ms_perturbed.sh --mic $mic
fi
-echo "Done!"
+echo "Done."
diff --git a/egs/ami/s5/run_mdm.sh b/egs/ami/s5/run_mdm.sh
index aecf05da875c433c7d187a089ba7b534ebbd36d2..a58365e3b0ae9d343393c6b74c276314c7e453f7 100755 (executable)
--- a/egs/ami/s5/run_mdm.sh
+++ b/egs/ami/s5/run_mdm.sh
nmics=8 #we use all 8 channels, possible other options are 2 and 4
mic=mdm$nmics
+# Path where AMI gets downloaded (or where locally available):
+#AMI_DIR=$PWD/wav_db # Default,
+AMI_DIR=/export/ws15-ffs-data/corpora/ami # JSALT2015 workshop, cluster AWS-EC2,
+
+# MDM_DIR is directory for beamformed waves,
+#MDM_DIR=/disk/data1/s1136550/ami/mdm # [Edinburgh]
+MDM_DIR=$AMI_DIR/beamformed # [Default]
+
+[ ! -r data/local/lm/final_lm ] && echo "Please, run 'run_prepare_shared.sh' first!" && exit 1
+final_lm=`cat data/local/lm/final_lm`
+LM=$final_lm.pr1-7
+
stage=0
. utils/parse_options.sh
set -o pipefail
set -x
-# Path where AMI gets downloaded (or where locally available):
-[ ! -r conf/ami_dir ] && echo "Please, run 'run_prepare_shared.sh' first!" && exit 1
-AMI_DIR=$(cat conf/ami_dir)
-
-# MDM_DIR is directory for beamformed waves,
-MDM_DIR=$AMI_DIR/beamformed # [Default]
-#MDM_DIR=/disk/data1/s1136550/ami/mdm # [Edinburgh]
-
-final_lm=`cat data/local/lm/final_lm`
-LM=$final_lm.pr1-7
-
# Download AMI corpus (distant channels); you need around 130GB of free space to get the whole data (ihm+mdm),
if [ $stage -le 0 ]; then
[ -e data/local/downloads/wget_mdm.sh ] && \
local/nnet/run_dnn_lda_mllt.sh $mic
fi
-echo "Done!"
+echo "Done."
index befbff16fce4d59c18b75434981c80bddb49e201..aa80e950626cb09d5d0d6df38f26593178a37bd7 100755 (executable)
. ./cmd.sh
. ./path.sh
-# To run this script you need SRILM,
-
# Path to Fisher transcripts LM interpolation (if not defined only AMI transcript LM is built),
-FISHER_TRANS=`pwd`/eddie_data/lm/data/fisher/part1 # Edinburgh, [DEFAULT]
-# Path where AMI gets downloaded (or where locally available),
-AMI_DIR=$PWD/DOWNLOAD/amicorpus # [DEFAULT]
-
-# We can make setup specific to the 'domain' where the cluster is,
-case "$(hostname -d)" in
- fit.vutbr.cz) # BUT cluster,
- FISHER_TRANS=/mnt/matylda2/data/FISHER/fe_03_p1_tran
- AMI_DIR=$(mktemp -d $(find /mnt/scratch*/$USER -maxdepth 0)/kaldi_ami_data_XXXXXX)
- ;;
- *) echo "Using defaults locations,"
- ;;
-esac
-
-# We can override the automatic setup by :
-# './run_prepare_shared.sh --AMI-DIR [dir] --FISHER-TRANS [dir]'
-. utils/parse_options.sh
-
-# Load previous / store the new AMI_DIR location,
-[ -r conf/ami_dir ] && AMI_DIR=$(cat conf/ami_dir) || echo $AMI_DIR >conf/ami_dir
+#FISHER_TRANS=`pwd`/eddie_data/lm/data/fisher/part1 # Edinburgh,
+#FISHER_TRANS=/mnt/matylda2/data/FISHER/fe_03_p1_tran # BUT,
+FISHER_TRANS=/export/ws15-ffs-data/corpora/LDC/LDC2004T19/fe_03_p1_tran # JSALT2015 workshop, cluster AWS-EC2,
+
+# To run this script you need SRILM,
+# JSALT2015 note : it's downloaded to /export/ws15-ffs-data/tools/srilm-1.7.1.tar.gz
+! hash ngram-count && echo "Missing srilm, run 'cd ../../../tools/; ./install_srilm.sh" && exit 1
# Set bash to 'debug' mode, it will exit on :
# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands',
set -u
set -x
-local/ami_text_prep.sh $AMI_DIR
+# Download of annotations, pre-processing,
+local/ami_text_prep.sh data/local/downloads
local/ami_prepare_dict.sh
utils/prepare_lang.sh data/local/dict "<unk>" data/local/lang data/lang
prune-lm --threshold=1e-7 data/local/lm/$final_lm.gz /dev/stdout | gzip -c > data/local/lm/$LM.gz
utils/format_lm.sh data/lang data/local/lm/$LM.gz data/local/dict/lexicon.txt data/lang_$LM
-echo "Done!"
+echo "Done"
exit 0
diff --git a/egs/ami/s5/run_sdm.sh b/egs/ami/s5/run_sdm.sh
index e7fbe19e15a402778dec85f128bcd4f3e9230233..4e9565937911d199c920f9b011789a0b1fd37e5f 100755 (executable)
--- a/egs/ami/s5/run_sdm.sh
+++ b/egs/ami/s5/run_sdm.sh
set -x
# Path where AMI gets downloaded (or where locally available):
-[ ! -r conf/ami_dir ] && echo "Please, run 'run_prepare_shared.sh' first!" && exit 1
-AMI_DIR=$(cat conf/ami_dir)
+#AMI_DIR=$PWD/DOWNLOAD/ami # Default,
+AMI_DIR=/export/ws15-ffs-data/corpora/ami # JSALT2015 workshop, cluster AWS-EC2,
+[ ! -r data/local/lm/final_lm ] && echo "Please, run 'run_prepare_shared.sh' first!" && exit 1
final_lm=`cat data/local/lm/final_lm`
LM=$final_lm.pr1-7
local/nnet/run_dnn_lda_mllt.sh $mic
fi
-echo "Done!"
+echo "Done."
# By default we do not build systems adapted to sessions for AMI in distant scenarios
index 2462c2c4dcc36c065a129a4ffd5a132017d55917..97325d5d5f97a6ac3266124ec481896e4923ed76 100755 (executable)
echo way because you need to put your address in a download form.
echo Please download SRILM from http://www.speech.sri.com/projects/srilm/download.html
echo put it in ./srilm.tgz, then run this script.
+ exit 1;
fi
! which gawk 2>/dev/null && \
cat tmpf | awk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \
> Makefile || exit 1;
-make
+make -j4 # 4 threads,