[egs] Speaker recognition scripts for FAME! Speech Corpus (#1897)
authorEmre Yilmaz <emrey@kth.se>
Mon, 25 Sep 2017 04:06:22 +0000 (21:06 -0700)
committerDaniel Povey <dpovey@gmail.com>
Mon, 25 Sep 2017 04:06:22 +0000 (00:06 -0400)
40 files changed:
egs/fame/README.txt
egs/fame/s5/RESULTS
egs/fame/s5/local/fame_data_prep.sh
egs/fame/s5/local/fame_dict_prep.sh
egs/fame/v1/RESULTS [new file with mode: 0644]
egs/fame/v1/cmd.sh [new file with mode: 0644]
egs/fame/v1/conf/mfcc_16k.conf [new file with mode: 0644]
egs/fame/v1/conf/vad.conf [new file with mode: 0644]
egs/fame/v1/local/dnn/run_nnet2_common.sh [new file with mode: 0755]
egs/fame/v1/local/dnn/run_nnet2_multisplice.sh [new file with mode: 0755]
egs/fame/v1/local/dnn/train_dnn.sh [new file with mode: 0755]
egs/fame/v1/local/fame_data_prep.sh [new file with mode: 0755]
egs/fame/v1/local/fame_dict_prep.sh [new file with mode: 0755]
egs/fame/v1/local/make_fame_test.pl [new file with mode: 0755]
egs/fame/v1/local/make_fame_test_year.pl [new file with mode: 0755]
egs/fame/v1/local/make_fame_train.pl [new file with mode: 0755]
egs/fame/v1/local/make_fame_train_year.pl [new file with mode: 0755]
egs/fame/v1/local/plda_scoring.sh [new file with mode: 0755]
egs/fame/v1/local/prepare_for_eer.py [new file with mode: 0755]
egs/fame/v1/local/prepare_train.sh [new file with mode: 0755]
egs/fame/v1/local/scoring_common.sh [new file with mode: 0755]
egs/fame/v1/path.sh [new file with mode: 0755]
egs/fame/v1/run.sh [new file with mode: 0755]
egs/fame/v1/sid [new symlink]
egs/fame/v1/steps [new symlink]
egs/fame/v1/utils [new symlink]
egs/fame/v2/RESULTS [new file with mode: 0644]
egs/fame/v2/cmd.sh [new file with mode: 0644]
egs/fame/v2/conf/decode.config [new file with mode: 0644]
egs/fame/v2/conf/decode_dnn.config [new file with mode: 0644]
egs/fame/v2/conf/mfcc_16k.conf [new file with mode: 0644]
egs/fame/v2/conf/mfcc_asr.conf [new file with mode: 0644]
egs/fame/v2/conf/mfcc_hires_16k.conf [new file with mode: 0644]
egs/fame/v2/conf/vad.conf [new file with mode: 0644]
egs/fame/v2/local [new symlink]
egs/fame/v2/path.sh [new file with mode: 0755]
egs/fame/v2/run.sh [new file with mode: 0755]
egs/fame/v2/sid [new symlink]
egs/fame/v2/steps [new symlink]
egs/fame/v2/utils [new symlink]

index d2ed39eef75907dc317ef36695958981c945e4bc..d0037dc332d35d4db64243c3b543d079fdffecea 100644 (file)
@@ -6,10 +6,10 @@ The Corpus consists of short utterances extracted from 203 audio segments of app
 
 A full description of the FAME! Speech Corpus is provided in:
 
-Yilmaz, E., Heuvel, H. van den, Van de Velde, H., Kampstra, F., Algra, J., Leeuwen, D. van:
+E. Yılmaz, H. van den Heuvel, J. Dijkstra, H. Van de Velde, F. Kampstra, J. Algra and D. van Leeuwen, "Open Source Speech and Language Resources for Frisian,"  In Proc. INTERSPEECH, pp. 1536-1540, San Francisco, CA, USA, Sept. 2016. 
 
-Open Source Speech and Language Resources for Frisian Language.
+Speaker clustering and verification corpus details are provided in:
 
-In: Proceedings Interspeech 2016, pp. 1536--1540, 8-12 September 2016, San Francisco
+E. Yılmaz, J. Dijkstra, H. Van de Velde, F. Kampstra, J. Algra, H. van den Heuvel and D. van Leeuwen, "Longitudinal Speaker Clustering and Verification Corpus with Code-switching Frisian-Dutch Speech," in Proc. INTERSPEECH, pp. 37-41 Stockholm, Sweden, August 2017.
 
-Please check http://www.ru.nl/clst/datasets/ to get the FAME! Speech Corpus
+Please check http://www.ru.nl/clst/datasets/ to get the FAME! Speech Corpus. The ASR scripts are in ./s5. The GMM-UBM and DNN-UBM SV scripts are in ./v1 and ./v2 respectively. 
index a8541fba6b5a328eca3ab77cf1e6885434249101..c1a1267a6c2bf72fd59f390ef3bbc118433d596f 100644 (file)
@@ -1,28 +1,28 @@
-%WER 41.10 [ 4974 / 12101, 522 ins, 1223 del, 3229 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_devel/wer_11_0.0
-%WER 38.10 [ 4909 / 12886, 527 ins, 1220 del, 3162 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_test/wer_11_0.0
-%WER 41.06 [ 4969 / 12101, 514 ins, 1277 del, 3178 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_11_0.0
-%WER 40.38 [ 4886 / 12101, 515 ins, 1225 del, 3146 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_11_0.0
-%WER 40.15 [ 4859 / 12101, 514 ins, 1177 del, 3168 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_10_0.5
-%WER 37.86 [ 4879 / 12886, 596 ins, 1083 del, 3200 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it1/wer_10_0.0
-%WER 37.16 [ 4789 / 12886, 592 ins, 1056 del, 3141 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it3/wer_10_0.0
-%WER 36.92 [ 4757 / 12886, 618 ins, 1010 del, 3129 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it6/wer_10_0.0
-%WER 42.38 [ 5129 / 12101, 576 ins, 1171 del, 3382 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_devel/wer_11_0.0
-%WER 39.14 [ 5043 / 12886, 536 ins, 1172 del, 3335 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_test/wer_11_0.0
-%WER 42.05 [ 5088 / 12101, 525 ins, 1282 del, 3281 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_11_0.0
-%WER 41.41 [ 5011 / 12101, 461 ins, 1345 del, 3205 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_11_0.5
-%WER 40.97 [ 4958 / 12101, 485 ins, 1279 del, 3194 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_11_0.5
-%WER 38.79 [ 4998 / 12886, 512 ins, 1194 del, 3292 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it1/wer_11_0.0
-%WER 38.16 [ 4917 / 12886, 544 ins, 1128 del, 3245 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it3/wer_11_0.0
-%WER 37.68 [ 4856 / 12886, 564 ins, 1068 del, 3224 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it6/wer_11_0.0
-%WER 70.85 [ 8574 / 12101, 414 ins, 2596 del, 5564 sub ] exp/mono/decode_devel/wer_9_0.0
-%WER 68.17 [ 8785 / 12886, 413 ins, 2704 del, 5668 sub ] exp/mono/decode_test/wer_9_0.0
-%WER 44.05 [ 5330 / 12101, 560 ins, 1467 del, 3303 sub ] exp/sgmm2/decode_devel/wer_10_0.0
-%WER 40.22 [ 5183 / 12886, 680 ins, 1142 del, 3361 sub ] exp/sgmm2/decode_test/wer_9_0.0
-%WER 54.39 [ 6582 / 12101, 695 ins, 1595 del, 4292 sub ] exp/tri1/decode_devel/wer_10_0.0
-%WER 51.60 [ 6649 / 12886, 630 ins, 1706 del, 4313 sub ] exp/tri1/decode_test/wer_11_0.0
-%WER 51.53 [ 6236 / 12101, 659 ins, 1675 del, 3902 sub ] exp/tri2/decode_devel/wer_11_0.0
-%WER 48.32 [ 6226 / 12886, 643 ins, 1669 del, 3914 sub ] exp/tri2/decode_test/wer_12_0.0
-%WER 47.15 [ 5706 / 12101, 580 ins, 1537 del, 3589 sub ] exp/tri3/decode_devel/wer_13_0.0
-%WER 52.13 [ 6308 / 12101, 623 ins, 1706 del, 3979 sub ] exp/tri3/decode_devel.si/wer_11_0.5
-%WER 43.71 [ 5632 / 12886, 594 ins, 1538 del, 3500 sub ] exp/tri3/decode_test/wer_14_0.0
-%WER 48.21 [ 6212 / 12886, 825 ins, 1358 del, 4029 sub ] exp/tri3/decode_test.si/wer_10_0.0
+%WER 41.89 [ 5087 / 12143, 603 ins, 1108 del, 3376 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_devel/wer_10_0.0
+%WER 38.71 [ 5019 / 12966, 529 ins, 1192 del, 3298 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_test/wer_11_0.0
+%WER 41.64 [ 5056 / 12143, 497 ins, 1300 del, 3259 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_10_0.5
+%WER 40.90 [ 4966 / 12143, 456 ins, 1375 del, 3135 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_11_0.5
+%WER 40.47 [ 4914 / 12143, 458 ins, 1329 del, 3127 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_11_0.5
+%WER 38.32 [ 4969 / 12966, 581 ins, 1102 del, 3286 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it1/wer_10_0.0
+%WER 37.76 [ 4896 / 12966, 536 ins, 1133 del, 3227 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it3/wer_11_0.0
+%WER 37.42 [ 4852 / 12966, 485 ins, 1221 del, 3146 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it6/wer_12_0.0
+%WER 43.14 [ 5239 / 12143, 530 ins, 1319 del, 3390 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_devel/wer_10_0.5
+%WER 39.71 [ 5149 / 12966, 597 ins, 1091 del, 3461 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_test/wer_10_0.0
+%WER 42.94 [ 5214 / 12143, 569 ins, 1228 del, 3417 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_11_0.0
+%WER 42.11 [ 5114 / 12143, 555 ins, 1169 del, 3390 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_10_0.5
+%WER 41.74 [ 5069 / 12143, 570 ins, 1110 del, 3389 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_10_0.5
+%WER 39.33 [ 5099 / 12966, 452 ins, 1342 del, 3305 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it1/wer_11_0.5
+%WER 38.65 [ 5012 / 12966, 577 ins, 1105 del, 3330 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it3/wer_11_0.0
+%WER 38.23 [ 4957 / 12966, 542 ins, 1171 del, 3244 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it6/wer_12_0.0
+%WER 69.62 [ 8454 / 12143, 535 ins, 2189 del, 5730 sub ] exp/mono/decode_devel/wer_7_0.0
+%WER 68.26 [ 8851 / 12966, 572 ins, 2393 del, 5886 sub ] exp/mono/decode_test/wer_7_0.0
+%WER 44.31 [ 5381 / 12143, 551 ins, 1465 del, 3365 sub ] exp/sgmm2/decode_devel/wer_9_0.5
+%WER 40.58 [ 5261 / 12966, 607 ins, 1305 del, 3349 sub ] exp/sgmm2/decode_test/wer_10_0.0
+%WER 54.29 [ 6593 / 12143, 477 ins, 2120 del, 3996 sub ] exp/tri1/decode_devel/wer_11_0.5
+%WER 51.25 [ 6645 / 12966, 679 ins, 1715 del, 4251 sub ] exp/tri1/decode_test/wer_11_0.0
+%WER 51.25 [ 6223 / 12143, 564 ins, 1811 del, 3848 sub ] exp/tri2/decode_devel/wer_10_1.0
+%WER 48.87 [ 6337 / 12966, 629 ins, 1799 del, 3909 sub ] exp/tri2/decode_test/wer_13_0.0
+%WER 47.19 [ 5730 / 12143, 647 ins, 1449 del, 3634 sub ] exp/tri3/decode_devel/wer_12_0.0
+%WER 52.43 [ 6366 / 12143, 643 ins, 1711 del, 4012 sub ] exp/tri3/decode_devel.si/wer_11_0.5
+%WER 44.14 [ 5723 / 12966, 693 ins, 1404 del, 3626 sub ] exp/tri3/decode_test/wer_13_0.0
+%WER 48.77 [ 6324 / 12966, 754 ins, 1524 del, 4046 sub ] exp/tri3/decode_test.si/wer_12_0.0
index 42949505822e8e44e7e7df9d5e0cb0c57d9447b8..11c28c1d130292b12963efdb502d4cebc984d8ac 100755 (executable)
@@ -8,7 +8,7 @@
 corpus=$1
 set -e -o pipefail
 if [ -z "$corpus" ] ; then
-    echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech database"
+    echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech corpus"
     exit 1
 fi
 if [ ! -d "$corpus" ] ; then
index c6530217a67d329406590f59038c8d23c3459ed7..95b5d846e6ab6e20eb0928d1fe3c9252c6c8b605 100755 (executable)
@@ -7,7 +7,7 @@
 
 corpus=$1
 if [ -z "$corpus" ] ; then
-    echo >&2 "The script $0 expects one parameter -- the location of the Iban corpus"
+    echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech corpus"
     exit 1
 fi
 if [ ! -d "$corpus" ] ; then
diff --git a/egs/fame/v1/RESULTS b/egs/fame/v1/RESULTS
new file mode 100644 (file)
index 0000000..5ef6daa
--- /dev/null
@@ -0,0 +1,105 @@
+GMM-2048 EER for fame_complete_3sec_eval
+python local/prepare_for_eer.py data/fame_complete_3sec_eval_test/trials local/scores_gmm_2048_ind_female_complete_3sec_eval/plda_scores
+ind female: 25.88
+python local/prepare_for_eer.py data/fame_complete_3sec_eval_test/trials local/scores_gmm_2048_ind_male_complete_3sec_eval/plda_scores
+ind male: 16.95
+python local/prepare_for_eer.py data/fame_complete_3sec_eval_test/trials local/scores_gmm_2048_ind_pooled_complete_3sec_eval/plda_scores
+ind pooled: 21.9
+GMM-2048 EER for fame_complete_10sec_eval
+python local/prepare_for_eer.py data/fame_complete_10sec_eval_test/trials local/scores_gmm_2048_ind_female_complete_10sec_eval/plda_scores
+ind female: 18.14
+python local/prepare_for_eer.py data/fame_complete_10sec_eval_test/trials local/scores_gmm_2048_ind_male_complete_10sec_eval/plda_scores
+ind male: 9.454
+python local/prepare_for_eer.py data/fame_complete_10sec_eval_test/trials local/scores_gmm_2048_ind_pooled_complete_10sec_eval/plda_scores
+ind pooled: 14.15
+GMM-2048 EER for fame_complete_30sec_eval
+python local/prepare_for_eer.py data/fame_complete_30sec_eval_test/trials local/scores_gmm_2048_ind_female_complete_30sec_eval/plda_scores
+ind female: 15.49
+python local/prepare_for_eer.py data/fame_complete_30sec_eval_test/trials local/scores_gmm_2048_ind_male_complete_30sec_eval/plda_scores
+ind male: 6.985
+python local/prepare_for_eer.py data/fame_complete_30sec_eval_test/trials local/scores_gmm_2048_ind_pooled_complete_30sec_eval/plda_scores
+ind pooled: 11.51
+GMM-2048 EER for fame_ageing_3sec_eval
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_test/trials local/scores_gmm_2048_ind_female_ageing_3sec_eval/plda_scores
+ind female: 25.03
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_test/trials local/scores_gmm_2048_ind_male_ageing_3sec_eval/plda_scores
+ind male: 18.38
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_test/trials local/scores_gmm_2048_ind_pooled_ageing_3sec_eval/plda_scores
+ind pooled: 22.87
+GMM-2048 EER for fame_ageing_10sec_eval
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_test/trials local/scores_gmm_2048_ind_female_ageing_10sec_eval/plda_scores
+ind female: 16.31
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_test/trials local/scores_gmm_2048_ind_male_ageing_10sec_eval/plda_scores
+ind male: 10.4
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_test/trials local/scores_gmm_2048_ind_pooled_ageing_10sec_eval/plda_scores
+ind pooled: 14.51
+GMM-2048 EER for fame_ageing_30sec_eval
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_test/trials local/scores_gmm_2048_ind_female_ageing_30sec_eval/plda_scores
+ind female: 11.53
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_test/trials local/scores_gmm_2048_ind_male_ageing_30sec_eval/plda_scores
+ind male: 6.869
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_test/trials local/scores_gmm_2048_ind_pooled_ageing_30sec_eval/plda_scores
+ind pooled: 10.76
+GMM-2048 EER for fame_ageing_3sec_eval_1t3
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_1t3_test/trials local/scores_gmm_2048_ind_female_ageing_3sec_eval_1t3/plda_scores
+ind female: 25.28
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_1t3_test/trials local/scores_gmm_2048_ind_male_ageing_3sec_eval_1t3/plda_scores
+ind male: 14.07
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_1t3_test/trials local/scores_gmm_2048_ind_pooled_ageing_3sec_eval_1t3/plda_scores
+ind pooled: 21.08
+GMM-2048 EER for fame_ageing_3sec_eval_4t10
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_4t10_test/trials local/scores_gmm_2048_ind_female_ageing_3sec_eval_4t10/plda_scores
+ind female: 23.81
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_4t10_test/trials local/scores_gmm_2048_ind_male_ageing_3sec_eval_4t10/plda_scores
+ind male: 17.2
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_4t10_test/trials local/scores_gmm_2048_ind_pooled_ageing_3sec_eval_4t10/plda_scores
+ind pooled: 21.57
+GMM-2048 EER for fame_ageing_3sec_eval_mt10
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_mt10_test/trials local/scores_gmm_2048_ind_female_ageing_3sec_eval_mt10/plda_scores
+ind female: 26.47
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_mt10_test/trials local/scores_gmm_2048_ind_male_ageing_3sec_eval_mt10/plda_scores
+ind male: 22.74
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_mt10_test/trials local/scores_gmm_2048_ind_pooled_ageing_3sec_eval_mt10/plda_scores
+ind pooled: 25.9
+GMM-2048 EER for fame_ageing_10sec_eval_1t3
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_1t3_test/trials local/scores_gmm_2048_ind_female_ageing_10sec_eval_1t3/plda_scores
+ind female: 15.91
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_1t3_test/trials local/scores_gmm_2048_ind_male_ageing_10sec_eval_1t3/plda_scores
+ind male: 7.064
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_1t3_test/trials local/scores_gmm_2048_ind_pooled_ageing_10sec_eval_1t3/plda_scores
+ind pooled: 13.5
+GMM-2048 EER for fame_ageing_10sec_eval_4t10
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_4t10_test/trials local/scores_gmm_2048_ind_female_ageing_10sec_eval_4t10/plda_scores
+ind female: 14.36
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_4t10_test/trials local/scores_gmm_2048_ind_male_ageing_10sec_eval_4t10/plda_scores
+ind male: 10.5
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_4t10_test/trials local/scores_gmm_2048_ind_pooled_ageing_10sec_eval_4t10/plda_scores
+ind pooled: 13.41
+GMM-2048 EER for fame_ageing_10sec_eval_mt10
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_mt10_test/trials local/scores_gmm_2048_ind_female_ageing_10sec_eval_mt10/plda_scores
+ind female: 18.26
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_mt10_test/trials local/scores_gmm_2048_ind_male_ageing_10sec_eval_mt10/plda_scores
+ind male: 11.93
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_mt10_test/trials local/scores_gmm_2048_ind_pooled_ageing_10sec_eval_mt10/plda_scores
+ind pooled: 16.59
+GMM-2048 EER for fame_ageing_30sec_eval_1t3
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_1t3_test/trials local/scores_gmm_2048_ind_female_ageing_30sec_eval_1t3/plda_scores
+ind female: 13.16
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_1t3_test/trials local/scores_gmm_2048_ind_male_ageing_30sec_eval_1t3/plda_scores
+ind male: 4.425
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_1t3_test/trials local/scores_gmm_2048_ind_pooled_ageing_30sec_eval_1t3/plda_scores
+ind pooled: 10.38
+GMM-2048 EER for fame_ageing_30sec_eval_4t10
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_4t10_test/trials local/scores_gmm_2048_ind_female_ageing_30sec_eval_4t10/plda_scores
+ind female: 9.536
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_4t10_test/trials local/scores_gmm_2048_ind_male_ageing_30sec_eval_4t10/plda_scores
+ind male: 6.826
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_4t10_test/trials local/scores_gmm_2048_ind_pooled_ageing_30sec_eval_4t10/plda_scores
+ind pooled: 9.692
+GMM-2048 EER for fame_ageing_30sec_eval_mt10
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_mt10_test/trials local/scores_gmm_2048_ind_female_ageing_30sec_eval_mt10/plda_scores
+ind female: 9.677
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_mt10_test/trials local/scores_gmm_2048_ind_male_ageing_30sec_eval_mt10/plda_scores
+ind male: 7.983
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_mt10_test/trials local/scores_gmm_2048_ind_pooled_ageing_30sec_eval_mt10/plda_scores
+ind pooled: 11.99
diff --git a/egs/fame/v1/cmd.sh b/egs/fame/v1/cmd.sh
new file mode 100644 (file)
index 0000000..23721d0
--- /dev/null
@@ -0,0 +1,23 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
+# with slurm.  Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration.  Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd=queue.pl
+export decode_cmd="queue.pl --mem 2G"
+# the use of cuda_cmd is deprecated, used only in 'nnet1',
+export cuda_cmd="queue.pl --gpu 1"
+
+if [ "$(hostname -d)" == "fit.vutbr.cz" ]; then
+  queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
+  export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
+  export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
+  export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
+fi
diff --git a/egs/fame/v1/conf/mfcc_16k.conf b/egs/fame/v1/conf/mfcc_16k.conf
new file mode 100644 (file)
index 0000000..7361509
--- /dev/null
@@ -0,0 +1 @@
+--use-energy=false   # only non-default option.
diff --git a/egs/fame/v1/conf/vad.conf b/egs/fame/v1/conf/vad.conf
new file mode 100644 (file)
index 0000000..a0ca244
--- /dev/null
@@ -0,0 +1,2 @@
+--vad-energy-threshold=5.5
+--vad-energy-mean-scale=0.5
diff --git a/egs/fame/v1/local/dnn/run_nnet2_common.sh b/egs/fame/v1/local/dnn/run_nnet2_common.sh
new file mode 100755 (executable)
index 0000000..df5804d
--- /dev/null
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Make the features.
+
+. ./cmd.sh
+. ./path.sh
+stage=1
+set -e
+. ./utils/parse_options.sh
+
+mkdir -p exp/nnet2_online
+
+if [ $stage -le 1 ]; then
+  # this shows how you can split across multiple file-systems.  we'll split the
+  # MFCC dir across multiple locations.  You might want to be careful here, if you
+  # have multiple copies of Kaldi checked out and run the same recipe, not to let
+  # them overwrite each other.
+  mfccdir=mfcc
+  utils/copy_data_dir.sh data/train_asr data/train_hires_asr
+  steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires_16k.conf \
+      --cmd "$train_cmd" data/train_hires_asr exp/make_hires/train $mfccdir || exit 1;
+fi
diff --git a/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh b/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh
new file mode 100755 (executable)
index 0000000..bba54c5
--- /dev/null
@@ -0,0 +1,64 @@
+#!/bin/bash
+# Copyright 2017  Radboud University (Author: Emre Yilmaz)
+#
+# This script is based on run_nnet2_multisplice.sh in
+# egs/fisher_english/s5/local/online. It has been modified
+# for speaker recognition.
+
+stage=1
+train_stage=-10
+use_gpu=true
+set -e
+. ./cmd.sh
+. ./path.sh
+
+. utils/parse_options.sh
+
+# assume use_gpu=true since it would be way too slow otherwise.
+
+if $use_gpu; then
+  if ! cuda-compiled; then
+    cat <<EOF && exit 1
+This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
+If you want to use GPUs (and have them), go to src/, and configure and make on a machine
+where "nvcc" is installed.  Otherwise, call this script with --use-gpu false
+EOF
+  fi
+  parallel_opts="--gpu 1"
+  num_threads=1
+  minibatch_size=512
+else
+  # Use 4 nnet jobs just like run_4d_gpu.sh so the results should be
+  # almost the same, but this may be a little bit slow.
+  num_threads=16
+  minibatch_size=128
+  parallel_opts="--num-threads $num_threads"
+fi
+
+dir=exp/nnet2_online/nnet_ms_a
+mkdir -p exp/nnet2_online
+decode_cmd='run.pl'
+# Stages 1 through 5 are done in run_nnet2_common.sh,
+# so it can be shared with other similar scripts.
+local/dnn/run_nnet2_common.sh --stage $stage
+
+if [ $stage -le 6 ]; then
+
+  sid/nnet2/train_multisplice_accel2.sh --stage $train_stage \
+    --feat-type raw \
+    --splice-indexes "layer0/-2:-1:0:1:2 layer1/-1:2 layer3/-3:3 layer4/-7:2" \
+    --num-epochs 10 \
+    --num-hidden-layers 6 \
+    --num-jobs-initial 3 --num-jobs-final 8 \
+    --num-threads "$num_threads" \
+    --minibatch-size "$minibatch_size" \
+    --parallel-opts "$parallel_opts" \
+    --mix-up 10500 \
+    --initial-effective-lrate 0.0015 --final-effective-lrate 0.00015 \
+    --cmd "$decode_cmd" \
+    --egs-dir "$common_egs_dir" \
+    --pnorm-input-dim 2000 \
+    --pnorm-output-dim 200 \
+    data/train_hires_asr data/lang exp/tri3 $dir  || exit 1;
+
+fi
diff --git a/egs/fame/v1/local/dnn/train_dnn.sh b/egs/fame/v1/local/dnn/train_dnn.sh
new file mode 100755 (executable)
index 0000000..7155f32
--- /dev/null
@@ -0,0 +1,89 @@
+#!/bin/bash
+# Copyright 2017  Radboud University (Author: Emre Yilmaz)
+
+. ./cmd.sh
+. ./path.sh
+
+stage=0
+feat_nj=10
+train_nj=10
+decode_nj=10
+famecorpus=./corpus/ASR
+. ./utils/parse_options.sh
+
+numLeavesTri1=5000
+numGaussTri1=25000
+numLeavesMLLT=5000
+numGaussMLLT=25000
+numLeavesSAT=5000
+numGaussSAT=25000
+numGaussUBM=800
+numLeavesSGMM=10000
+numGaussSGMM=20000
+
+if [ -d $famecorpus ] ; then
+  echo "Fame corpus present. OK."
+elif [ -f ./fame.tar.gz ] ; then
+  echo "Unpacking..."
+  tar xzf fame.tar.gz
+elif [ ! -d $famecorpus ] && [ ! -f ./fame.tar.gz ] ; then
+  echo "The Fame! corpus is not present. Please register here: http://www.ru.nl/clst/datasets/ "
+  echo " and download the corpus and put it at $famecorpus" && exit 1
+fi
+
+if [ $stage -le 1 ]; then
+  local/fame_data_prep.sh $famecorpus || exit 1;
+  local/fame_dict_prep.sh $famecorpus || exit 1;
+  utils/prepare_lang.sh data/local/dict "<UNK>" data/local/lang data/lang || exit 1;
+  utils/format_lm.sh data/lang data/local/LM.gz data/local/dict/lexicon.txt data/lang_test || exit 1;
+fi
+
+if [ $stage -le 2 ]; then
+  # Feature extraction
+  for x in train_asr devel_asr test_asr; do
+      steps/make_mfcc.sh --nj $feat_nj --mfcc-config conf/mfcc_asr.conf --cmd "$train_cmd" data/$x exp/make_mfcc/$x mfcc || exit 1;
+      steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x mfcc || exit 1;
+  done
+fi
+
+if [ $stage -le 3 ]; then
+  ### Monophone
+  echo "Starting monophone training."
+  steps/train_mono.sh --nj $train_nj --cmd "$train_cmd" data/train_asr data/lang exp/mono || exit 1;
+  echo "Mono training done."
+
+fi
+
+if [ $stage -le 4 ]; then
+  ### Triphone
+  echo "Starting triphone training."
+  steps/align_si.sh --nj $train_nj --cmd "$train_cmd" data/train_asr data/lang exp/mono exp/mono_ali || exit 1;
+  steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd"  $numLeavesTri1 $numGaussTri1 data/train_asr data/lang exp/mono_ali exp/tri1 || exit 1;
+  echo "Triphone training done."
+
+fi
+
+if [ $stage -le 5 ]; then
+  ### Triphone + LDA and MLLT
+  echo "Starting LDA+MLLT training."
+  steps/align_si.sh  --nj $train_nj --cmd "$train_cmd"  data/train_asr data/lang exp/tri1 exp/tri1_ali || exit 1;
+  steps/train_lda_mllt.sh  --cmd "$train_cmd"  --splice-opts "--left-context=3 --right-context=3" $numLeavesMLLT $numGaussMLLT data/train_asr data/lang  exp/tri1_ali exp/tri2 || exit 1;
+  echo "LDA+MLLT training done."
+
+fi
+
+if [ $stage -le 6 ]; then
+  ### Triphone + LDA and MLLT + SAT and FMLLR
+  echo "Starting SAT+FMLLR training."
+  steps/align_si.sh  --nj $train_nj --cmd "$train_cmd" --use-graphs true data/train_asr data/lang exp/tri2 exp/tri2_ali || exit 1;
+  steps/train_sat.sh --cmd "$train_cmd" $numLeavesSAT $numGaussSAT data/train_asr data/lang exp/tri2_ali exp/tri3 || exit 1;
+  echo "SAT+FMLLR training done."
+
+  echo "Decoding the development and test sets using SAT+FMLLR models."
+  utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph || exit 1;
+  steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --skip-scoring true exp/tri3/graph data/devel_asr exp/tri3/decode_devel || exit 1;
+  steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --skip-scoring true exp/tri3/graph data/test_asr exp/tri3/decode_test || exit 1;
+  echo "SAT+FMLLR decoding done."
+fi
+
+local/dnn/run_nnet2_multisplice.sh
diff --git a/egs/fame/v1/local/fame_data_prep.sh b/egs/fame/v1/local/fame_data_prep.sh
new file mode 100755 (executable)
index 0000000..bbe3097
--- /dev/null
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Copyright 2015-2016  Sarah Flora Juan
+# Copyright 2016  Johns Hopkins University (Author: Yenda Trmal)
+# Copyright 2017  Radboud University (Author: Emre Yilmaz)
+
+# Apache 2.0
+
+corpus=$1
+set -e -o pipefail
+if [ -z "$corpus" ] ; then
+    echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech database"
+    exit 1
+fi
+if [ ! -d "$corpus" ] ; then
+    echo >&2 "The directory $corpus does not exist"
+fi
+
+echo "Preparing train, development and test data"
+mkdir -p data data/local data/train_asr data/devel_asr data/test_asr
+
+for x in train devel test; do
+    echo "Copy spk2utt, utt2spk, wav.scp, text for $x"
+    cp $corpus/data/$x/text     data/${x}_asr/text    || exit 1;
+    cp $corpus/data/$x/spk2utt  data/${x}_asr/spk2utt || exit 1;
+    cp $corpus/data/$x/utt2spk  data/${x}_asr/utt2spk || exit 1;
+
+    # the corpus wav.scp contains physical paths, so we just re-generate
+    # the file again from scratch instead of figuring out how to edit it
+    for rec in $(awk '{print $1}' $corpus/data/$x/text) ; do
+        spk=${rec%_*}
+        filename=$corpus/fame/wav/${x}/${rec:8}.wav
+        if [ ! -f "$filename" ] ; then
+            echo >&2 "The file $filename could not be found ($rec)"
+            exit 1
+        fi
+        # we might want to store physical paths as a general rule
+        filename=$(readlink -f $filename)
+        echo "$rec $filename"
+    done > data/${x}_asr/wav.scp
+
+    # fix_data_dir.sh fixes common mistakes (unsorted entries in wav.scp,
+    # duplicate entries and so on). Also, it regenerates the spk2utt from
+    # utt2spk
+    utils/fix_data_dir.sh data/${x}_asr
+done
+
+echo "Copying language model"
+if [ -f $corpus/lm/LM_FR_IKN3G ] ; then
+    gzip -c $corpus/lm/LM_FR_IKN3G > data/local/LM.gz
+fi
+
+echo "Data preparation completed."
+
diff --git a/egs/fame/v1/local/fame_dict_prep.sh b/egs/fame/v1/local/fame_dict_prep.sh
new file mode 100755 (executable)
index 0000000..122c34c
--- /dev/null
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Copyright 2015-2016  Sarah Flora Juan
+# Copyright 2016  Johns Hopkins University (Author: Yenda Trmal)
+# Copyright 2016  Radboud University (Author: Emre Yilmaz)
+
+# Apache 2.0
+
+corpus=$1
+if [ -z "$corpus" ] ; then
+    echo >&2 "The script $0 expects one parameter -- the location of the FAME speech corpus"
+    exit 1
+fi
+if [ ! -d "$corpus" ] ; then
+    echo >&2 "The directory $corpus does not exist"
+fi
+
+mkdir -p data/lang data/local/dict
+
+cat $corpus/lexicon/lex.asr $corpus/lexicon/lex.oov > data/local/dict/lexicon.txt
+echo "!SIL     SIL" >> data/local/dict/lexicon.txt
+echo "<UNK>    SPN" >> data/local/dict/lexicon.txt
+env LC_ALL=C sort -u -o data/local/dict/lexicon.txt data/local/dict/lexicon.txt
+cat data/local/dict/lexicon.txt | \
+    perl -ane 'print join("\n", @F[1..$#F]) . "\n"; '  | \
+    sort -u | grep -v 'SIL' > data/local/dict/nonsilence_phones.txt
+
+
+touch data/local/dict/extra_questions.txt
+touch data/local/dict/optional_silence.txt
+
+echo "SIL"   > data/local/dict/optional_silence.txt
+echo "SIL"   > data/local/dict/silence_phones.txt
+echo "<UNK>" > data/local/dict/oov.txt
+
+echo "Dictionary preparation succeeded"
diff --git a/egs/fame/v1/local/make_fame_test.pl b/egs/fame/v1/local/make_fame_test.pl
new file mode 100755 (executable)
index 0000000..2098dc1
--- /dev/null
@@ -0,0 +1,85 @@
+#!/usr/bin/perl
+#
+# Copyright 2015   David Snyder
+#           2017   Radboud University (Author: Emre Yilmaz)
+# Apache 2.0.
+# Usage: make_fame_test.pl corpus/SV/ data/ complete 3sec eval.
+#
+# Builds the Kaldi test data directory for one FAME! speaker-verification
+# condition: reads the trial key and trial wav list from
+# <corpus>/docs/<task>/ and writes trials, wav.scp, utt2spk, spk2utt and
+# spk2gender under <out>/fame_<task>_<subtask>_<set>_test.
+
+if (@ARGV != 5) {
+  print STDERR "Usage: $0 <path-to-FAME corpus> <path-to-output> <task-name> <subtask-name> <dataset-name>\n";
+  print STDERR "e.g. $0 corpus/SV/ data/ complete 3sec eval\n";
+  exit(1);
+}
+
+($db_base, $out_base_dir, $task, $subtask, $sets) = @ARGV;
+$out_dir = "$out_base_dir/fame_${task}_${subtask}_${sets}_test";
+
+$tmp_dir = "$out_dir/tmp";
+if (system("mkdir -p $tmp_dir") != 0) {
+  die "Error making directory $tmp_dir"; 
+}
+
+# Pass 1: the trial key holds comma-separated lines "spkr,utt,side,is_target".
+# Write the Kaldi trials file and remember which spkr/utt pairs are wanted.
+open(IN_TRIALS, "<", "$db_base/docs/$task/${task}_${subtask}_${sets}_trials_key") or die "cannot open trials list";
+open(OUT_TRIALS, ">", "$out_dir/trials") or die "cannot open trials list";
+%trials = ();
+while(<IN_TRIALS>) {
+  chomp;
+  ($spkr,$utt,$side,$is_target) = split(",", $_);
+  $side = uc $side;
+  $key = "${spkr} ${utt}_${side}"; # Just keep track of the spkr-utterance pairs we want.
+  $trials{$key} = 1; # Just keep track of the spkr-utterance pairs we want.
+  print OUT_TRIALS "$spkr ${utt}_${side} $is_target\n";
+}
+
+close(OUT_TRIALS) || die;
+close(IN_TRIALS) || die;
+
+# Pass 2: the wav list holds lines "spkr gender wav:side"; keep only the
+# entries that appear in the trial key, emitting each utterance once.
+open(WAVLIST, "<", "$db_base/docs/$task/${task}_${subtask}_${sets}_trials") or die "cannot open wav list";
+open(GNDR,">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+open(SPKR,">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+open(WAV,">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+
+%spk2gender = ();
+%utts = ();
+while(<WAVLIST>) {
+  chomp;
+  $sph = $_;
+  ($spkr, $gender, $wav_and_side) = split(" ", $sph);
+  ($wav, $side) = split(":", $wav_and_side);
+  $wav = "${db_base}/data/${task}/${sets}/${subtask}/${wav}";
+  @A = split("/", $wav);
+  $basename = $A[$#A];
+  $raw_basename = $basename;
+  $raw_basename =~ s/\.wav$// || die "bad basename $basename";
+  $uttId = $raw_basename . "_" . $side;
+  $key = "${spkr} ${uttId}";
+  if ( (not exists($trials{"${spkr} ${uttId}"}) ) or exists($utts{$uttId})  ) {
+    next;
+  }
+  $utts{$uttId} = 1;
+  # Validate the side label.  NOTE(review): $channel itself is unused below,
+  # since wav.scp points at the .wav file directly.
+  if ($side eq "A") {
+    $channel = 1;
+  } elsif ($side eq "B") {
+    $channel = 2;
+  } else {
+    die "unknown channel $side\n";
+  }
+  print WAV "$uttId"," $wav\n";
+  print SPKR "$uttId $uttId\n"; # each test utterance is its own "speaker"
+  print GNDR "$uttId $gender\n";
+  $spk2gender{$spkr} = $gender;
+}
+
+close(SPKR) || die;
+close(WAV) || die;
+close(WAVLIST) || die;
+close(GNDR) || die;
+
+if (system(
+  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_dir";
+}
+system("utils/fix_data_dir.sh $out_dir");
+if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
+  die "Error validating directory $out_dir";
+}
diff --git a/egs/fame/v1/local/make_fame_test_year.pl b/egs/fame/v1/local/make_fame_test_year.pl
new file mode 100755 (executable)
index 0000000..c881a70
--- /dev/null
@@ -0,0 +1,85 @@
+#!/usr/bin/perl
+#
+# Copyright 2015   David Snyder
+#           2017   Radboud University (Author: Emre Yilmaz)
+# Apache 2.0.
+# Usage: make_fame_test_year.pl corpus/SV/ data/ ageing 3sec eval _1t3.
+#
+# Builds the Kaldi test data directory for one FAME! ageing condition
+# restricted to an age-gap category: the <age-category> suffix (e.g. _1t3,
+# as passed by run.sh) is appended to both the trial-list filenames and
+# the output directory name.
+
+# Bug fix: this script takes SIX arguments (the extra <age-category>), but
+# the guard previously rejected anything but five, so every call from
+# run.sh failed with the usage message.
+if (@ARGV != 6) {
+  print STDERR "Usage: $0 <path-to-FAME corpus> <path-to-output> <task-name> <subtask-name> <dataset-name> <age-category>\n";
+  print STDERR "e.g. $0 corpus/SV/ data/ ageing 3sec eval _1t3\n";
+  exit(1);
+}
+
+($db_base, $out_base_dir, $task, $subtask, $sets, $year) = @ARGV;
+$out_dir = "$out_base_dir/fame_${task}_${subtask}_${sets}${year}_test";
+
+$tmp_dir = "$out_dir/tmp";
+if (system("mkdir -p $tmp_dir") != 0) {
+  die "Error making directory $tmp_dir"; 
+}
+
+# Pass 1: the trial key holds comma-separated lines "spkr,utt,side,is_target".
+open(IN_TRIALS, "<", "$db_base/docs/$task/${task}_${subtask}_${sets}_trials${year}_key") or die "cannot open trials list";
+open(OUT_TRIALS, ">", "$out_dir/trials") or die "cannot open trials list";
+%trials = ();
+while(<IN_TRIALS>) {
+  chomp;
+  ($spkr,$utt,$side,$is_target) = split(",", $_);
+  $side = uc $side;
+  $key = "${spkr} ${utt}_${side}"; # Just keep track of the spkr-utterance pairs we want.
+  $trials{$key} = 1; # Just keep track of the spkr-utterance pairs we want.
+  print OUT_TRIALS "$spkr ${utt}_${side} $is_target\n";
+}
+
+close(OUT_TRIALS) || die;
+close(IN_TRIALS) || die;
+
+# Pass 2: the wav list holds lines "spkr gender wav:side"; keep only the
+# entries that appear in the trial key, emitting each utterance once.
+open(WAVLIST, "<", "$db_base/docs/$task/${task}_${subtask}_${sets}_trials${year}") or die "cannot open wav list";
+open(GNDR,">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+open(SPKR,">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+open(WAV,">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+
+%spk2gender = ();
+%utts = ();
+while(<WAVLIST>) {
+  chomp;
+  $sph = $_;
+  ($spkr, $gender, $wav_and_side) = split(" ", $sph);
+  ($wav, $side) = split(":", $wav_and_side);
+  $wav = "${db_base}/data/${task}/${sets}/${subtask}/${wav}";
+  @A = split("/", $wav);
+  $basename = $A[$#A];
+  $raw_basename = $basename;
+  $raw_basename =~ s/\.wav$// || die "bad basename $basename";
+  $uttId = $raw_basename . "_" . $side;
+  $key = "${spkr} ${uttId}";
+  if ( (not exists($trials{"${spkr} ${uttId}"}) ) or exists($utts{$uttId})  ) {
+    next;
+  }
+  $utts{$uttId} = 1;
+  # Validate the side label ($channel itself is unused below).
+  if ($side eq "A") {
+    $channel = 1;
+  } elsif ($side eq "B") {
+    $channel = 2;
+  } else {
+    die "unknown channel $side\n";
+  }
+  print WAV "$uttId"," $wav\n";
+  print SPKR "$uttId $uttId\n"; # each test utterance is its own "speaker"
+  print GNDR "$uttId $gender\n";
+  $spk2gender{$spkr} = $gender;
+}
+
+close(SPKR) || die;
+close(WAV) || die;
+close(WAVLIST) || die;
+close(GNDR) || die;
+
+if (system(
+  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_dir";
+}
+system("utils/fix_data_dir.sh $out_dir");
+if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
+  die "Error validating directory $out_dir";
+}
diff --git a/egs/fame/v1/local/make_fame_train.pl b/egs/fame/v1/local/make_fame_train.pl
new file mode 100755 (executable)
index 0000000..f105549
--- /dev/null
@@ -0,0 +1,60 @@
+#!/usr/bin/perl
+#
+# Copyright 2015   David Snyder
+#           2017   Radboud University (Author: Emre Yilmaz) 
+# Apache 2.0.
+# Usage: make_fame_train.pl corpus/SV/ data/ complete 3sec eval.
+#
+# Builds the Kaldi enrollment data directory for one FAME!
+# speaker-verification condition: reads the enrollment wav list from
+# <corpus>/docs/<task>/ and writes wav.scp, utt2spk, spk2utt and
+# spk2gender under <out>/fame_<task>_<subtask>_<set>_enroll.
+
+if (@ARGV != 5) {
+  print STDERR "Usage: $0 <path-to-FAME corpus> <path-to-output> <task-name> <subtask-name> <dataset-name>\n";
+  print STDERR "e.g. $0 corpus/SV/ data/ complete 3sec eval\n";
+  exit(1);
+}
+
+($db_base, $out_base_dir, $task, $subtask, $sets) = @ARGV;
+$out_dir = "$out_base_dir/fame_${task}_${subtask}_${sets}_enroll";
+
+$tmp_dir = "$out_dir/tmp";
+if (system("mkdir -p $tmp_dir") != 0) {
+  die "Error making directory $tmp_dir"; 
+}
+
+# Each line of the enrollment wav list is "spkr gender wav:side".
+open(WAVLIST, "<", "$db_base/docs/${task}/${task}_${subtask}_${sets}_enroll") or die "cannot open wav list";
+open(SPKR,">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+open(GNDR,">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+open(WAV,">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+
+while(<WAVLIST>) {
+  chomp;
+  $sph = $_;
+  ($spkr, $gender, $wav_and_side) = split(" ", $sph);
+  ($wav, $side) = split(":", $wav_and_side);
+  @A = split("/", $wav);
+  $wav = "${db_base}/data/${task}/${sets}/${subtask}/${wav}";
+  $basename = $A[$#A];
+  $raw_basename = $basename;
+  $raw_basename =~ s/\.wav$// || die "bad basename $basename";
+  $uttId = $raw_basename . "_" . $side; # utt id is <wav-basename>_<side>; NOTE(review): the spkr id is NOT prefixed here (fix_data_dir.sh below re-sorts the directory).
+  # Validate the side label.  NOTE(review): $channel itself is unused below,
+  # since wav.scp points at the .wav file directly.
+  if ($side eq "A") {
+    $channel = 1;
+  } elsif ($side eq "B") {
+    $channel = 2;
+  } else {
+    die "unknown channel $side\n";
+  }
+  print GNDR "$spkr $gender\n"; # one line per utterance; duplicates per speaker
+  print WAV "$uttId"," $wav", "\n";
+  print SPKR "$uttId"," $spkr","\n";
+}
+close(GNDR) || die;
+close(SPKR) || die;
+close(WAV) || die;
+close(WAVLIST) || die;
+if (system(
+  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_dir";
+}
+system("utils/fix_data_dir.sh $out_dir");
+if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
+  die "Error validating directory $out_dir";
+}
diff --git a/egs/fame/v1/local/make_fame_train_year.pl b/egs/fame/v1/local/make_fame_train_year.pl
new file mode 100755 (executable)
index 0000000..2d9dcf8
--- /dev/null
@@ -0,0 +1,60 @@
+#!/usr/bin/perl
+#
+# Copyright 2015   David Snyder
+#           2017   Radboud University (Author: Emre Yilmaz)
+# Apache 2.0.
+# Usage: make_fame_train_year.pl corpus/SV/ data/ ageing 3sec eval _1t3.
+#
+# Builds the Kaldi enrollment directory for one FAME! ageing condition
+# restricted to an age-gap category: the <age-category> suffix (e.g. _1t3,
+# as passed by run.sh) is appended to both the enrollment-list filename
+# and the output directory name.
+
+# Bug fix: this script takes SIX arguments (the extra <age-category>), but
+# the guard previously rejected anything but five, so every call from
+# run.sh failed with the usage message.
+if (@ARGV != 6) {
+  print STDERR "Usage: $0 <path-to-FAME corpus> <path-to-output> <task-name> <subtask-name> <dataset-name> <age-category>\n";
+  print STDERR "e.g. $0 corpus/SV/ data/ ageing 3sec eval _1t3\n";
+  exit(1);
+}
+
+($db_base, $out_base_dir, $task, $subtask, $sets, $year) = @ARGV;
+$out_dir = "$out_base_dir/fame_${task}_${subtask}_${sets}${year}_enroll";
+
+$tmp_dir = "$out_dir/tmp";
+if (system("mkdir -p $tmp_dir") != 0) {
+  die "Error making directory $tmp_dir"; 
+}
+
+# Each line of the enrollment wav list is "spkr gender wav:side".
+open(WAVLIST, "<", "$db_base/docs/${task}/${task}_${subtask}_${sets}_enroll${year}") or die "cannot open wav list";
+open(SPKR,">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
+open(GNDR,">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender";
+open(WAV,">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
+
+while(<WAVLIST>) {
+  chomp;
+  $sph = $_;
+  ($spkr, $gender, $wav_and_side) = split(" ", $sph);
+  ($wav, $side) = split(":", $wav_and_side);
+  @A = split("/", $wav);
+  $wav = "${db_base}/data/${task}/${sets}/${subtask}/${wav}";
+  $basename = $A[$#A];
+  $raw_basename = $basename;
+  $raw_basename =~ s/\.wav$// || die "bad basename $basename";
+  $uttId = $raw_basename . "_" . $side; # utt id is <wav-basename>_<side>
+  # Validate the side label ($channel itself is unused below).
+  if ($side eq "A") {
+    $channel = 1;
+  } elsif ($side eq "B") {
+    $channel = 2;
+  } else {
+    die "unknown channel $side\n";
+  }
+  print GNDR "$spkr $gender\n";
+  print WAV "$uttId"," $wav", "\n";
+  print SPKR "$uttId"," $spkr","\n";
+}
+close(GNDR) || die;
+close(SPKR) || die;
+close(WAV) || die;
+close(WAVLIST) || die;
+if (system(
+  "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) {
+  die "Error creating spk2utt file in directory $out_dir";
+}
+system("utils/fix_data_dir.sh $out_dir");
+if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) {
+  die "Error validating directory $out_dir";
+}
diff --git a/egs/fame/v1/local/plda_scoring.sh b/egs/fame/v1/local/plda_scoring.sh
new file mode 100755 (executable)
index 0000000..63d4a4f
--- /dev/null
@@ -0,0 +1,51 @@
+#!/bin/bash
+# Copyright 2015   David Snyder
+# Apache 2.0.
+#
+# This script trains PLDA models and does scoring.
+
+use_existing_models=false
+simple_length_norm=false # If true, replace the default length normalization
+                         # performed in PLDA  by an alternative that
+                         # normalizes the length of the iVectors to be equal
+                         # to the square root of the iVector dimension.
+
+echo "$0 $@"  # Print the command line for logging
+
+if [ -f path.sh ]; then . ./path.sh; fi
+. parse_options.sh || exit 1;
+
+if [ $# != 8 ]; then
+  echo "Usage: $0 <plda-data-dir> <enroll-data-dir> <test-data-dir> <plda-ivec-dir> <enroll-ivec-dir> <test-ivec-dir> <trials-file> <scores-dir>"
+fi
+
+plda_data_dir=$1
+enroll_data_dir=$2
+test_data_dir=$3
+plda_ivec_dir=$4
+enroll_ivec_dir=$5
+test_ivec_dir=$6
+trials=$7
+scores_dir=$8
+
+if [ "$use_existing_models" == "true" ]; then
+  for f in ${plda_ivec_dir}/mean.vec ${plda_ivec_dir}/plda ; do
+    [ ! -f $f ] && echo "No such file $f" && exit 1;
+  done
+else
+  run.pl $plda_ivec_dir/log/plda.log \
+    ivector-compute-plda ark:$plda_data_dir/spk2utt \
+    "ark:ivector-normalize-length scp:${plda_ivec_dir}/ivector.scp  ark:- |" \
+    $plda_ivec_dir/plda || exit 1;
+fi
+
+mkdir -p $scores_dir/log
+
+run.pl $scores_dir/log/plda_scoring.log \
+  ivector-plda-scoring --normalize-length=true \
+    --simple-length-normalization=$simple_length_norm \
+    --num-utts=ark:${enroll_ivec_dir}/num_utts.ark \
+    "ivector-copy-plda --smoothing=0.0 ${plda_ivec_dir}/plda - |" \
+    "ark:ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec scp:${enroll_ivec_dir}/spk_ivector.scp ark:- | ivector-normalize-length ark:- ark:- |" \
+    "ark:ivector-normalize-length scp:${test_ivec_dir}/ivector.scp ark:- | ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec ark:- ark:- | ivector-normalize-length ark:- ark:- |" \
+    "cat '$trials' | cut -d\  --fields=1,2 |" $scores_dir/plda_scores || exit 1;
diff --git a/egs/fame/v1/local/prepare_for_eer.py b/egs/fame/v1/local/prepare_for_eer.py
new file mode 100755 (executable)
index 0000000..59d2985
--- /dev/null
@@ -0,0 +1,15 @@
+# Copyright 2015   David Snyder
+# Apache 2.0.
+#
+# Given a trials and scores file, this script 
+# prepares input for the binary compute-eer. 
+import sys
+trials = open(sys.argv[1], 'r').readlines()
+scores = open(sys.argv[2], 'r').readlines()
+spkrutt2target = {}
+for line in trials:
+  spkr, utt, target = line.strip().split()
+  spkrutt2target[spkr+utt]=target
+for line in scores:
+  spkr, utt, score = line.strip().split()
+  print score, spkrutt2target[spkr+utt]
diff --git a/egs/fame/v1/local/prepare_train.sh b/egs/fame/v1/local/prepare_train.sh
new file mode 100755 (executable)
index 0000000..0a3979d
--- /dev/null
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Copyright 2015-2016  Sarah Flora Juan
+# Copyright 2016  Johns Hopkins University (Author: Yenda Trmal)
+# Copyright 2017  Radboud University (Author: Emre Yilmaz)
+
+# Apache 2.0
+
+corpus=$1
+set -e -o pipefail
+if [ -z "$corpus" ] ; then
+    echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech database"
+    exit 1
+fi
+if [ ! -d "$corpus" ] ; then
+    echo >&2 "The directory $corpus does not exist"
+fi
+
+mkdir -p data data/train
+
+cp $corpus/data/spk2utt  data/train/spk2utt || exit 1;
+cp $corpus/data/utt2spk  data/train/utt2spk || exit 1;
+
+# the corpus wav.scp contains physical paths, so we just re-generate
+# the file again from scratchn instead of figuring out how to edit it
+for rec in $(awk '{print $1}' $corpus/data/utt2spk) ; do
+    spk=${rec%_*}
+    filename=$corpus/SD/${rec}.wav
+    if [ ! -f "$filename" ] ; then
+        echo >&2 "The file $filename could not be found ($rec)"
+        exit 1
+    fi
+    # we might want to store physical paths as a general rule
+    filename=$(utils/make_absolute.sh $filename)
+    echo "$rec $filename"
+done > data/train/wav.scp
+
+# fix_data_dir.sh fixes common mistakes (unsorted entries in wav.scp,
+# duplicate entries and so on). Also, it regenerates the spk2utt from
+# utt2sp
+utils/fix_data_dir.sh data/train
diff --git a/egs/fame/v1/local/scoring_common.sh b/egs/fame/v1/local/scoring_common.sh
new file mode 100755 (executable)
index 0000000..63950ae
--- /dev/null
@@ -0,0 +1,69 @@
+#!/bin/bash
+# Copyright 2015   David Snyder
+# Copyright 2017   Emre Yilmaz (Adapted)
+# Apache 2.0.
+#
+if [ $# != 6 ]; then
+  echo "Usage: $0 <plda-data-dir> <enroll-data-dir> <test-data-dir> <plda-ivec-dir> <enroll-ivec-dir> <test-ivec-dir>"
+fi
+plda_data_dir=${1%/}
+enroll_data_dir=${2%/}
+test_data_dir=${3%/}
+plda_ivec_dir=${4%/}
+enroll_ivec_dir=${5%/}
+test_ivec_dir=${6%/}
+
+if [ ! -f ${test_data_dir}/trials ]; then 
+  echo "${test_data_dir} needs a trial file."
+  exit;
+fi
+
+mkdir -p local/.tmp
+
+# Partition the SRE data into male and female subsets.
+cat ${test_data_dir}/spk2gender | grep -w f > local/.tmp/female_spklist
+utils/subset_data_dir.sh --spk-list local/.tmp/female_spklist ${test_data_dir} ${test_data_dir}_female
+cat ${enroll_data_dir}/spk2gender | grep -w f > local/.tmp/female_spklist
+utils/subset_data_dir.sh --spk-list local/.tmp/female_spklist ${enroll_data_dir} ${enroll_data_dir}_female
+cat ${test_data_dir}/spk2gender | grep -w m > local/.tmp/male_spklist
+utils/subset_data_dir.sh --spk-list local/.tmp/male_spklist ${test_data_dir} ${test_data_dir}_male
+cat ${enroll_data_dir}/spk2gender | grep -w m > local/.tmp/male_spklist
+utils/subset_data_dir.sh --spk-list local/.tmp/male_spklist ${enroll_data_dir} ${enroll_data_dir}_male
+
+# Prepare female and male trials.
+trials_female=${test_data_dir}_female/trials
+cat ${test_data_dir}/trials | awk '{print $2, $0}' | \
+  utils/filter_scp.pl ${test_data_dir}_female/utt2spk | cut -d ' ' -f 2- \
+  > $trials_female
+trials_male=${test_data_dir}_male/trials
+cat ${test_data_dir}/trials | awk '{print $2, $0}' | \
+  utils/filter_scp.pl ${test_data_dir}_male/utt2spk | cut -d ' ' -f 2- \
+  > $trials_male
+
+mkdir -p ${test_ivec_dir}_male
+mkdir -p ${test_ivec_dir}_female
+mkdir -p ${enroll_ivec_dir}_male
+mkdir -p ${enroll_ivec_dir}_female
+
+# Partition the i-vectors into male and female subsets.
+utils/filter_scp.pl ${enroll_data_dir}_male/utt2spk \
+  ${enroll_ivec_dir}/ivector.scp > ${enroll_ivec_dir}_male/ivector.scp
+utils/filter_scp.pl ${test_data_dir}_male/utt2spk \
+  ${test_ivec_dir}/ivector.scp > ${test_ivec_dir}_male/ivector.scp
+utils/filter_scp.pl ${enroll_data_dir}_female/utt2spk \
+  ${enroll_ivec_dir}/ivector.scp > ${enroll_ivec_dir}_female/ivector.scp
+utils/filter_scp.pl ${test_data_dir}_female/utt2spk \
+  ${test_ivec_dir}/ivector.scp > ${test_ivec_dir}_female/ivector.scp
+utils/filter_scp.pl ${enroll_data_dir}_male/spk2utt \
+  ${enroll_ivec_dir}/spk_ivector.scp > ${enroll_ivec_dir}_male/spk_ivector.scp
+utils/filter_scp.pl ${enroll_data_dir}_female/spk2utt \
+  ${enroll_ivec_dir}/spk_ivector.scp > ${enroll_ivec_dir}_female/spk_ivector.scp
+utils/filter_scp.pl ${enroll_data_dir}_male/spk2utt \
+  ${enroll_ivec_dir}/num_utts.ark > ${enroll_ivec_dir}_male/num_utts.ark
+utils/filter_scp.pl ${enroll_data_dir}_female/spk2utt \
+  ${enroll_ivec_dir}/num_utts.ark > ${enroll_ivec_dir}_female/num_utts.ark
+
+# Compute gender independent and dependent i-vector means.
+ivector-mean scp:${plda_ivec_dir}/ivector.scp ${plda_ivec_dir}/mean.vec
+
+rm -rf local/.tmp
diff --git a/egs/fame/v1/path.sh b/egs/fame/v1/path.sh
new file mode 100755 (executable)
index 0000000..2d17b17
--- /dev/null
@@ -0,0 +1,6 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/fame/v1/run.sh b/egs/fame/v1/run.sh
new file mode 100755 (executable)
index 0000000..34c425a
--- /dev/null
@@ -0,0 +1,300 @@
+#!/bin/bash
+# Copyright 2015   David Snyder
+#           2015   Johns Hopkins University (Author: Daniel Garcia-Romero)
+#           2015   Johns Hopkins University (Author: Daniel Povey)
+#           2017   Radboud University (Author Emre Yilmaz)
+# Apache 2.0.
+#
+# See README.txt for more info on data required.
+# Results (EERs) are inline in comments below.
+
+. ./cmd.sh
+. ./path.sh
+set -e
+
+mfccdir=`pwd`/mfcc
+vaddir=`pwd`/mfcc
+famecorpus=./corpus
+num_components=2048 # UBM size
+
+# Data preparation
+
+if [ -d $famecorpus ] ; then
+  echo "Fame corpus present. OK."
+elif [ -f ./fame.tar.gz ] ; then
+  echo "Unpacking..."
+  tar xzf fame.tar.gz
+elif [ ! -d $famecorpus ] && [ ! -f ./fame.tar.gz ] ; then
+  echo "The Fame! corpus is not present. Please register here: http://www.ru.nl/clst/datasets/ "
+  echo " and download the corpus and put it at $famecorpus" && exit 1
+fi
+
+echo "Preparing data/train.."
+local/prepare_train.sh $famecorpus/SC
+
+# Prepare enroll/test dirs for every task/duration/set combination.
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      echo "Preparing data/fame_${task}_${subtask}_${sets}.."
+      trials_female=data/fame_${task}_${subtask}_${sets}_female/trials
+      trials_male=data/fame_${task}_${subtask}_${sets}_male/trials
+      trials=data/fame_${task}_${subtask}_${sets}/trials
+      local/make_fame_test.pl $famecorpus/SV data $task $subtask $sets
+      local/make_fame_train.pl $famecorpus/SV data $task $subtask $sets 
+
+    done
+  done
+done
+
+# The ageing task is additionally split by the time gap between enroll and
+# test recordings; the year suffixes below already carry the leading
+# underscore used in the directory names (e.g. fame_ageing_3sec_eval_1t3).
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        echo "Preparing data/fame_${task}_${subtask}_${sets}${year}.."
+        trials_female=data/fame_${task}_${subtask}_${sets}${year}_female/trials
+        trials_male=data/fame_${task}_${subtask}_${sets}${year}_male/trials
+        trials=data/fame_${task}_${subtask}_${sets}${year}/trials
+        local/make_fame_test_year.pl $famecorpus/SV data $task $subtask $sets $year
+        local/make_fame_train_year.pl $famecorpus/SV data $task $subtask $sets $year 
+
+      done
+    done
+  done
+done
+
+# MFCC extraction
+
+# NOTE(review): --nj 100 assumes the data splits into 100 jobs; reduce it
+# for small setups.
+echo "Extracting MFCC features for data/train.."
+
+steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \
+    data/train exp/make_mfcc $mfccdir
+utils/fix_data_dir.sh data/train
+
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      echo "Extracting MFCC features for data/fame_${task}_${subtask}_${sets}.."
+      steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \
+          data/fame_${task}_${subtask}_${sets}_enroll exp/make_mfcc $mfccdir
+      utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}_enroll
+      steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \
+          data/fame_${task}_${subtask}_${sets}_test exp/make_mfcc $mfccdir
+      utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}_test
+      
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        echo "Extracting MFCC features for data/fame_${task}_${subtask}_${sets}${year}.."
+        steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \
+            data/fame_${task}_${subtask}_${sets}${year}_enroll exp/make_mfcc $mfccdir
+        utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}${year}_enroll
+        steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \
+            data/fame_${task}_${subtask}_${sets}${year}_test exp/make_mfcc $mfccdir
+        utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}${year}_test
+
+      done
+    done
+  done
+done
+
+# VAD computation
+
+echo "Computing VAD for data/train.."
+
+sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \
+    data/train exp/make_vad $vaddir
+
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      echo "Computing VAD for data/fame_${task}_${subtask}_${sets}.."
+      sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \
+          data/fame_${task}_${subtask}_${sets}_enroll exp/make_vad $vaddir
+      sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \
+          data/fame_${task}_${subtask}_${sets}_test exp/make_vad $vaddir 
+
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        echo "Computing VAD for data/fame_${task}_${subtask}_${sets}${year}.."
+        sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \
+            data/fame_${task}_${subtask}_${sets}${year}_enroll exp/make_vad $vaddir
+        sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \
+            data/fame_${task}_${subtask}_${sets}${year}_test exp/make_vad $vaddir
+      
+      done
+    done
+  done
+done
+
+
+# Train UBM and i-vector extractor
+
+echo "Training UBM and the i-vector extractor.."
+
+sid/train_diag_ubm.sh --nj 40 --cmd "$train_cmd" \
+    data/train $num_components \
+    exp/diag_ubm_$num_components
+
+sid/train_full_ubm.sh --nj 40 --remove-low-count-gaussians false \
+    --cmd "$train_cmd" data/train \
+    exp/diag_ubm_$num_components exp/full_ubm_$num_components
+
+sid/train_ivector_extractor.sh --cmd "$train_cmd" --nj 20 --num-threads 4 --num-processes 2 \
+  --ivector-dim 600 \
+  --num-iters 5 exp/full_ubm_$num_components/final.ubm data/train \
+  exp/extractor
+
+# Extract i-vectors
+
+echo "Extracting i-vectors for data/train.."
+
+sid/extract_ivectors.sh --cmd "$train_cmd" --nj 100 \
+   exp/extractor data/train \
+   exp/ivectors_train 
+
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      echo "Extracting i-vectors for data/fame_${task}_${subtask}_${sets}"
+      sid/extract_ivectors.sh --cmd "$train_cmd" --nj 100 \
+         exp/extractor data/fame_${task}_${subtask}_${sets}_enroll \
+         exp/ivectors_fame_${task}_${subtask}_${sets}_enroll
+      sid/extract_ivectors.sh --cmd "$train_cmd" --nj 100 \
+         exp/extractor data/fame_${task}_${subtask}_${sets}_test \
+         exp/ivectors_fame_${task}_${subtask}_${sets}_test
+
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        echo "Extracting i-vectors for data/fame_${task}_${subtask}_${sets}${year}"
+        sid/extract_ivectors.sh --cmd "$train_cmd" --nj 100 \
+           exp/extractor data/fame_${task}_${subtask}_${sets}${year}_enroll \
+           exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll
+        sid/extract_ivectors.sh --cmd "$train_cmd" --nj 100 \
+           exp/extractor data/fame_${task}_${subtask}_${sets}${year}_test \
+           exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test 
+
+      done  
+    done
+  done
+done
+# Calculate i-vector means used by the scoring scripts
+
+echo "Calculating i-vectors means.."
+
+# For each condition: split by gender (scoring_common.sh), train PLDA on
+# the pooled data, then reuse that model for the per-gender scoring runs.
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      local/scoring_common.sh data/train data/fame_${task}_${subtask}_${sets}_enroll data/fame_${task}_${subtask}_${sets}_test \
+        exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}_enroll exp/ivectors_fame_${task}_${subtask}_${sets}_test
+
+      trials_female=data/fame_${task}_${subtask}_${sets}_test_female/trials
+      trials_male=data/fame_${task}_${subtask}_${sets}_test_male/trials
+      trials=data/fame_${task}_${subtask}_${sets}_test/trials
+
+      local/plda_scoring.sh data/train data/fame_${task}_${subtask}_${sets}_enroll data/fame_${task}_${subtask}_${sets}_test \
+        exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}_enroll exp/ivectors_fame_${task}_${subtask}_${sets}_test $trials local/scores_gmm_2048_ind_pooled_${task}_${subtask}_${sets}
+
+      local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}_enroll_female data/fame_${task}_${subtask}_${sets}_test_female \
+        exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_female exp/ivectors_fame_${task}_${subtask}_${sets}_test_female $trials_female local/scores_gmm_2048_ind_female_${task}_${subtask}_${sets}
+
+      local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}_enroll_male data/fame_${task}_${subtask}_${sets}_test_male \
+        exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_male exp/ivectors_fame_${task}_${subtask}_${sets}_test_male $trials_male local/scores_gmm_2048_ind_male_${task}_${subtask}_${sets}
+              
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        local/scoring_common.sh data/train data/fame_${task}_${subtask}_${sets}${year}_enroll data/fame_${task}_${subtask}_${sets}${year}_test \
+          exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test
+
+        trials_female=data/fame_${task}_${subtask}_${sets}${year}_test_female/trials
+        trials_male=data/fame_${task}_${subtask}_${sets}${year}_test_male/trials
+        trials=data/fame_${task}_${subtask}_${sets}${year}_test/trials
+
+        local/plda_scoring.sh data/train data/fame_${task}_${subtask}_${sets}${year}_enroll data/fame_${task}_${subtask}_${sets}${year}_test \
+          exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test $trials local/scores_gmm_2048_ind_pooled_${task}_${subtask}_${sets}${year}
+
+        local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}${year}_enroll_female data/fame_${task}_${subtask}_${sets}${year}_test_female \
+          exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_female exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_female $trials_female local/scores_gmm_2048_ind_female_${task}_${subtask}_${sets}${year}
+
+        local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}${year}_enroll_male data/fame_${task}_${subtask}_${sets}${year}_test_male \
+          exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_male exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_male $trials_male local/scores_gmm_2048_ind_male_${task}_${subtask}_${sets}${year}
+
+      done
+    done
+  done
+done
+
+# Calculating EER 
+
+echo "Calculating EER.."
+
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      trials=data/fame_${task}_${subtask}_${sets}_test/trials
+      echo "GMM-$num_components EER for fame_${task}_${subtask}_${sets}"
+      for x in ind; do
+        for y in female male pooled; do
+          echo "python local/prepare_for_eer.py $trials local/scores_gmm_${num_components}_${x}_${y}_${task}_${subtask}_${sets}/plda_scores"
+          eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_gmm_${num_components}_${x}_${y}_${task}_${subtask}_${sets}/plda_scores) 2> /dev/null`
+          echo "${x} ${y}: $eer"
+        done
+      done
+
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        trials=data/fame_${task}_${subtask}_${sets}${year}_test/trials
+        echo "GMM-$num_components EER for fame_${task}_${subtask}_${sets}${year}"
+        for x in ind; do
+          for y in female male pooled; do
+            echo "python local/prepare_for_eer.py $trials local/scores_gmm_${num_components}_${x}_${y}_${task}_${subtask}_${sets}${year}/plda_scores"
+            eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_gmm_${num_components}_${x}_${y}_${task}_${subtask}_${sets}${year}/plda_scores) 2> /dev/null`
+            echo "${x} ${y}: $eer"
+          done
+        done
+
+      done
+    done
+  done
+done
diff --git a/egs/fame/v1/sid b/egs/fame/v1/sid
new file mode 120000 (symlink)
index 0000000..893a12f
--- /dev/null
@@ -0,0 +1 @@
+../../sre08/v1/sid
\ No newline at end of file
diff --git a/egs/fame/v1/steps b/egs/fame/v1/steps
new file mode 120000 (symlink)
index 0000000..6e99bf5
--- /dev/null
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/fame/v1/utils b/egs/fame/v1/utils
new file mode 120000 (symlink)
index 0000000..b240885
--- /dev/null
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file
diff --git a/egs/fame/v2/RESULTS b/egs/fame/v2/RESULTS
new file mode 100644 (file)
index 0000000..375bb0b
--- /dev/null
@@ -0,0 +1,105 @@
+DNN EER for fame_complete_3sec_eval
+python local/prepare_for_eer.py data/fame_complete_3sec_eval_test/trials local/scores_dnn_ind_female_complete_3sec_eval/plda_scores
+ind female: 20.71
+python local/prepare_for_eer.py data/fame_complete_3sec_eval_test/trials local/scores_dnn_ind_male_complete_3sec_eval/plda_scores
+ind male: 13.6
+python local/prepare_for_eer.py data/fame_complete_3sec_eval_test/trials local/scores_dnn_ind_pooled_complete_3sec_eval/plda_scores
+ind pooled: 16.89
+DNN EER for fame_complete_10sec_eval
+python local/prepare_for_eer.py data/fame_complete_10sec_eval_test/trials local/scores_dnn_ind_female_complete_10sec_eval/plda_scores
+ind female: 13.21
+python local/prepare_for_eer.py data/fame_complete_10sec_eval_test/trials local/scores_dnn_ind_male_complete_10sec_eval/plda_scores
+ind male: 7.391
+python local/prepare_for_eer.py data/fame_complete_10sec_eval_test/trials local/scores_dnn_ind_pooled_complete_10sec_eval/plda_scores
+ind pooled: 9.929
+DNN EER for fame_complete_30sec_eval
+python local/prepare_for_eer.py data/fame_complete_30sec_eval_test/trials local/scores_dnn_ind_female_complete_30sec_eval/plda_scores
+ind female: 10.27
+python local/prepare_for_eer.py data/fame_complete_30sec_eval_test/trials local/scores_dnn_ind_male_complete_30sec_eval/plda_scores
+ind male: 4.963
+python local/prepare_for_eer.py data/fame_complete_30sec_eval_test/trials local/scores_dnn_ind_pooled_complete_30sec_eval/plda_scores
+ind pooled: 7.469
+DNN EER for fame_ageing_3sec_eval
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_test/trials local/scores_dnn_ind_female_ageing_3sec_eval/plda_scores
+ind female: 20.05
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_test/trials local/scores_dnn_ind_male_ageing_3sec_eval/plda_scores
+ind male: 14.52
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_test/trials local/scores_dnn_ind_pooled_ageing_3sec_eval/plda_scores
+ind pooled: 17.71
+DNN EER for fame_ageing_10sec_eval
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_test/trials local/scores_dnn_ind_female_ageing_10sec_eval/plda_scores
+ind female: 12.06
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_test/trials local/scores_dnn_ind_male_ageing_10sec_eval/plda_scores
+ind male: 7.947
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_test/trials local/scores_dnn_ind_pooled_ageing_10sec_eval/plda_scores
+ind pooled: 10.35
+DNN EER for fame_ageing_30sec_eval
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_test/trials local/scores_dnn_ind_female_ageing_30sec_eval/plda_scores
+ind female: 7.518
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_test/trials local/scores_dnn_ind_male_ageing_30sec_eval/plda_scores
+ind male: 4.624
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_test/trials local/scores_dnn_ind_pooled_ageing_30sec_eval/plda_scores
+ind pooled: 7.118
+DNN EER for fame_ageing_3sec_eval_1t3
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_1t3_test/trials local/scores_dnn_ind_female_ageing_3sec_eval_1t3/plda_scores
+ind female: 20.57
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_1t3_test/trials local/scores_dnn_ind_male_ageing_3sec_eval_1t3/plda_scores
+ind male: 10.88
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_1t3_test/trials local/scores_dnn_ind_pooled_ageing_3sec_eval_1t3/plda_scores
+ind pooled: 16.38
+DNN EER for fame_ageing_3sec_eval_4t10
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_4t10_test/trials local/scores_dnn_ind_female_ageing_3sec_eval_4t10/plda_scores
+ind female: 18.02
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_4t10_test/trials local/scores_dnn_ind_male_ageing_3sec_eval_4t10/plda_scores
+ind male: 14.48
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_4t10_test/trials local/scores_dnn_ind_pooled_ageing_3sec_eval_4t10/plda_scores
+ind pooled: 16.54
+DNN EER for fame_ageing_3sec_eval_mt10
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_mt10_test/trials local/scores_dnn_ind_female_ageing_3sec_eval_mt10/plda_scores
+ind female: 21.83
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_mt10_test/trials local/scores_dnn_ind_male_ageing_3sec_eval_mt10/plda_scores
+ind male: 17.1
+python local/prepare_for_eer.py data/fame_ageing_3sec_eval_mt10_test/trials local/scores_dnn_ind_pooled_ageing_3sec_eval_mt10/plda_scores
+ind pooled: 20.09
+DNN EER for fame_ageing_10sec_eval_1t3
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_1t3_test/trials local/scores_dnn_ind_female_ageing_10sec_eval_1t3/plda_scores
+ind female: 12.81
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_1t3_test/trials local/scores_dnn_ind_male_ageing_10sec_eval_1t3/plda_scores
+ind male: 5.076
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_1t3_test/trials local/scores_dnn_ind_pooled_ageing_10sec_eval_1t3/plda_scores
+ind pooled: 9.946
+DNN EER for fame_ageing_10sec_eval_4t10
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_4t10_test/trials local/scores_dnn_ind_female_ageing_10sec_eval_4t10/plda_scores
+ind female: 9.812
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_4t10_test/trials local/scores_dnn_ind_male_ageing_10sec_eval_4t10/plda_scores
+ind male: 9.193
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_4t10_test/trials local/scores_dnn_ind_pooled_ageing_10sec_eval_4t10/plda_scores
+ind pooled: 9.746
+DNN EER for fame_ageing_10sec_eval_mt10
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_mt10_test/trials local/scores_dnn_ind_female_ageing_10sec_eval_mt10/plda_scores
+ind female: 13.19
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_mt10_test/trials local/scores_dnn_ind_male_ageing_10sec_eval_mt10/plda_scores
+ind male: 7.711
+python local/prepare_for_eer.py data/fame_ageing_10sec_eval_mt10_test/trials local/scores_dnn_ind_pooled_ageing_10sec_eval_mt10/plda_scores
+ind pooled: 11.04
+DNN EER for fame_ageing_30sec_eval_1t3
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_1t3_test/trials local/scores_dnn_ind_female_ageing_30sec_eval_1t3/plda_scores
+ind female: 8.882
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_1t3_test/trials local/scores_dnn_ind_male_ageing_30sec_eval_1t3/plda_scores
+ind male: 2.212
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_1t3_test/trials local/scores_dnn_ind_pooled_ageing_30sec_eval_1t3/plda_scores
+ind pooled: 7.547
+DNN EER for fame_ageing_30sec_eval_4t10
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_4t10_test/trials local/scores_dnn_ind_female_ageing_30sec_eval_4t10/plda_scores
+ind female: 5.155
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_4t10_test/trials local/scores_dnn_ind_male_ageing_30sec_eval_4t10/plda_scores
+ind male: 5.461
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_4t10_test/trials local/scores_dnn_ind_pooled_ageing_30sec_eval_4t10/plda_scores
+ind pooled: 6.021
+DNN EER for fame_ageing_30sec_eval_mt10
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_mt10_test/trials local/scores_dnn_ind_female_ageing_30sec_eval_mt10/plda_scores
+ind female: 8.244
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_mt10_test/trials local/scores_dnn_ind_male_ageing_30sec_eval_mt10/plda_scores
+ind male: 4.202
+python local/prepare_for_eer.py data/fame_ageing_30sec_eval_mt10_test/trials local/scores_dnn_ind_pooled_ageing_30sec_eval_mt10/plda_scores
+ind pooled: 7.737
diff --git a/egs/fame/v2/cmd.sh b/egs/fame/v2/cmd.sh
new file mode 100644 (file)
index 0000000..23721d0
--- /dev/null
@@ -0,0 +1,23 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
+# with slurm.  Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration.  Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd=queue.pl
+export decode_cmd="queue.pl --mem 2G"
+# the use of cuda_cmd is deprecated; it is used only in 'nnet1'.
+export cuda_cmd="queue.pl --gpu 1"
+
+if [ "$(hostname -d)" == "fit.vutbr.cz" ]; then
+  queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf,
+  export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2"
+  export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1"
+  export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G"
+fi
diff --git a/egs/fame/v2/conf/decode.config b/egs/fame/v2/conf/decode.config
new file mode 100644 (file)
index 0000000..2f36bcc
--- /dev/null
@@ -0,0 +1,3 @@
+beam=11.0 # beam for decoding.  Was 13.0 in the scripts.
+first_beam=8.0 # beam for 1st-pass decoding in SAT.
+lattice_beam=6.0
diff --git a/egs/fame/v2/conf/decode_dnn.config b/egs/fame/v2/conf/decode_dnn.config
new file mode 100644 (file)
index 0000000..ab8dcc1
--- /dev/null
@@ -0,0 +1,2 @@
+beam=13.0 # beam for decoding.  Was 13.0 in the scripts.
+lattice_beam=8.0 # this has most effect on size of the lattices.
diff --git a/egs/fame/v2/conf/mfcc_16k.conf b/egs/fame/v2/conf/mfcc_16k.conf
new file mode 100644 (file)
index 0000000..2436c26
--- /dev/null
@@ -0,0 +1,4 @@
+--low-freq=20 # the default.
+--high-freq=7600 # the default is zero meaning use the Nyquist (8k in this case).
+--num-ceps=20 # higher than the default which is 12.
+--snip-edges=false
diff --git a/egs/fame/v2/conf/mfcc_asr.conf b/egs/fame/v2/conf/mfcc_asr.conf
new file mode 100644 (file)
index 0000000..1620568
--- /dev/null
@@ -0,0 +1,2 @@
+--use-energy=false   # only non-default option.
+--snip-edges=false
diff --git a/egs/fame/v2/conf/mfcc_hires_16k.conf b/egs/fame/v2/conf/mfcc_hires_16k.conf
new file mode 100644 (file)
index 0000000..6dacd1c
--- /dev/null
@@ -0,0 +1,11 @@
+# config for high-resolution MFCC features, intended for neural network training
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false   # use average of log energy, not energy.
+--num-mel-bins=40     # similar to Google's setup.
+--num-ceps=40     # there is no dimensionality reduction.
+--low-freq=20     # low cutoff frequency for mel bins... this is high-bandwidth data, so
+                  # there might be some information at the low end.
+--high-freq=7600 # high cutoff frequency, relative to Nyquist of 8000 (=7600)
+--snip-edges=false 
diff --git a/egs/fame/v2/conf/vad.conf b/egs/fame/v2/conf/vad.conf
new file mode 100644 (file)
index 0000000..a0ca244
--- /dev/null
@@ -0,0 +1,2 @@
+--vad-energy-threshold=5.5
+--vad-energy-mean-scale=0.5
diff --git a/egs/fame/v2/local b/egs/fame/v2/local
new file mode 120000 (symlink)
index 0000000..ce1cbf9
--- /dev/null
@@ -0,0 +1 @@
+../v1/local
\ No newline at end of file
diff --git a/egs/fame/v2/path.sh b/egs/fame/v2/path.sh
new file mode 100755 (executable)
index 0000000..2d17b17
--- /dev/null
@@ -0,0 +1,6 @@
+export KALDI_ROOT=`pwd`/../../..
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
+. $KALDI_ROOT/tools/config/common_path.sh
+export LC_ALL=C
diff --git a/egs/fame/v2/run.sh b/egs/fame/v2/run.sh
new file mode 100755 (executable)
index 0000000..17127a6
--- /dev/null
@@ -0,0 +1,402 @@
+#!/bin/bash
+# Copyright 2015-2016   David Snyder
+#                2015   Johns Hopkins University (Author: Daniel Garcia-Romero)
+#                2015   Johns Hopkins University (Author: Daniel Povey)
+#                2017   Radboud University (Author: Emre Yilmaz)      
+# Apache 2.0.
+#
+# See README.txt for more info on data required.
+# Results (EERs) are inline in comments below.
+#
+# This example script shows how to replace the GMM-UBM
+# with a DNN trained for ASR.
+
+. cmd.sh
+. path.sh
+set -e
+mfccdir=`pwd`/mfcc
+vaddir=`pwd`/mfcc
+nnet=exp/nnet2_online/nnet_ms_a/final.mdl
+famecorpus=./corpus
+
+# Data preparation
+
+if [ -d $famecorpus ] ; then
+  echo "Fame corpus present. OK."
+elif [ -f ./fame.tar.gz ] ; then
+  echo "Unpacking..."
+  tar xzf fame.tar.gz
+elif [ ! -d $famecorpus ] && [ ! -f ./fame.tar.gz ] ; then
+  echo "The Fame! corpus is not present. Please register here: http://www.ru.nl/clst/datasets/ "
+  echo " and download the corpus and put it at $famecorpus" && exit 1
+fi
+
+# Train a DNN on about 10 hours of Frisian-Dutch speech.
+
+local/dnn/train_dnn.sh
+
+echo "Preparing data/train.."
+local/prepare_train.sh $famecorpus/SC
+
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      echo "Preparing data/fame_${task}_${subtask}_${sets}.."
+      trials_female=data/fame_${task}_${subtask}_${sets}_female/trials
+      trials_male=data/fame_${task}_${subtask}_${sets}_male/trials
+      trials=data/fame_${task}_${subtask}_${sets}/trials
+      local/make_fame_test.pl $famecorpus/SV data $task $subtask $sets
+      local/make_fame_train.pl $famecorpus/SV data $task $subtask $sets 
+
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        echo "Preparing data/fame_${task}_${subtask}_${sets}${year}.."
+        trials_female=data/fame_${task}_${subtask}_${sets}${year}_female/trials
+        trials_male=data/fame_${task}_${subtask}_${sets}${year}_male/trials
+        trials=data/fame_${task}_${subtask}_${sets}${year}/trials
+        local/make_fame_test_year.pl $famecorpus/SV data $task $subtask $sets $year
+        local/make_fame_train_year.pl $famecorpus/SV data $task $subtask $sets $year 
+
+      done
+    done
+  done
+done
+
+echo "Copying data/train.."
+
+cp -r data/train data/train_dnn
+
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      echo "Copying data/fame_${task}_${subtask}_${sets}.."
+      cp -r data/fame_${task}_${subtask}_${sets}_enroll data/fame_${task}_${subtask}_${sets}_enroll_dnn
+      cp -r data/fame_${task}_${subtask}_${sets}_test data/fame_${task}_${subtask}_${sets}_test_dnn
+
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        echo "Copying data/fame_${task}_${subtask}_${sets}${year}.."
+        cp -r data/fame_${task}_${subtask}_${sets}${year}_enroll data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn
+        cp -r data/fame_${task}_${subtask}_${sets}${year}_test data/fame_${task}_${subtask}_${sets}${year}_test_dnn
+
+      done
+    done
+  done
+done
+
+# MFCC extraction
+
+echo "Extracting MFCC features for data/train.."
+
+steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \
+    data/train exp/make_mfcc $mfccdir
+utils/fix_data_dir.sh data/train
+
+steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_16k.conf --nj 100 --cmd "$train_cmd" \
+    data/train_dnn exp/make_mfcc $mfccdir
+utils/fix_data_dir.sh data/train_dnn
+
+
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      echo "Extracting MFCC features for data/fame_${task}_${subtask}_${sets}.."
+      steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \
+          data/fame_${task}_${subtask}_${sets}_enroll exp/make_mfcc $mfccdir
+      utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}_enroll
+      steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_16k.conf --nj 100 --cmd "$train_cmd" \
+          data/fame_${task}_${subtask}_${sets}_enroll_dnn exp/make_mfcc $mfccdir
+      utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}_enroll_dnn
+
+      steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \
+          data/fame_${task}_${subtask}_${sets}_test exp/make_mfcc $mfccdir
+      utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}_test
+      steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_16k.conf --nj 100 --cmd "$train_cmd" \
+          data/fame_${task}_${subtask}_${sets}_test_dnn exp/make_mfcc $mfccdir
+      utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}_test_dnn
+
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        echo "Extracting MFCC features for data/fame_${task}_${subtask}_${sets}${year}.."
+        steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \
+            data/fame_${task}_${subtask}_${sets}${year}_enroll exp/make_mfcc $mfccdir
+        utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}${year}_enroll
+        steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_16k.conf --nj 100 --cmd "$train_cmd" \
+            data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn exp/make_mfcc $mfccdir
+        utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn
+
+        steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \
+            data/fame_${task}_${subtask}_${sets}${year}_test exp/make_mfcc $mfccdir
+        utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}${year}_test
+        steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_16k.conf --nj 100 --cmd "$train_cmd" \
+            data/fame_${task}_${subtask}_${sets}${year}_test_dnn exp/make_mfcc $mfccdir
+        utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}${year}_test_dnn
+
+      done
+    done
+  done
+done
+
+# VAD computation
+
+echo "Computing VAD for data/train.."
+
+sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \
+    data/train exp/make_vad $vaddir
+
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      echo "Computing VAD for data/fame_${task}_${subtask}_${sets}.."
+      sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \
+          data/fame_${task}_${subtask}_${sets}_enroll exp/make_vad $vaddir
+      sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \
+          data/fame_${task}_${subtask}_${sets}_test exp/make_vad $vaddir 
+
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        echo "Computing VAD for data/fame_${task}_${subtask}_${sets}${year}.."
+        sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \
+            data/fame_${task}_${subtask}_${sets}${year}_enroll exp/make_vad $vaddir
+        sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \
+            data/fame_${task}_${subtask}_${sets}${year}_test exp/make_vad $vaddir
+      
+      done
+    done
+  done
+done
+
+echo "Copying VAD for data/train.."
+cp data/train/vad.scp data/train_dnn/vad.scp
+cp data/train/utt2spk data/train_dnn/utt2spk
+cp data/train/spk2utt data/train_dnn/spk2utt 
+
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      echo "Copying VAD for data/fame_${task}_${subtask}_${sets}.."
+      cp data/fame_${task}_${subtask}_${sets}_enroll/vad.scp data/fame_${task}_${subtask}_${sets}_enroll_dnn/vad.scp
+      cp data/fame_${task}_${subtask}_${sets}_test/vad.scp data/fame_${task}_${subtask}_${sets}_test_dnn/vad.scp
+      cp data/fame_${task}_${subtask}_${sets}_enroll/utt2spk data/fame_${task}_${subtask}_${sets}_enroll_dnn/utt2spk
+      cp data/fame_${task}_${subtask}_${sets}_test/utt2spk data/fame_${task}_${subtask}_${sets}_test_dnn/utt2spk
+      cp data/fame_${task}_${subtask}_${sets}_enroll/spk2utt data/fame_${task}_${subtask}_${sets}_enroll_dnn/spk2utt
+      cp data/fame_${task}_${subtask}_${sets}_test/spk2utt data/fame_${task}_${subtask}_${sets}_test_dnn/spk2utt
+
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        echo "Copying VAD for data/fame_${task}_${subtask}_${sets}${year}.."
+        cp data/fame_${task}_${subtask}_${sets}${year}_enroll/vad.scp data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn/vad.scp
+        cp data/fame_${task}_${subtask}_${sets}${year}_test/vad.scp data/fame_${task}_${subtask}_${sets}${year}_test_dnn/vad.scp
+        cp data/fame_${task}_${subtask}_${sets}${year}_enroll/utt2spk data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn/utt2spk
+        cp data/fame_${task}_${subtask}_${sets}${year}_test/utt2spk data/fame_${task}_${subtask}_${sets}${year}_test_dnn/utt2spk
+        cp data/fame_${task}_${subtask}_${sets}${year}_enroll/spk2utt data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn/spk2utt
+        cp data/fame_${task}_${subtask}_${sets}${year}_test/spk2utt data/fame_${task}_${subtask}_${sets}${year}_test_dnn/spk2utt
+
+      done
+    done
+  done
+done
+
+# Train UBM and i-vector extractor
+
+echo "Training DNN-UBM and the i-vector extractor.."
+
+sid/init_full_ubm_from_dnn.sh --cmd "$train_cmd" \
+  data/train data/train_dnn $nnet exp/full_ubm
+
+sid/train_ivector_extractor_dnn.sh \
+  --cmd "$train_cmd" \
+  --min-post 0.015 \
+  --ivector-dim 600 \
+  --num-iters 5 exp/full_ubm/final.ubm $nnet \
+  data/train \
+  data/train_dnn \
+  exp/extractor_dnn
+
+# Extract i-vectors.
+
+echo "Extracting i-vectors for data/train.."
+
+sid/extract_ivectors_dnn.sh --cmd "$train_cmd" --nj 10 exp/extractor_dnn $nnet data/train data/train_dnn exp/ivectors_train_dnn
+
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      echo "Extracting i-vectors for data/fame_${task}_${subtask}_${sets}"
+      sid/extract_ivectors_dnn.sh --cmd "$train_cmd" --nj 10 \
+         exp/extractor_dnn \
+         $nnet \
+         data/fame_${task}_${subtask}_${sets}_enroll \
+         data/fame_${task}_${subtask}_${sets}_enroll_dnn \
+         exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_dnn
+      sid/extract_ivectors_dnn.sh --cmd "$train_cmd" --nj 10 \
+         exp/extractor_dnn \
+         $nnet \
+         data/fame_${task}_${subtask}_${sets}_test \
+         data/fame_${task}_${subtask}_${sets}_test_dnn \
+         exp/ivectors_fame_${task}_${subtask}_${sets}_test_dnn
+
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        echo "Extracting i-vectors for data/fame_${task}_${subtask}_${sets}${year}"
+        sid/extract_ivectors_dnn.sh --cmd "$train_cmd" --nj 10 \
+           exp/extractor_dnn \
+           $nnet \
+           data/fame_${task}_${subtask}_${sets}${year}_enroll \
+           data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn \
+           exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_dnn
+        sid/extract_ivectors_dnn.sh --cmd "$train_cmd" --nj 10 \
+           exp/extractor_dnn \
+           $nnet \
+           data/fame_${task}_${subtask}_${sets}${year}_test \
+           data/fame_${task}_${subtask}_${sets}${year}_test_dnn \
+           exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_dnn
+
+      done  
+    done
+  done
+done
+
+# Calculate i-vector means used by the scoring scripts
+
+echo "Calculating i-vectors means.."
+
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      local/scoring_common.sh data/train data/fame_${task}_${subtask}_${sets}_enroll data/fame_${task}_${subtask}_${sets}_test \
+        exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_dnn exp/ivectors_fame_${task}_${subtask}_${sets}_test_dnn
+
+      trials_female=data/fame_${task}_${subtask}_${sets}_test_female/trials
+      trials_male=data/fame_${task}_${subtask}_${sets}_test_male/trials
+      trials=data/fame_${task}_${subtask}_${sets}_test/trials
+
+      local/plda_scoring.sh data/train data/fame_${task}_${subtask}_${sets}_enroll data/fame_${task}_${subtask}_${sets}_test \
+        exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_dnn exp/ivectors_fame_${task}_${subtask}_${sets}_test_dnn $trials local/scores_dnn_ind_pooled_${task}_${subtask}_${sets}
+
+      local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}_enroll_female data/fame_${task}_${subtask}_${sets}_test_female \
+        exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_dnn_female exp/ivectors_fame_${task}_${subtask}_${sets}_test_dnn_female $trials_female local/scores_dnn_ind_female_${task}_${subtask}_${sets}
+
+      local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}_enroll_male data/fame_${task}_${subtask}_${sets}_test_male \
+        exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_dnn_male exp/ivectors_fame_${task}_${subtask}_${sets}_test_dnn_male $trials_male local/scores_dnn_ind_male_${task}_${subtask}_${sets}
+             
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        local/scoring_common.sh data/train data/fame_${task}_${subtask}_${sets}${year}_enroll data/fame_${task}_${subtask}_${sets}${year}_test \
+          exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_dnn exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_dnn
+
+        trials_female=data/fame_${task}_${subtask}_${sets}${year}_test_female/trials
+        trials_male=data/fame_${task}_${subtask}_${sets}${year}_test_male/trials
+        trials=data/fame_${task}_${subtask}_${sets}${year}_test/trials
+
+        local/plda_scoring.sh data/train data/fame_${task}_${subtask}_${sets}${year}_enroll data/fame_${task}_${subtask}_${sets}${year}_test \
+          exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_dnn exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_dnn $trials local/scores_dnn_ind_pooled_${task}_${subtask}_${sets}${year}
+
+        local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}${year}_enroll_female data/fame_${task}_${subtask}_${sets}${year}_test_female \
+          exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_dnn_female exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_dnn_female $trials_female local/scores_dnn_ind_female_${task}_${subtask}_${sets}${year}
+
+        local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}${year}_enroll_male data/fame_${task}_${subtask}_${sets}${year}_test_male \
+          exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_dnn_male exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_dnn_male $trials_male local/scores_dnn_ind_male_${task}_${subtask}_${sets}${year}
+              
+      done
+    done
+  done
+done
+
+# Calculating EER 
+
+echo "Calculating EER.."
+
+for task in complete ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+
+      trials=data/fame_${task}_${subtask}_${sets}_test/trials
+      echo "DNN EER for fame_${task}_${subtask}_${sets}"
+      for x in ind; do
+        for y in female male pooled; do
+          echo "python local/prepare_for_eer.py $trials local/scores_dnn_${x}_${y}_${task}_${subtask}_${sets}/plda_scores"
+          eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_dnn_${x}_${y}_${task}_${subtask}_${sets}/plda_scores) 2> /dev/null`
+          echo "${x} ${y}: $eer"
+        done
+      done
+
+    done
+  done
+done
+
+for task in ageing; do
+  for subtask in 3sec 10sec 30sec; do
+    for sets in eval; do
+      for year in _1t3 _4t10 _mt10; do
+
+        trials=data/fame_${task}_${subtask}_${sets}${year}_test/trials
+        echo "DNN EER for fame_${task}_${subtask}_${sets}${year}"
+        for x in ind; do
+          for y in female male pooled; do
+            echo "python local/prepare_for_eer.py $trials local/scores_dnn_${x}_${y}_${task}_${subtask}_${sets}${year}/plda_scores"
+            eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_dnn_${x}_${y}_${task}_${subtask}_${sets}${year}/plda_scores) 2> /dev/null`
+            echo "${x} ${y}: $eer"
+          done
+        done
+      done
+    done
+  done
+done
diff --git a/egs/fame/v2/sid b/egs/fame/v2/sid
new file mode 120000 (symlink)
index 0000000..893a12f
--- /dev/null
@@ -0,0 +1 @@
+../../sre08/v1/sid
\ No newline at end of file
diff --git a/egs/fame/v2/steps b/egs/fame/v2/steps
new file mode 120000 (symlink)
index 0000000..6e99bf5
--- /dev/null
@@ -0,0 +1 @@
+../../wsj/s5/steps
\ No newline at end of file
diff --git a/egs/fame/v2/utils b/egs/fame/v2/utils
new file mode 120000 (symlink)
index 0000000..b240885
--- /dev/null
@@ -0,0 +1 @@
+../../wsj/s5/utils
\ No newline at end of file