From d4bb00a9bc980a3e7a4d4358cb1fbfe9300792fc Mon Sep 17 00:00:00 2001 From: Emre Yilmaz Date: Sun, 24 Sep 2017 21:06:22 -0700 Subject: [PATCH] [egs] Speaker recognition scripts for FAME! Speech Corpus (#1897) --- egs/fame/README.txt | 8 +- egs/fame/s5/RESULTS | 56 +-- egs/fame/s5/local/fame_data_prep.sh | 2 +- egs/fame/s5/local/fame_dict_prep.sh | 2 +- egs/fame/v1/RESULTS | 105 +++++ egs/fame/v1/cmd.sh | 23 + egs/fame/v1/conf/mfcc_16k.conf | 1 + egs/fame/v1/conf/vad.conf | 2 + egs/fame/v1/local/dnn/run_nnet2_common.sh | 22 + .../v1/local/dnn/run_nnet2_multisplice.sh | 64 +++ egs/fame/v1/local/dnn/train_dnn.sh | 89 ++++ egs/fame/v1/local/fame_data_prep.sh | 53 +++ egs/fame/v1/local/fame_dict_prep.sh | 35 ++ egs/fame/v1/local/make_fame_test.pl | 85 ++++ egs/fame/v1/local/make_fame_test_year.pl | 85 ++++ egs/fame/v1/local/make_fame_train.pl | 60 +++ egs/fame/v1/local/make_fame_train_year.pl | 60 +++ egs/fame/v1/local/plda_scoring.sh | 51 +++ egs/fame/v1/local/prepare_for_eer.py | 15 + egs/fame/v1/local/prepare_train.sh | 40 ++ egs/fame/v1/local/scoring_common.sh | 69 +++ egs/fame/v1/path.sh | 6 + egs/fame/v1/run.sh | 300 +++++++++++++ egs/fame/v1/sid | 1 + egs/fame/v1/steps | 1 + egs/fame/v1/utils | 1 + egs/fame/v2/RESULTS | 105 +++++ egs/fame/v2/cmd.sh | 23 + egs/fame/v2/conf/decode.config | 3 + egs/fame/v2/conf/decode_dnn.config | 2 + egs/fame/v2/conf/mfcc_16k.conf | 4 + egs/fame/v2/conf/mfcc_asr.conf | 2 + egs/fame/v2/conf/mfcc_hires_16k.conf | 11 + egs/fame/v2/conf/vad.conf | 2 + egs/fame/v2/local | 1 + egs/fame/v2/path.sh | 6 + egs/fame/v2/run.sh | 402 ++++++++++++++++++ egs/fame/v2/sid | 1 + egs/fame/v2/steps | 1 + egs/fame/v2/utils | 1 + 40 files changed, 1766 insertions(+), 34 deletions(-) create mode 100644 egs/fame/v1/RESULTS create mode 100644 egs/fame/v1/cmd.sh create mode 100644 egs/fame/v1/conf/mfcc_16k.conf create mode 100644 egs/fame/v1/conf/vad.conf create mode 100755 egs/fame/v1/local/dnn/run_nnet2_common.sh create mode 100755 
egs/fame/v1/local/dnn/run_nnet2_multisplice.sh create mode 100755 egs/fame/v1/local/dnn/train_dnn.sh create mode 100755 egs/fame/v1/local/fame_data_prep.sh create mode 100755 egs/fame/v1/local/fame_dict_prep.sh create mode 100755 egs/fame/v1/local/make_fame_test.pl create mode 100755 egs/fame/v1/local/make_fame_test_year.pl create mode 100755 egs/fame/v1/local/make_fame_train.pl create mode 100755 egs/fame/v1/local/make_fame_train_year.pl create mode 100755 egs/fame/v1/local/plda_scoring.sh create mode 100755 egs/fame/v1/local/prepare_for_eer.py create mode 100755 egs/fame/v1/local/prepare_train.sh create mode 100755 egs/fame/v1/local/scoring_common.sh create mode 100755 egs/fame/v1/path.sh create mode 100755 egs/fame/v1/run.sh create mode 120000 egs/fame/v1/sid create mode 120000 egs/fame/v1/steps create mode 120000 egs/fame/v1/utils create mode 100644 egs/fame/v2/RESULTS create mode 100644 egs/fame/v2/cmd.sh create mode 100644 egs/fame/v2/conf/decode.config create mode 100644 egs/fame/v2/conf/decode_dnn.config create mode 100644 egs/fame/v2/conf/mfcc_16k.conf create mode 100644 egs/fame/v2/conf/mfcc_asr.conf create mode 100644 egs/fame/v2/conf/mfcc_hires_16k.conf create mode 100644 egs/fame/v2/conf/vad.conf create mode 120000 egs/fame/v2/local create mode 100755 egs/fame/v2/path.sh create mode 100755 egs/fame/v2/run.sh create mode 120000 egs/fame/v2/sid create mode 120000 egs/fame/v2/steps create mode 120000 egs/fame/v2/utils diff --git a/egs/fame/README.txt b/egs/fame/README.txt index d2ed39eef..d0037dc33 100644 --- a/egs/fame/README.txt +++ b/egs/fame/README.txt @@ -6,10 +6,10 @@ The Corpus consists of short utterances extracted from 203 audio segments of app A full description of the FAME! Speech Corpus is provided in: -Yilmaz, E., Heuvel, H. van den, Van de Velde, H., Kampstra, F., Algra, J., Leeuwen, D. van: +E. Yılmaz, H. van den Heuvel, J. Dijkstra, H. Van de Velde, F. Kampstra, J. Algra and D. 
van Leeuwen, "Open Source Speech and Language Resources for Frisian," In Proc. INTERSPEECH, pp. 1536-1540, San Francisco, CA, USA, Sept. 2016. -Open Source Speech and Language Resources for Frisian Language. +Speaker clustering and verification corpus details are provided in: -In: Proceedings Interspeech 2016, pp. 1536--1540, 8-12 September 2016, San Francisco +E. Yılmaz, J. Dijkstra, H. Van de Velde, F. Kampstra, J. Algra, H. van den Heuvel and D. van Leeuwen, "Longitudinal Speaker Clustering and Verification Corpus with Code-switching Frisian-Dutch Speech," in Proc. INTERSPEECH, pp. 37-41 Stockholm, Sweden, August 2017. -Please check http://www.ru.nl/clst/datasets/ to get the FAME! Speech Corpus +Please check http://www.ru.nl/clst/datasets/ to get the FAME! Speech Corpus. The ASR scripts are in ./s5. The GMM-UBM and DNN-UBM SV scripts are in ./v1 and ./v2 respectively. diff --git a/egs/fame/s5/RESULTS b/egs/fame/s5/RESULTS index a8541fba6..c1a1267a6 100644 --- a/egs/fame/s5/RESULTS +++ b/egs/fame/s5/RESULTS @@ -1,28 +1,28 @@ -%WER 41.10 [ 4974 / 12101, 522 ins, 1223 del, 3229 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_devel/wer_11_0.0 -%WER 38.10 [ 4909 / 12886, 527 ins, 1220 del, 3162 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_test/wer_11_0.0 -%WER 41.06 [ 4969 / 12101, 514 ins, 1277 del, 3178 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_11_0.0 -%WER 40.38 [ 4886 / 12101, 515 ins, 1225 del, 3146 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_11_0.0 -%WER 40.15 [ 4859 / 12101, 514 ins, 1177 del, 3168 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_10_0.5 -%WER 37.86 [ 4879 / 12886, 596 ins, 1083 del, 3200 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it1/wer_10_0.0 -%WER 37.16 [ 4789 / 12886, 592 ins, 1056 del, 3141 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it3/wer_10_0.0 -%WER 36.92 [ 4757 / 12886, 618 ins, 1010 del, 3129 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it6/wer_10_0.0 -%WER 42.38 [ 5129 / 12101, 576 
ins, 1171 del, 3382 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_devel/wer_11_0.0 -%WER 39.14 [ 5043 / 12886, 536 ins, 1172 del, 3335 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_test/wer_11_0.0 -%WER 42.05 [ 5088 / 12101, 525 ins, 1282 del, 3281 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_11_0.0 -%WER 41.41 [ 5011 / 12101, 461 ins, 1345 del, 3205 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_11_0.5 -%WER 40.97 [ 4958 / 12101, 485 ins, 1279 del, 3194 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_11_0.5 -%WER 38.79 [ 4998 / 12886, 512 ins, 1194 del, 3292 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it1/wer_11_0.0 -%WER 38.16 [ 4917 / 12886, 544 ins, 1128 del, 3245 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it3/wer_11_0.0 -%WER 37.68 [ 4856 / 12886, 564 ins, 1068 del, 3224 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it6/wer_11_0.0 -%WER 70.85 [ 8574 / 12101, 414 ins, 2596 del, 5564 sub ] exp/mono/decode_devel/wer_9_0.0 -%WER 68.17 [ 8785 / 12886, 413 ins, 2704 del, 5668 sub ] exp/mono/decode_test/wer_9_0.0 -%WER 44.05 [ 5330 / 12101, 560 ins, 1467 del, 3303 sub ] exp/sgmm2/decode_devel/wer_10_0.0 -%WER 40.22 [ 5183 / 12886, 680 ins, 1142 del, 3361 sub ] exp/sgmm2/decode_test/wer_9_0.0 -%WER 54.39 [ 6582 / 12101, 695 ins, 1595 del, 4292 sub ] exp/tri1/decode_devel/wer_10_0.0 -%WER 51.60 [ 6649 / 12886, 630 ins, 1706 del, 4313 sub ] exp/tri1/decode_test/wer_11_0.0 -%WER 51.53 [ 6236 / 12101, 659 ins, 1675 del, 3902 sub ] exp/tri2/decode_devel/wer_11_0.0 -%WER 48.32 [ 6226 / 12886, 643 ins, 1669 del, 3914 sub ] exp/tri2/decode_test/wer_12_0.0 -%WER 47.15 [ 5706 / 12101, 580 ins, 1537 del, 3589 sub ] exp/tri3/decode_devel/wer_13_0.0 -%WER 52.13 [ 6308 / 12101, 623 ins, 1706 del, 3979 sub ] exp/tri3/decode_devel.si/wer_11_0.5 -%WER 43.71 [ 5632 / 12886, 594 ins, 1538 del, 3500 sub ] exp/tri3/decode_test/wer_14_0.0 -%WER 48.21 [ 6212 / 12886, 825 ins, 1358 del, 4029 sub ] 
exp/tri3/decode_test.si/wer_10_0.0 +%WER 41.89 [ 5087 / 12143, 603 ins, 1108 del, 3376 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_devel/wer_10_0.0 +%WER 38.71 [ 5019 / 12966, 529 ins, 1192 del, 3298 sub ] exp/dnn4b_pretrain-dbn_dnn/decode_test/wer_11_0.0 +%WER 41.64 [ 5056 / 12143, 497 ins, 1300 del, 3259 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_10_0.5 +%WER 40.90 [ 4966 / 12143, 456 ins, 1375 del, 3135 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_11_0.5 +%WER 40.47 [ 4914 / 12143, 458 ins, 1329 del, 3127 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_11_0.5 +%WER 38.32 [ 4969 / 12966, 581 ins, 1102 del, 3286 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it1/wer_10_0.0 +%WER 37.76 [ 4896 / 12966, 536 ins, 1133 del, 3227 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it3/wer_11_0.0 +%WER 37.42 [ 4852 / 12966, 485 ins, 1221 del, 3146 sub ] exp/dnn4b_pretrain-dbn_dnn_smbr/decode_test_it6/wer_12_0.0 +%WER 43.14 [ 5239 / 12143, 530 ins, 1319 del, 3390 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_devel/wer_10_0.5 +%WER 39.71 [ 5149 / 12966, 597 ins, 1091 del, 3461 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn/decode_test/wer_10_0.0 +%WER 42.94 [ 5214 / 12143, 569 ins, 1228 del, 3417 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it1/wer_11_0.0 +%WER 42.11 [ 5114 / 12143, 555 ins, 1169 del, 3390 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it3/wer_10_0.5 +%WER 41.74 [ 5069 / 12143, 570 ins, 1110 del, 3389 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_devel_it6/wer_10_0.5 +%WER 39.33 [ 5099 / 12966, 452 ins, 1342 del, 3305 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it1/wer_11_0.5 +%WER 38.65 [ 5012 / 12966, 577 ins, 1105 del, 3330 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it3/wer_11_0.0 +%WER 38.23 [ 4957 / 12966, 542 ins, 1171 del, 3244 sub ] exp/dnn4d-fbank_pretrain-dbn_dnn_smbr/decode_test_it6/wer_12_0.0 +%WER 69.62 [ 8454 / 12143, 535 ins, 2189 del, 5730 sub ] 
exp/mono/decode_devel/wer_7_0.0 +%WER 68.26 [ 8851 / 12966, 572 ins, 2393 del, 5886 sub ] exp/mono/decode_test/wer_7_0.0 +%WER 44.31 [ 5381 / 12143, 551 ins, 1465 del, 3365 sub ] exp/sgmm2/decode_devel/wer_9_0.5 +%WER 40.58 [ 5261 / 12966, 607 ins, 1305 del, 3349 sub ] exp/sgmm2/decode_test/wer_10_0.0 +%WER 54.29 [ 6593 / 12143, 477 ins, 2120 del, 3996 sub ] exp/tri1/decode_devel/wer_11_0.5 +%WER 51.25 [ 6645 / 12966, 679 ins, 1715 del, 4251 sub ] exp/tri1/decode_test/wer_11_0.0 +%WER 51.25 [ 6223 / 12143, 564 ins, 1811 del, 3848 sub ] exp/tri2/decode_devel/wer_10_1.0 +%WER 48.87 [ 6337 / 12966, 629 ins, 1799 del, 3909 sub ] exp/tri2/decode_test/wer_13_0.0 +%WER 47.19 [ 5730 / 12143, 647 ins, 1449 del, 3634 sub ] exp/tri3/decode_devel/wer_12_0.0 +%WER 52.43 [ 6366 / 12143, 643 ins, 1711 del, 4012 sub ] exp/tri3/decode_devel.si/wer_11_0.5 +%WER 44.14 [ 5723 / 12966, 693 ins, 1404 del, 3626 sub ] exp/tri3/decode_test/wer_13_0.0 +%WER 48.77 [ 6324 / 12966, 754 ins, 1524 del, 4046 sub ] exp/tri3/decode_test.si/wer_12_0.0 diff --git a/egs/fame/s5/local/fame_data_prep.sh b/egs/fame/s5/local/fame_data_prep.sh index 429495058..11c28c1d1 100755 --- a/egs/fame/s5/local/fame_data_prep.sh +++ b/egs/fame/s5/local/fame_data_prep.sh @@ -8,7 +8,7 @@ corpus=$1 set -e -o pipefail if [ -z "$corpus" ] ; then - echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech database" + echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech corpus" exit 1 fi if [ ! -d "$corpus" ] ; then diff --git a/egs/fame/s5/local/fame_dict_prep.sh b/egs/fame/s5/local/fame_dict_prep.sh index c6530217a..95b5d846e 100755 --- a/egs/fame/s5/local/fame_dict_prep.sh +++ b/egs/fame/s5/local/fame_dict_prep.sh @@ -7,7 +7,7 @@ corpus=$1 if [ -z "$corpus" ] ; then - echo >&2 "The script $0 expects one parameter -- the location of the Iban corpus" + echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech corpus" exit 1 fi if [ ! 
-d "$corpus" ] ; then diff --git a/egs/fame/v1/RESULTS b/egs/fame/v1/RESULTS new file mode 100644 index 000000000..5ef6daad5 --- /dev/null +++ b/egs/fame/v1/RESULTS @@ -0,0 +1,105 @@ +GMM-2048 EER for fame_complete_3sec_eval +python local/prepare_for_eer.py data/fame_complete_3sec_eval_test/trials local/scores_gmm_2048_ind_female_complete_3sec_eval/plda_scores +ind female: 25.88 +python local/prepare_for_eer.py data/fame_complete_3sec_eval_test/trials local/scores_gmm_2048_ind_male_complete_3sec_eval/plda_scores +ind male: 16.95 +python local/prepare_for_eer.py data/fame_complete_3sec_eval_test/trials local/scores_gmm_2048_ind_pooled_complete_3sec_eval/plda_scores +ind pooled: 21.9 +GMM-2048 EER for fame_complete_10sec_eval +python local/prepare_for_eer.py data/fame_complete_10sec_eval_test/trials local/scores_gmm_2048_ind_female_complete_10sec_eval/plda_scores +ind female: 18.14 +python local/prepare_for_eer.py data/fame_complete_10sec_eval_test/trials local/scores_gmm_2048_ind_male_complete_10sec_eval/plda_scores +ind male: 9.454 +python local/prepare_for_eer.py data/fame_complete_10sec_eval_test/trials local/scores_gmm_2048_ind_pooled_complete_10sec_eval/plda_scores +ind pooled: 14.15 +GMM-2048 EER for fame_complete_30sec_eval +python local/prepare_for_eer.py data/fame_complete_30sec_eval_test/trials local/scores_gmm_2048_ind_female_complete_30sec_eval/plda_scores +ind female: 15.49 +python local/prepare_for_eer.py data/fame_complete_30sec_eval_test/trials local/scores_gmm_2048_ind_male_complete_30sec_eval/plda_scores +ind male: 6.985 +python local/prepare_for_eer.py data/fame_complete_30sec_eval_test/trials local/scores_gmm_2048_ind_pooled_complete_30sec_eval/plda_scores +ind pooled: 11.51 +GMM-2048 EER for fame_ageing_3sec_eval +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_test/trials local/scores_gmm_2048_ind_female_ageing_3sec_eval/plda_scores +ind female: 25.03 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_test/trials 
local/scores_gmm_2048_ind_male_ageing_3sec_eval/plda_scores +ind male: 18.38 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_test/trials local/scores_gmm_2048_ind_pooled_ageing_3sec_eval/plda_scores +ind pooled: 22.87 +GMM-2048 EER for fame_ageing_10sec_eval +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_test/trials local/scores_gmm_2048_ind_female_ageing_10sec_eval/plda_scores +ind female: 16.31 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_test/trials local/scores_gmm_2048_ind_male_ageing_10sec_eval/plda_scores +ind male: 10.4 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_test/trials local/scores_gmm_2048_ind_pooled_ageing_10sec_eval/plda_scores +ind pooled: 14.51 +GMM-2048 EER for fame_ageing_30sec_eval +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_test/trials local/scores_gmm_2048_ind_female_ageing_30sec_eval/plda_scores +ind female: 11.53 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_test/trials local/scores_gmm_2048_ind_male_ageing_30sec_eval/plda_scores +ind male: 6.869 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_test/trials local/scores_gmm_2048_ind_pooled_ageing_30sec_eval/plda_scores +ind pooled: 10.76 +GMM-2048 EER for fame_ageing_3sec_eval_1t3 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_1t3_test/trials local/scores_gmm_2048_ind_female_ageing_3sec_eval_1t3/plda_scores +ind female: 25.28 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_1t3_test/trials local/scores_gmm_2048_ind_male_ageing_3sec_eval_1t3/plda_scores +ind male: 14.07 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_1t3_test/trials local/scores_gmm_2048_ind_pooled_ageing_3sec_eval_1t3/plda_scores +ind pooled: 21.08 +GMM-2048 EER for fame_ageing_3sec_eval_4t10 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_4t10_test/trials local/scores_gmm_2048_ind_female_ageing_3sec_eval_4t10/plda_scores +ind female: 23.81 +python local/prepare_for_eer.py 
data/fame_ageing_3sec_eval_4t10_test/trials local/scores_gmm_2048_ind_male_ageing_3sec_eval_4t10/plda_scores +ind male: 17.2 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_4t10_test/trials local/scores_gmm_2048_ind_pooled_ageing_3sec_eval_4t10/plda_scores +ind pooled: 21.57 +GMM-2048 EER for fame_ageing_3sec_eval_mt10 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_mt10_test/trials local/scores_gmm_2048_ind_female_ageing_3sec_eval_mt10/plda_scores +ind female: 26.47 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_mt10_test/trials local/scores_gmm_2048_ind_male_ageing_3sec_eval_mt10/plda_scores +ind male: 22.74 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_mt10_test/trials local/scores_gmm_2048_ind_pooled_ageing_3sec_eval_mt10/plda_scores +ind pooled: 25.9 +GMM-2048 EER for fame_ageing_10sec_eval_1t3 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_1t3_test/trials local/scores_gmm_2048_ind_female_ageing_10sec_eval_1t3/plda_scores +ind female: 15.91 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_1t3_test/trials local/scores_gmm_2048_ind_male_ageing_10sec_eval_1t3/plda_scores +ind male: 7.064 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_1t3_test/trials local/scores_gmm_2048_ind_pooled_ageing_10sec_eval_1t3/plda_scores +ind pooled: 13.5 +GMM-2048 EER for fame_ageing_10sec_eval_4t10 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_4t10_test/trials local/scores_gmm_2048_ind_female_ageing_10sec_eval_4t10/plda_scores +ind female: 14.36 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_4t10_test/trials local/scores_gmm_2048_ind_male_ageing_10sec_eval_4t10/plda_scores +ind male: 10.5 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_4t10_test/trials local/scores_gmm_2048_ind_pooled_ageing_10sec_eval_4t10/plda_scores +ind pooled: 13.41 +GMM-2048 EER for fame_ageing_10sec_eval_mt10 +python local/prepare_for_eer.py 
data/fame_ageing_10sec_eval_mt10_test/trials local/scores_gmm_2048_ind_female_ageing_10sec_eval_mt10/plda_scores +ind female: 18.26 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_mt10_test/trials local/scores_gmm_2048_ind_male_ageing_10sec_eval_mt10/plda_scores +ind male: 11.93 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_mt10_test/trials local/scores_gmm_2048_ind_pooled_ageing_10sec_eval_mt10/plda_scores +ind pooled: 16.59 +GMM-2048 EER for fame_ageing_30sec_eval_1t3 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_1t3_test/trials local/scores_gmm_2048_ind_female_ageing_30sec_eval_1t3/plda_scores +ind female: 13.16 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_1t3_test/trials local/scores_gmm_2048_ind_male_ageing_30sec_eval_1t3/plda_scores +ind male: 4.425 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_1t3_test/trials local/scores_gmm_2048_ind_pooled_ageing_30sec_eval_1t3/plda_scores +ind pooled: 10.38 +GMM-2048 EER for fame_ageing_30sec_eval_4t10 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_4t10_test/trials local/scores_gmm_2048_ind_female_ageing_30sec_eval_4t10/plda_scores +ind female: 9.536 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_4t10_test/trials local/scores_gmm_2048_ind_male_ageing_30sec_eval_4t10/plda_scores +ind male: 6.826 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_4t10_test/trials local/scores_gmm_2048_ind_pooled_ageing_30sec_eval_4t10/plda_scores +ind pooled: 9.692 +GMM-2048 EER for fame_ageing_30sec_eval_mt10 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_mt10_test/trials local/scores_gmm_2048_ind_female_ageing_30sec_eval_mt10/plda_scores +ind female: 9.677 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_mt10_test/trials local/scores_gmm_2048_ind_male_ageing_30sec_eval_mt10/plda_scores +ind male: 7.983 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_mt10_test/trials 
local/scores_gmm_2048_ind_pooled_ageing_30sec_eval_mt10/plda_scores +ind pooled: 11.99 diff --git a/egs/fame/v1/cmd.sh b/egs/fame/v1/cmd.sh new file mode 100644 index 000000000..23721d04c --- /dev/null +++ b/egs/fame/v1/cmd.sh @@ -0,0 +1,23 @@ +# you can change cmd.sh depending on what type of queue you are using. +# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd=queue.pl +export decode_cmd="queue.pl --mem 2G" +# the use of cuda_cmd is deprecated, used only in 'nnet1', +export cuda_cmd="queue.pl --gpu 1" + +if [ "$(hostname -d)" == "fit.vutbr.cz" ]; then + queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf, + export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2" + export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1" + export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G" +fi diff --git a/egs/fame/v1/conf/mfcc_16k.conf b/egs/fame/v1/conf/mfcc_16k.conf new file mode 100644 index 000000000..736150909 --- /dev/null +++ b/egs/fame/v1/conf/mfcc_16k.conf @@ -0,0 +1 @@ +--use-energy=false # only non-default option. 
diff --git a/egs/fame/v1/conf/vad.conf b/egs/fame/v1/conf/vad.conf new file mode 100644 index 000000000..a0ca2449b --- /dev/null +++ b/egs/fame/v1/conf/vad.conf @@ -0,0 +1,2 @@ +--vad-energy-threshold=5.5 +--vad-energy-mean-scale=0.5 diff --git a/egs/fame/v1/local/dnn/run_nnet2_common.sh b/egs/fame/v1/local/dnn/run_nnet2_common.sh new file mode 100755 index 000000000..df5804d7d --- /dev/null +++ b/egs/fame/v1/local/dnn/run_nnet2_common.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# Make the features. + +. ./cmd.sh +. ./path.sh +stage=1 +set -e +. ./utils/parse_options.sh + +mkdir -p exp/nnet2_online + +if [ $stage -le 1 ]; then + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + mfccdir=mfcc + utils/copy_data_dir.sh data/train_asr data/train_hires_asr + steps/make_mfcc.sh --nj 10 --mfcc-config conf/mfcc_hires_16k.conf \ + --cmd "$train_cmd" data/train_hires_asr exp/make_hires/train $mfccdir || exit 1; +fi diff --git a/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh b/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh new file mode 100755 index 000000000..bba54c558 --- /dev/null +++ b/egs/fame/v1/local/dnn/run_nnet2_multisplice.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Copyright 2017 Radboud University (Author: Emre Yilmaz) +# +# This script is based on run_nnet2_multisplice.sh in +# egs/fisher_english/s5/local/online. It has been modified +# for speaker recognition. + +stage=1 +train_stage=-10 +use_gpu=true +set -e +. ./cmd.sh +. ./path.sh + +. utils/parse_options.sh + +# assume use_gpu=true since it would be way too slow otherwise. + +if $use_gpu; then + if ! 
cuda-compiled; then + cat <" data/local/lang data/lang || exit 1; + utils/format_lm.sh data/lang data/local/LM.gz data/local/dict/lexicon.txt data/lang_test || exit 1; +fi + +if [ $stage -le 2 ]; then + # Feature extraction + for x in train_asr devel_asr test_asr; do + steps/make_mfcc.sh --nj $feat_nj --mfcc-config conf/mfcc_asr.conf --cmd "$train_cmd" data/$x exp/make_mfcc/$x mfcc || exit 1; + steps/compute_cmvn_stats.sh data/$x exp/make_mfcc/$x mfcc || exit 1; + done +fi + +if [ $stage -le 3 ]; then + ### Monophone + echo "Starting monophone training." + steps/train_mono.sh --nj $train_nj --cmd "$train_cmd" data/train_asr data/lang exp/mono || exit 1; + echo "Mono training done." + +fi + +if [ $stage -le 4 ]; then + ### Triphone + echo "Starting triphone training." + steps/align_si.sh --nj $train_nj --cmd "$train_cmd" data/train_asr data/lang exp/mono exp/mono_ali || exit 1; + steps/train_deltas.sh --boost-silence 1.25 --cmd "$train_cmd" $numLeavesTri1 $numGaussTri1 data/train_asr data/lang exp/mono_ali exp/tri1 || exit 1; + echo "Triphone training done." + +fi + +if [ $stage -le 5 ]; then + ### Triphone + LDA and MLLT + echo "Starting LDA+MLLT training." + steps/align_si.sh --nj $train_nj --cmd "$train_cmd" data/train_asr data/lang exp/tri1 exp/tri1_ali || exit 1; + steps/train_lda_mllt.sh --cmd "$train_cmd" --splice-opts "--left-context=3 --right-context=3" $numLeavesMLLT $numGaussMLLT data/train_asr data/lang exp/tri1_ali exp/tri2 || exit 1; + echo "LDA+MLLT training done." + +fi + +if [ $stage -le 6 ]; then + ### Triphone + LDA and MLLT + SAT and FMLLR + echo "Starting SAT+FMLLR training." + steps/align_si.sh --nj $train_nj --cmd "$train_cmd" --use-graphs true data/train_asr data/lang exp/tri2 exp/tri2_ali || exit 1; + steps/train_sat.sh --cmd "$train_cmd" $numLeavesSAT $numGaussSAT data/train_asr data/lang exp/tri2_ali exp/tri3 || exit 1; + echo "SAT+FMLLR training done." + + echo "Decoding the development and test sets using SAT+FMLLR models." 
+ utils/mkgraph.sh data/lang_test exp/tri3 exp/tri3/graph || exit 1; + steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --skip-scoring true exp/tri3/graph data/devel_asr exp/tri3/decode_devel || exit 1; + steps/decode_fmllr.sh --nj $decode_nj --cmd "$decode_cmd" --skip-scoring true exp/tri3/graph data/test_asr exp/tri3/decode_test || exit 1; + echo "SAT+FMLLR decoding done." +fi + +local/dnn/run_nnet2_multisplice.sh diff --git a/egs/fame/v1/local/fame_data_prep.sh b/egs/fame/v1/local/fame_data_prep.sh new file mode 100755 index 000000000..bbe30976d --- /dev/null +++ b/egs/fame/v1/local/fame_data_prep.sh @@ -0,0 +1,53 @@ +#!/bin/bash +# Copyright 2015-2016 Sarah Flora Juan +# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal) +# Copyright 2017 Radboud University (Author: Emre Yilmaz) + +# Apache 2.0 + +corpus=$1 +set -e -o pipefail +if [ -z "$corpus" ] ; then + echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech database" + exit 1 +fi +if [ ! -d "$corpus" ] ; then + echo >&2 "The directory $corpus does not exist" +fi + +echo "Preparing train, development and test data" +mkdir -p data data/local data/train_asr data/devel_asr data/test_asr + +for x in train devel test; do + echo "Copy spk2utt, utt2spk, wav.scp, text for $x" + cp $corpus/data/$x/text data/${x}_asr/text || exit 1; + cp $corpus/data/$x/spk2utt data/${x}_asr/spk2utt || exit 1; + cp $corpus/data/$x/utt2spk data/${x}_asr/utt2spk || exit 1; + + # the corpus wav.scp contains physical paths, so we just re-generate + # the file again from scratchn instead of figuring out how to edit it + for rec in $(awk '{print $1}' $corpus/data/$x/text) ; do + spk=${rec%_*} + filename=$corpus/fame/wav/${x}/${rec:8}.wav + if [ ! 
-f "$filename" ] ; then + echo >&2 "The file $filename could not be found ($rec)" + exit 1 + fi + # we might want to store physical paths as a general rule + filename=$(readlink -f $filename) + echo "$rec $filename" + done > data/${x}_asr/wav.scp + + # fix_data_dir.sh fixes common mistakes (unsorted entries in wav.scp, + # duplicate entries and so on). Also, it regenerates the spk2utt from + # utt2sp + utils/fix_data_dir.sh data/${x}_asr +done + +echo "Copying language model" +if [ -f $corpus/lm/LM_FR_IKN3G ] ; then + gzip -c $corpus/lm/LM_FR_IKN3G > data/local/LM.gz +fi + +echo "Data preparation completed." + diff --git a/egs/fame/v1/local/fame_dict_prep.sh b/egs/fame/v1/local/fame_dict_prep.sh new file mode 100755 index 000000000..122c34c83 --- /dev/null +++ b/egs/fame/v1/local/fame_dict_prep.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Copyright 2015-2016 Sarah Flora Juan +# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal) +# Copyright 2016 Radboud University (Author: Emre Yilmaz) + +# Apache 2.0 + +corpus=$1 +if [ -z "$corpus" ] ; then + echo >&2 "The script $0 expects one parameter -- the location of the FAME speech corpus" + exit 1 +fi +if [ ! -d "$corpus" ] ; then + echo >&2 "The directory $corpus does not exist" +fi + +mkdir -p data/lang data/local/dict + +cat $corpus/lexicon/lex.asr $corpus/lexicon/lex.oov > data/local/dict/lexicon.txt +echo "!SIL SIL" >> data/local/dict/lexicon.txt +echo " SPN" >> data/local/dict/lexicon.txt +env LC_ALL=C sort -u -o data/local/dict/lexicon.txt data/local/dict/lexicon.txt +cat data/local/dict/lexicon.txt | \ + perl -ane 'print join("\n", @F[1..$#F]) . 
"\n"; ' | \ + sort -u | grep -v 'SIL' > data/local/dict/nonsilence_phones.txt + + +touch data/local/dict/extra_questions.txt +touch data/local/dict/optional_silence.txt + +echo "SIL" > data/local/dict/optional_silence.txt +echo "SIL" > data/local/dict/silence_phones.txt +echo "" > data/local/dict/oov.txt + +echo "Dictionary preparation succeeded" diff --git a/egs/fame/v1/local/make_fame_test.pl b/egs/fame/v1/local/make_fame_test.pl new file mode 100755 index 000000000..2098dc1ed --- /dev/null +++ b/egs/fame/v1/local/make_fame_test.pl @@ -0,0 +1,85 @@ +#!/usr/bin/perl +# +# Copyright 2015 David Snyder +# 2017 Radboud University (Author: Emre Yilmaz) +# Apache 2.0. +# Usage: make_fame_test.pl corpus/SV/ data/ complete 3sec eval. + +if (@ARGV != 5) { + print STDERR "Usage: $0 \n"; + print STDERR "e.g. $0 corpus/SV/ data/ complete 3sec eval\n"; + exit(1); +} + +($db_base, $out_base_dir, $task, $subtask, $sets) = @ARGV; +$out_dir = "$out_base_dir/fame_${task}_${subtask}_${sets}_test"; + +$tmp_dir = "$out_dir/tmp"; +if (system("mkdir -p $tmp_dir") != 0) { + die "Error making directory $tmp_dir"; +} + +open(IN_TRIALS, "<", "$db_base/docs/$task/${task}_${subtask}_${sets}_trials_key") or die "cannot open trials list"; +open(OUT_TRIALS, ">", "$out_dir/trials") or die "cannot open trials list"; +%trials = (); +while() { + chomp; + ($spkr,$utt,$side,$is_target) = split(",", $_); + $side = uc $side; + $key = "${spkr} ${utt}_${side}"; # Just keep track of the spkr-utterance pairs we want. + $trials{$key} = 1; # Just keep track of the spkr-utterance pairs we want. 
+ print OUT_TRIALS "$spkr ${utt}_${side} $is_target\n"; +} + +close(OUT_TRIALS) || die; +close(IN_TRIALS) || die; + +open(WAVLIST, "<", "$db_base/docs/$task/${task}_${subtask}_${sets}_trials") or die "cannot open wav list"; +open(GNDR,">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender"; +open(SPKR,">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk"; +open(WAV,">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp"; + +%spk2gender = (); +%utts = (); +while() { + chomp; + $sph = $_; + ($spkr, $gender, $wav_and_side) = split(" ", $sph); + ($wav, $side) = split(":", $wav_and_side); + $wav = "${db_base}/data/${task}/${sets}/${subtask}/${wav}"; + @A = split("/", $wav); + $basename = $A[$#A]; + $raw_basename = $basename; + $raw_basename =~ s/\.wav$// || die "bad basename $basename"; + $uttId = $raw_basename . "_" . $side; + $key = "${spkr} ${uttId}"; + if ( (not exists($trials{"${spkr} ${uttId}"}) ) or exists($utts{$uttId}) ) { + next; + } + $utts{$uttId} = 1; + if ($side eq "A") { + $channel = 1; + } elsif ($side eq "B") { + $channel = 2; + } else { + die "unknown channel $side\n"; + } + print WAV "$uttId"," $wav\n"; + print SPKR "$uttId $uttId\n"; + print GNDR "$uttId $gender\n"; + $spk2gender{$spkr} = $gender; +} + +close(SPKR) || die; +close(WAV) || die; +close(WAVLIST) || die; +close(GNDR) || die; + +if (system( + "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_dir"; +} +system("utils/fix_data_dir.sh $out_dir"); +if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { + die "Error validating directory $out_dir"; +} diff --git a/egs/fame/v1/local/make_fame_test_year.pl b/egs/fame/v1/local/make_fame_test_year.pl new file mode 100755 index 000000000..c881a70d3 --- /dev/null +++ b/egs/fame/v1/local/make_fame_test_year.pl @@ -0,0 +1,85 @@ +#!/usr/bin/perl +# +# Copyright 2015 David 
Snyder +# 2017 Radboud University (Author: Emre Yilmaz) +# Apache 2.0. +# Usage: make_fame_test_year.pl corpus/SV/ data/ complete 3sec eval 1t3. + +if (@ARGV != 6) { + print STDERR "Usage: $0 <path-to-FAME-corpus> <path-to-output-dir> <task> <subtask> <set> <year-range>\n"; + print STDERR "e.g. $0 corpus/SV/ data/ complete 3sec eval 1t3\n"; + exit(1); +} + +($db_base, $out_base_dir, $task, $subtask, $sets, $year) = @ARGV; +$out_dir = "$out_base_dir/fame_${task}_${subtask}_${sets}${year}_test"; + +$tmp_dir = "$out_dir/tmp"; +if (system("mkdir -p $tmp_dir") != 0) { + die "Error making directory $tmp_dir"; +} + +open(IN_TRIALS, "<", "$db_base/docs/$task/${task}_${subtask}_${sets}_trials${year}_key") or die "cannot open trials list"; +open(OUT_TRIALS, ">", "$out_dir/trials") or die "cannot open trials list"; +%trials = (); +while(<IN_TRIALS>) { + chomp; + ($spkr,$utt,$side,$is_target) = split(",", $_); + $side = uc $side; + $key = "${spkr} ${utt}_${side}"; # Just keep track of the spkr-utterance pairs we want. + $trials{$key} = 1; # Just keep track of the spkr-utterance pairs we want. + print OUT_TRIALS "$spkr ${utt}_${side} $is_target\n"; +} + +close(OUT_TRIALS) || die; +close(IN_TRIALS) || die; + +open(WAVLIST, "<", "$db_base/docs/$task/${task}_${subtask}_${sets}_trials${year}") or die "cannot open wav list"; +open(GNDR,">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender"; +open(SPKR,">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk"; +open(WAV,">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp"; + +%spk2gender = (); +%utts = (); +while(<WAVLIST>) { + chomp; + $sph = $_; + ($spkr, $gender, $wav_and_side) = split(" ", $sph); + ($wav, $side) = split(":", $wav_and_side); + $wav = "${db_base}/data/${task}/${sets}/${subtask}/${wav}"; + @A = split("/", $wav); + $basename = $A[$#A]; + $raw_basename = $basename; + $raw_basename =~ s/\.wav$// || die "bad basename $basename"; + $uttId = $raw_basename . "_" . 
$side; + $key = "${spkr} ${uttId}"; + if ( (not exists($trials{"${spkr} ${uttId}"}) ) or exists($utts{$uttId}) ) { + next; + } + $utts{$uttId} = 1; + if ($side eq "A") { + $channel = 1; + } elsif ($side eq "B") { + $channel = 2; + } else { + die "unknown channel $side\n"; + } + print WAV "$uttId"," $wav\n"; + print SPKR "$uttId $uttId\n"; + print GNDR "$uttId $gender\n"; + $spk2gender{$spkr} = $gender; +} + +close(SPKR) || die; +close(WAV) || die; +close(WAVLIST) || die; +close(GNDR) || die; + +if (system( + "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_dir"; +} +system("utils/fix_data_dir.sh $out_dir"); +if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { + die "Error validating directory $out_dir"; +} diff --git a/egs/fame/v1/local/make_fame_train.pl b/egs/fame/v1/local/make_fame_train.pl new file mode 100755 index 000000000..f105549b7 --- /dev/null +++ b/egs/fame/v1/local/make_fame_train.pl @@ -0,0 +1,60 @@ +#!/usr/bin/perl +# +# Copyright 2015 David Snyder +# 2017 Radboud University (Author: Emre Yilmaz) +# Apache 2.0. +# Usage: make_fame_train.pl corpus/SV/ data/ complete 3sec eval. + +if (@ARGV != 5) { + print STDERR "Usage: $0 \n"; + print STDERR "e.g. 
$0 corpus/SV/ data/ complete 3sec eval\n"; + exit(1); +} + +($db_base, $out_base_dir, $task, $subtask, $sets) = @ARGV; +$out_dir = "$out_base_dir/fame_${task}_${subtask}_${sets}_enroll"; + +$tmp_dir = "$out_dir/tmp"; +if (system("mkdir -p $tmp_dir") != 0) { + die "Error making directory $tmp_dir"; +} + +open(WAVLIST, "<", "$db_base/docs/${task}/${task}_${subtask}_${sets}_enroll") or die "cannot open wav list"; +open(SPKR,">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk"; +open(GNDR,">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender"; +open(WAV,">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp"; + +while(<WAVLIST>) { + chomp; + $sph = $_; + ($spkr, $gender, $wav_and_side) = split(" ", $sph); + ($wav, $side) = split(":", $wav_and_side); + @A = split("/", $wav); + $wav = "${db_base}/data/${task}/${sets}/${subtask}/${wav}"; + $basename = $A[$#A]; + $raw_basename = $basename; + $raw_basename =~ s/\.wav$// || die "bad basename $basename"; + $uttId = $raw_basename . "_" . $side; # prefix spkr-id to utt-id to ensure sorted order. 
+ if ($side eq "A") { + $channel = 1; + } elsif ($side eq "B") { + $channel = 2; + } else { + die "unknown channel $side\n"; + } + print GNDR "$spkr $gender\n"; + print WAV "$uttId"," $wav", "\n"; + print SPKR "$uttId"," $spkr","\n"; +} +close(GNDR) || die; +close(SPKR) || die; +close(WAV) || die; +close(WAVLIST) || die; +if (system( + "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_dir"; +} +system("utils/fix_data_dir.sh $out_dir"); +if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { + die "Error validating directory $out_dir"; +} diff --git a/egs/fame/v1/local/make_fame_train_year.pl b/egs/fame/v1/local/make_fame_train_year.pl new file mode 100755 index 000000000..2d9dcf884 --- /dev/null +++ b/egs/fame/v1/local/make_fame_train_year.pl @@ -0,0 +1,60 @@ +#!/usr/bin/perl +# +# Copyright 2015 David Snyder +# 2017 Radboud University (Author: Emre Yilmaz) +# Apache 2.0. +# Usage: make_fame_train_year.pl corpus/SV/ data/ complete 3sec eval 1t3. + +if (@ARGV != 6) { + print STDERR "Usage: $0 <path-to-FAME-corpus> <path-to-output-dir> <task> <subtask> <set> <year-range>\n"; + print STDERR "e.g. 
$0 corpus/SV/ data/ complete 3sec eval 1t3\n"; + exit(1); +} + +($db_base, $out_base_dir, $task, $subtask, $sets, $year) = @ARGV; +$out_dir = "$out_base_dir/fame_${task}_${subtask}_${sets}${year}_enroll"; + +$tmp_dir = "$out_dir/tmp"; +if (system("mkdir -p $tmp_dir") != 0) { + die "Error making directory $tmp_dir"; +} + +open(WAVLIST, "<", "$db_base/docs/${task}/${task}_${subtask}_${sets}_enroll${year}") or die "cannot open wav list"; +open(SPKR,">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk"; +open(GNDR,">", "$out_dir/spk2gender") or die "Could not open the output file $out_dir/spk2gender"; +open(WAV,">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp"; + +while(<WAVLIST>) { + chomp; + $sph = $_; + ($spkr, $gender, $wav_and_side) = split(" ", $sph); + ($wav, $side) = split(":", $wav_and_side); + @A = split("/", $wav); + $wav = "${db_base}/data/${task}/${sets}/${subtask}/${wav}"; + $basename = $A[$#A]; + $raw_basename = $basename; + $raw_basename =~ s/\.wav$// || die "bad basename $basename"; + $uttId = $raw_basename . "_" . $side; # prefix spkr-id to utt-id to ensure sorted order. 
+ if ($side eq "A") { + $channel = 1; + } elsif ($side eq "B") { + $channel = 2; + } else { + die "unknown channel $side\n"; + } + print GNDR "$spkr $gender\n"; + print WAV "$uttId"," $wav", "\n"; + print SPKR "$uttId"," $spkr","\n"; +} +close(GNDR) || die; +close(SPKR) || die; +close(WAV) || die; +close(WAVLIST) || die; +if (system( + "utils/utt2spk_to_spk2utt.pl $out_dir/utt2spk >$out_dir/spk2utt") != 0) { + die "Error creating spk2utt file in directory $out_dir"; +} +system("utils/fix_data_dir.sh $out_dir"); +if (system("utils/validate_data_dir.sh --no-text --no-feats $out_dir") != 0) { + die "Error validating directory $out_dir"; +} diff --git a/egs/fame/v1/local/plda_scoring.sh b/egs/fame/v1/local/plda_scoring.sh new file mode 100755 index 000000000..63d4a4f0d --- /dev/null +++ b/egs/fame/v1/local/plda_scoring.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# Copyright 2015 David Snyder +# Apache 2.0. +# +# This script trains PLDA models and does scoring. + +use_existing_models=false +simple_length_norm=false # If true, replace the default length normalization + # performed in PLDA by an alternative that + # normalizes the length of the iVectors to be equal + # to the square root of the iVector dimension. + +echo "$0 $@" # Print the command line for logging + +if [ -f path.sh ]; then . ./path.sh; fi +. parse_options.sh || exit 1; + +if [ $# != 8 ]; then + echo "Usage: $0 " +fi + +plda_data_dir=$1 +enroll_data_dir=$2 +test_data_dir=$3 +plda_ivec_dir=$4 +enroll_ivec_dir=$5 +test_ivec_dir=$6 +trials=$7 +scores_dir=$8 + +if [ "$use_existing_models" == "true" ]; then + for f in ${plda_ivec_dir}/mean.vec ${plda_ivec_dir}/plda ; do + [ ! 
-f $f ] && echo "No such file $f" && exit 1; + done +else + run.pl $plda_ivec_dir/log/plda.log \ + ivector-compute-plda ark:$plda_data_dir/spk2utt \ + "ark:ivector-normalize-length scp:${plda_ivec_dir}/ivector.scp ark:- |" \ + $plda_ivec_dir/plda || exit 1; +fi + +mkdir -p $scores_dir/log + +run.pl $scores_dir/log/plda_scoring.log \ + ivector-plda-scoring --normalize-length=true \ + --simple-length-normalization=$simple_length_norm \ + --num-utts=ark:${enroll_ivec_dir}/num_utts.ark \ + "ivector-copy-plda --smoothing=0.0 ${plda_ivec_dir}/plda - |" \ + "ark:ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec scp:${enroll_ivec_dir}/spk_ivector.scp ark:- | ivector-normalize-length ark:- ark:- |" \ + "ark:ivector-normalize-length scp:${test_ivec_dir}/ivector.scp ark:- | ivector-subtract-global-mean ${plda_ivec_dir}/mean.vec ark:- ark:- | ivector-normalize-length ark:- ark:- |" \ + "cat '$trials' | cut -d\ --fields=1,2 |" $scores_dir/plda_scores || exit 1; diff --git a/egs/fame/v1/local/prepare_for_eer.py b/egs/fame/v1/local/prepare_for_eer.py new file mode 100755 index 000000000..59d2985e7 --- /dev/null +++ b/egs/fame/v1/local/prepare_for_eer.py @@ -0,0 +1,15 @@ +# Copyright 2015 David Snyder +# Apache 2.0. +# +# Given a trials and scores file, this script +# prepares input for the binary compute-eer. 
+import sys +trials = open(sys.argv[1], 'r').readlines() +scores = open(sys.argv[2], 'r').readlines() +spkrutt2target = {} +for line in trials: + spkr, utt, target = line.strip().split() + spkrutt2target[spkr+utt]=target +for line in scores: + spkr, utt, score = line.strip().split() + print score, spkrutt2target[spkr+utt] diff --git a/egs/fame/v1/local/prepare_train.sh b/egs/fame/v1/local/prepare_train.sh new file mode 100755 index 000000000..0a3979dd6 --- /dev/null +++ b/egs/fame/v1/local/prepare_train.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# Copyright 2015-2016 Sarah Flora Juan +# Copyright 2016 Johns Hopkins University (Author: Yenda Trmal) +# Copyright 2017 Radboud University (Author: Emre Yilmaz) + +# Apache 2.0 + +corpus=$1 +set -e -o pipefail +if [ -z "$corpus" ] ; then + echo >&2 "The script $0 expects one parameter -- the location of the FAME! speech database" + exit 1 +fi +if [ ! -d "$corpus" ] ; then + echo >&2 "The directory $corpus does not exist" +fi + +mkdir -p data data/train + +cp $corpus/data/spk2utt data/train/spk2utt || exit 1; +cp $corpus/data/utt2spk data/train/utt2spk || exit 1; + +# the corpus wav.scp contains physical paths, so we just re-generate +# the file again from scratchn instead of figuring out how to edit it +for rec in $(awk '{print $1}' $corpus/data/utt2spk) ; do + spk=${rec%_*} + filename=$corpus/SD/${rec}.wav + if [ ! -f "$filename" ] ; then + echo >&2 "The file $filename could not be found ($rec)" + exit 1 + fi + # we might want to store physical paths as a general rule + filename=$(utils/make_absolute.sh $filename) + echo "$rec $filename" +done > data/train/wav.scp + +# fix_data_dir.sh fixes common mistakes (unsorted entries in wav.scp, +# duplicate entries and so on). 
Also, it regenerates the spk2utt from +# utt2sp +utils/fix_data_dir.sh data/train diff --git a/egs/fame/v1/local/scoring_common.sh b/egs/fame/v1/local/scoring_common.sh new file mode 100755 index 000000000..63950ae57 --- /dev/null +++ b/egs/fame/v1/local/scoring_common.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# Copyright 2015 David Snyder +# Copyright 2017 Emre Yilmaz (Adapted) +# Apache 2.0. +# +if [ $# != 6 ]; then + echo "Usage: $0 " +fi +plda_data_dir=${1%/} +enroll_data_dir=${2%/} +test_data_dir=${3%/} +plda_ivec_dir=${4%/} +enroll_ivec_dir=${5%/} +test_ivec_dir=${6%/} + +if [ ! -f ${test_data_dir}/trials ]; then + echo "${test_data_dir} needs a trial file." + exit; +fi + +mkdir -p local/.tmp + +# Partition the SRE data into male and female subsets. +cat ${test_data_dir}/spk2gender | grep -w f > local/.tmp/female_spklist +utils/subset_data_dir.sh --spk-list local/.tmp/female_spklist ${test_data_dir} ${test_data_dir}_female +cat ${enroll_data_dir}/spk2gender | grep -w f > local/.tmp/female_spklist +utils/subset_data_dir.sh --spk-list local/.tmp/female_spklist ${enroll_data_dir} ${enroll_data_dir}_female +cat ${test_data_dir}/spk2gender | grep -w m > local/.tmp/male_spklist +utils/subset_data_dir.sh --spk-list local/.tmp/male_spklist ${test_data_dir} ${test_data_dir}_male +cat ${enroll_data_dir}/spk2gender | grep -w m > local/.tmp/male_spklist +utils/subset_data_dir.sh --spk-list local/.tmp/male_spklist ${enroll_data_dir} ${enroll_data_dir}_male + +# Prepare female and male trials. 
+trials_female=${test_data_dir}_female/trials +cat ${test_data_dir}/trials | awk '{print $2, $0}' | \ + utils/filter_scp.pl ${test_data_dir}_female/utt2spk | cut -d ' ' -f 2- \ + > $trials_female +trials_male=${test_data_dir}_male/trials +cat ${test_data_dir}/trials | awk '{print $2, $0}' | \ + utils/filter_scp.pl ${test_data_dir}_male/utt2spk | cut -d ' ' -f 2- \ + > $trials_male + +mkdir -p ${test_ivec_dir}_male +mkdir -p ${test_ivec_dir}_female +mkdir -p ${enroll_ivec_dir}_male +mkdir -p ${enroll_ivec_dir}_female + +# Partition the i-vectors into male and female subsets. +utils/filter_scp.pl ${enroll_data_dir}_male/utt2spk \ + ${enroll_ivec_dir}/ivector.scp > ${enroll_ivec_dir}_male/ivector.scp +utils/filter_scp.pl ${test_data_dir}_male/utt2spk \ + ${test_ivec_dir}/ivector.scp > ${test_ivec_dir}_male/ivector.scp +utils/filter_scp.pl ${enroll_data_dir}_female/utt2spk \ + ${enroll_ivec_dir}/ivector.scp > ${enroll_ivec_dir}_female/ivector.scp +utils/filter_scp.pl ${test_data_dir}_female/utt2spk \ + ${test_ivec_dir}/ivector.scp > ${test_ivec_dir}_female/ivector.scp +utils/filter_scp.pl ${enroll_data_dir}_male/spk2utt \ + ${enroll_ivec_dir}/spk_ivector.scp > ${enroll_ivec_dir}_male/spk_ivector.scp +utils/filter_scp.pl ${enroll_data_dir}_female/spk2utt \ + ${enroll_ivec_dir}/spk_ivector.scp > ${enroll_ivec_dir}_female/spk_ivector.scp +utils/filter_scp.pl ${enroll_data_dir}_male/spk2utt \ + ${enroll_ivec_dir}/num_utts.ark > ${enroll_ivec_dir}_male/num_utts.ark +utils/filter_scp.pl ${enroll_data_dir}_female/spk2utt \ + ${enroll_ivec_dir}/num_utts.ark > ${enroll_ivec_dir}_female/num_utts.ark + +# Compute gender independent and dependent i-vector means. +ivector-mean scp:${plda_ivec_dir}/ivector.scp ${plda_ivec_dir}/mean.vec + +rm -rf local/.tmp diff --git a/egs/fame/v1/path.sh b/egs/fame/v1/path.sh new file mode 100755 index 000000000..2d17b17a8 --- /dev/null +++ b/egs/fame/v1/path.sh @@ -0,0 +1,6 @@ +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . 
$KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C diff --git a/egs/fame/v1/run.sh b/egs/fame/v1/run.sh new file mode 100755 index 000000000..34c425adc --- /dev/null +++ b/egs/fame/v1/run.sh @@ -0,0 +1,300 @@ +#!/bin/bash +# Copyright 2015 David Snyder +# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) +# 2015 Johns Hopkins University (Author: Daniel Povey) +# 2017 Radboud University (Author Emre Yilmaz) +# Apache 2.0. +# +# See README.txt for more info on data required. +# Results (EERs) are inline in comments below. + +. ./cmd.sh +. ./path.sh +set -e + +mfccdir=`pwd`/mfcc +vaddir=`pwd`/mfcc +famecorpus=./corpus +num_components=2048 + +# Data preparation + +if [ -d $famecorpus ] ; then + echo "Fame corpus present. OK." +elif [ -f ./fame.tar.gz ] ; then + echo "Unpacking..." + tar xzf fame.tar.gz +elif [ ! -d $famecorpus ] && [ ! -f ./fame.tar.gz ] ; then + echo "The Fame! corpus is not present. Please register here: http://www.ru.nl/clst/datasets/ " + echo " and download the corpus and put it at $famecorpus" && exit 1 +fi + +echo "Preparing data/train.." +local/prepare_train.sh $famecorpus/SC + +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + echo "Preparing data/fame_${task}_${subtask}_${sets}.." 
+ trials_female=data/fame_${task}_${subtask}_${sets}_female/trials + trials_male=data/fame_${task}_${subtask}_${sets}_male/trials + trials=data/fame_${task}_${subtask}_${sets}/trials + local/make_fame_test.pl $famecorpus/SV data $task $subtask $sets + local/make_fame_train.pl $famecorpus/SV data $task $subtask $sets + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + echo "Preparing data/fame_${task}_${subtask}_${sets}${year}.." + trials_female=data/fame_${task}_${subtask}_${sets}${year}_female/trials + trials_male=data/fame_${task}_${subtask}_${sets}${year}_male/trials + trials=data/fame_${task}_${subtask}_${sets}${year}/trials + local/make_fame_test_year.pl $famecorpus/SV data $task $subtask $sets $year + local/make_fame_train_year.pl $famecorpus/SV data $task $subtask $sets $year + + done + done + done +done + +# MFCC extraction + +echo "Extracting MFCC features for data/train.." + +steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \ + data/train exp/make_mfcc $mfccdir +utils/fix_data_dir.sh data/train + +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + echo "Extracting MFCC features for data/fame_${task}_${subtask}_${sets}.." + steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}_enroll exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}_enroll + steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}_test exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}_test + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + echo "Extracting MFCC features for data/fame_${task}_${subtask}_${sets}${year}.." 
+ steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}${year}_enroll exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}${year}_enroll + steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}${year}_test exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}${year}_test + + done + done + done +done + +# VAD computation + +echo "Computing VAD for data/train.." + +sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \ + data/train exp/make_vad $vaddir + +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + echo "Computing VAD for data/fame_${task}_${subtask}_${sets}.." + sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}_enroll exp/make_vad $vaddir + sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}_test exp/make_vad $vaddir + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + echo "Computing VAD for data/fame_${task}_${subtask}_${sets}${year}.." + sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}${year}_enroll exp/make_vad $vaddir + sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}${year}_test exp/make_vad $vaddir + + done + done + done +done + + +# Train UBM and i-vector extractor + +echo "Training UBM and the i-vector extractor.." 
+ +sid/train_diag_ubm.sh --nj 40 --cmd "$train_cmd" \ + data/train $num_components \ + exp/diag_ubm_$num_components + +sid/train_full_ubm.sh --nj 40 --remove-low-count-gaussians false \ + --cmd "$train_cmd" data/train \ + exp/diag_ubm_$num_components exp/full_ubm_$num_components + +sid/train_ivector_extractor.sh --cmd "$train_cmd" --nj 20 --num-threads 4 --num-processes 2 \ + --ivector-dim 600 \ + --num-iters 5 exp/full_ubm_$num_components/final.ubm data/train \ + exp/extractor + +# Extract i-vectors + +echo "Extracting i-vectors for data/train.." + +sid/extract_ivectors.sh --cmd "$train_cmd" --nj 100 \ + exp/extractor data/train \ + exp/ivectors_train + +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + echo "Extracting i-vectors for data/fame_${task}_${subtask}_${sets}" + sid/extract_ivectors.sh --cmd "$train_cmd" --nj 100 \ + exp/extractor data/fame_${task}_${subtask}_${sets}_enroll \ + exp/ivectors_fame_${task}_${subtask}_${sets}_enroll + sid/extract_ivectors.sh --cmd "$train_cmd" --nj 100 \ + exp/extractor data/fame_${task}_${subtask}_${sets}_test \ + exp/ivectors_fame_${task}_${subtask}_${sets}_test + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + echo "Extracting i-vectors for data/fame_${task}_${subtask}_${sets}${year}" + sid/extract_ivectors.sh --cmd "$train_cmd" --nj 100 \ + exp/extractor data/fame_${task}_${subtask}_${sets}${year}_enroll \ + exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll + sid/extract_ivectors.sh --cmd "$train_cmd" --nj 100 \ + exp/extractor data/fame_${task}_${subtask}_${sets}${year}_test \ + exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test + + done + done + done +done + +# Calculate i-vector means used by the scoring scripts + +echo "Calculating i-vectors means.." 
+ +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + local/scoring_common.sh data/train data/fame_${task}_${subtask}_${sets}_enroll data/fame_${task}_${subtask}_${sets}_test \ + exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}_enroll exp/ivectors_fame_${task}_${subtask}_${sets}_test + + trials_female=data/fame_${task}_${subtask}_${sets}_test_female/trials + trials_male=data/fame_${task}_${subtask}_${sets}_test_male/trials + trials=data/fame_${task}_${subtask}_${sets}_test/trials + + local/plda_scoring.sh data/train data/fame_${task}_${subtask}_${sets}_enroll data/fame_${task}_${subtask}_${sets}_test \ + exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}_enroll exp/ivectors_fame_${task}_${subtask}_${sets}_test $trials local/scores_gmm_2048_ind_pooled_${task}_${subtask}_${sets} + + local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}_enroll_female data/fame_${task}_${subtask}_${sets}_test_female \ + exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_female exp/ivectors_fame_${task}_${subtask}_${sets}_test_female $trials_female local/scores_gmm_2048_ind_female_${task}_${subtask}_${sets} + + local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}_enroll_male data/fame_${task}_${subtask}_${sets}_test_male \ + exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_male exp/ivectors_fame_${task}_${subtask}_${sets}_test_male $trials_male local/scores_gmm_2048_ind_male_${task}_${subtask}_${sets} + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + local/scoring_common.sh data/train data/fame_${task}_${subtask}_${sets}${year}_enroll data/fame_${task}_${subtask}_${sets}${year}_test \ + exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll 
exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test + + trials_female=data/fame_${task}_${subtask}_${sets}${year}_test_female/trials + trials_male=data/fame_${task}_${subtask}_${sets}${year}_test_male/trials + trials=data/fame_${task}_${subtask}_${sets}${year}_test/trials + + local/plda_scoring.sh data/train data/fame_${task}_${subtask}_${sets}${year}_enroll data/fame_${task}_${subtask}_${sets}${year}_test \ + exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test $trials local/scores_gmm_2048_ind_pooled_${task}_${subtask}_${sets}${year} + + local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}${year}_enroll_female data/fame_${task}_${subtask}_${sets}${year}_test_female \ + exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_female exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_female $trials_female local/scores_gmm_2048_ind_female_${task}_${subtask}_${sets}${year} + + local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}${year}_enroll_male data/fame_${task}_${subtask}_${sets}${year}_test_male \ + exp/ivectors_train exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_male exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_male $trials_male local/scores_gmm_2048_ind_male_${task}_${subtask}_${sets}${year} + + done + done + done +done + +# Calculating EER + +echo "Calculating EER.." 
+ +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + trials=data/fame_${task}_${subtask}_${sets}_test/trials + echo "GMM-$num_components EER for fame_${task}_${subtask}_${sets}" + for x in ind; do + for y in female male pooled; do + echo "python local/prepare_for_eer.py $trials local/scores_gmm_${num_components}_${x}_${y}_${task}_${subtask}_${sets}/plda_scores" + eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_gmm_${num_components}_${x}_${y}_${task}_${subtask}_${sets}/plda_scores) 2> /dev/null` + echo "${x} ${y}: $eer" + done + done + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + trials=data/fame_${task}_${subtask}_${sets}${year}_test/trials + echo "GMM-$num_components EER for fame_${task}_${subtask}_${sets}${year}" + for x in ind; do + for y in female male pooled; do + echo "python local/prepare_for_eer.py $trials local/scores_gmm_${num_components}_${x}_${y}_${task}_${subtask}_${sets}${year}/plda_scores" + eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_gmm_${num_components}_${x}_${y}_${task}_${subtask}_${sets}${year}/plda_scores) 2> /dev/null` + echo "${x} ${y}: $eer" + done + done + + done + done + done +done diff --git a/egs/fame/v1/sid b/egs/fame/v1/sid new file mode 120000 index 000000000..893a12f30 --- /dev/null +++ b/egs/fame/v1/sid @@ -0,0 +1 @@ +../../sre08/v1/sid \ No newline at end of file diff --git a/egs/fame/v1/steps b/egs/fame/v1/steps new file mode 120000 index 000000000..6e99bf5b5 --- /dev/null +++ b/egs/fame/v1/steps @@ -0,0 +1 @@ +../../wsj/s5/steps \ No newline at end of file diff --git a/egs/fame/v1/utils b/egs/fame/v1/utils new file mode 120000 index 000000000..b24088521 --- /dev/null +++ b/egs/fame/v1/utils @@ -0,0 +1 @@ +../../wsj/s5/utils \ No newline at end of file diff --git a/egs/fame/v2/RESULTS b/egs/fame/v2/RESULTS new file mode 100644 index 
000000000..375bb0b40 --- /dev/null +++ b/egs/fame/v2/RESULTS @@ -0,0 +1,105 @@ +DNN EER for fame_complete_3sec_eval +python local/prepare_for_eer.py data/fame_complete_3sec_eval_test/trials local/scores_dnn_ind_female_complete_3sec_eval/plda_scores +ind female: 20.71 +python local/prepare_for_eer.py data/fame_complete_3sec_eval_test/trials local/scores_dnn_ind_male_complete_3sec_eval/plda_scores +ind male: 13.6 +python local/prepare_for_eer.py data/fame_complete_3sec_eval_test/trials local/scores_dnn_ind_pooled_complete_3sec_eval/plda_scores +ind pooled: 16.89 +DNN EER for fame_complete_10sec_eval +python local/prepare_for_eer.py data/fame_complete_10sec_eval_test/trials local/scores_dnn_ind_female_complete_10sec_eval/plda_scores +ind female: 13.21 +python local/prepare_for_eer.py data/fame_complete_10sec_eval_test/trials local/scores_dnn_ind_male_complete_10sec_eval/plda_scores +ind male: 7.391 +python local/prepare_for_eer.py data/fame_complete_10sec_eval_test/trials local/scores_dnn_ind_pooled_complete_10sec_eval/plda_scores +ind pooled: 9.929 +DNN EER for fame_complete_30sec_eval +python local/prepare_for_eer.py data/fame_complete_30sec_eval_test/trials local/scores_dnn_ind_female_complete_30sec_eval/plda_scores +ind female: 10.27 +python local/prepare_for_eer.py data/fame_complete_30sec_eval_test/trials local/scores_dnn_ind_male_complete_30sec_eval/plda_scores +ind male: 4.963 +python local/prepare_for_eer.py data/fame_complete_30sec_eval_test/trials local/scores_dnn_ind_pooled_complete_30sec_eval/plda_scores +ind pooled: 7.469 +DNN EER for fame_ageing_3sec_eval +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_test/trials local/scores_dnn_ind_female_ageing_3sec_eval/plda_scores +ind female: 20.05 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_test/trials local/scores_dnn_ind_male_ageing_3sec_eval/plda_scores +ind male: 14.52 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_test/trials 
local/scores_dnn_ind_pooled_ageing_3sec_eval/plda_scores +ind pooled: 17.71 +DNN EER for fame_ageing_10sec_eval +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_test/trials local/scores_dnn_ind_female_ageing_10sec_eval/plda_scores +ind female: 12.06 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_test/trials local/scores_dnn_ind_male_ageing_10sec_eval/plda_scores +ind male: 7.947 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_test/trials local/scores_dnn_ind_pooled_ageing_10sec_eval/plda_scores +ind pooled: 10.35 +DNN EER for fame_ageing_30sec_eval +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_test/trials local/scores_dnn_ind_female_ageing_30sec_eval/plda_scores +ind female: 7.518 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_test/trials local/scores_dnn_ind_male_ageing_30sec_eval/plda_scores +ind male: 4.624 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_test/trials local/scores_dnn_ind_pooled_ageing_30sec_eval/plda_scores +ind pooled: 7.118 +DNN EER for fame_ageing_3sec_eval_1t3 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_1t3_test/trials local/scores_dnn_ind_female_ageing_3sec_eval_1t3/plda_scores +ind female: 20.57 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_1t3_test/trials local/scores_dnn_ind_male_ageing_3sec_eval_1t3/plda_scores +ind male: 10.88 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_1t3_test/trials local/scores_dnn_ind_pooled_ageing_3sec_eval_1t3/plda_scores +ind pooled: 16.38 +DNN EER for fame_ageing_3sec_eval_4t10 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_4t10_test/trials local/scores_dnn_ind_female_ageing_3sec_eval_4t10/plda_scores +ind female: 18.02 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_4t10_test/trials local/scores_dnn_ind_male_ageing_3sec_eval_4t10/plda_scores +ind male: 14.48 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_4t10_test/trials 
local/scores_dnn_ind_pooled_ageing_3sec_eval_4t10/plda_scores +ind pooled: 16.54 +DNN EER for fame_ageing_3sec_eval_mt10 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_mt10_test/trials local/scores_dnn_ind_female_ageing_3sec_eval_mt10/plda_scores +ind female: 21.83 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_mt10_test/trials local/scores_dnn_ind_male_ageing_3sec_eval_mt10/plda_scores +ind male: 17.1 +python local/prepare_for_eer.py data/fame_ageing_3sec_eval_mt10_test/trials local/scores_dnn_ind_pooled_ageing_3sec_eval_mt10/plda_scores +ind pooled: 20.09 +DNN EER for fame_ageing_10sec_eval_1t3 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_1t3_test/trials local/scores_dnn_ind_female_ageing_10sec_eval_1t3/plda_scores +ind female: 12.81 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_1t3_test/trials local/scores_dnn_ind_male_ageing_10sec_eval_1t3/plda_scores +ind male: 5.076 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_1t3_test/trials local/scores_dnn_ind_pooled_ageing_10sec_eval_1t3/plda_scores +ind pooled: 9.946 +DNN EER for fame_ageing_10sec_eval_4t10 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_4t10_test/trials local/scores_dnn_ind_female_ageing_10sec_eval_4t10/plda_scores +ind female: 9.812 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_4t10_test/trials local/scores_dnn_ind_male_ageing_10sec_eval_4t10/plda_scores +ind male: 9.193 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_4t10_test/trials local/scores_dnn_ind_pooled_ageing_10sec_eval_4t10/plda_scores +ind pooled: 9.746 +DNN EER for fame_ageing_10sec_eval_mt10 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_mt10_test/trials local/scores_dnn_ind_female_ageing_10sec_eval_mt10/plda_scores +ind female: 13.19 +python local/prepare_for_eer.py data/fame_ageing_10sec_eval_mt10_test/trials local/scores_dnn_ind_male_ageing_10sec_eval_mt10/plda_scores +ind male: 7.711 +python 
local/prepare_for_eer.py data/fame_ageing_10sec_eval_mt10_test/trials local/scores_dnn_ind_pooled_ageing_10sec_eval_mt10/plda_scores +ind pooled: 11.04 +DNN EER for fame_ageing_30sec_eval_1t3 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_1t3_test/trials local/scores_dnn_ind_female_ageing_30sec_eval_1t3/plda_scores +ind female: 8.882 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_1t3_test/trials local/scores_dnn_ind_male_ageing_30sec_eval_1t3/plda_scores +ind male: 2.212 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_1t3_test/trials local/scores_dnn_ind_pooled_ageing_30sec_eval_1t3/plda_scores +ind pooled: 7.547 +DNN EER for fame_ageing_30sec_eval_4t10 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_4t10_test/trials local/scores_dnn_ind_female_ageing_30sec_eval_4t10/plda_scores +ind female: 5.155 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_4t10_test/trials local/scores_dnn_ind_male_ageing_30sec_eval_4t10/plda_scores +ind male: 5.461 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_4t10_test/trials local/scores_dnn_ind_pooled_ageing_30sec_eval_4t10/plda_scores +ind pooled: 6.021 +DNN EER for fame_ageing_30sec_eval_mt10 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_mt10_test/trials local/scores_dnn_ind_female_ageing_30sec_eval_mt10/plda_scores +ind female: 8.244 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_mt10_test/trials local/scores_dnn_ind_male_ageing_30sec_eval_mt10/plda_scores +ind male: 4.202 +python local/prepare_for_eer.py data/fame_ageing_30sec_eval_mt10_test/trials local/scores_dnn_ind_pooled_ageing_30sec_eval_mt10/plda_scores +ind pooled: 7.737 diff --git a/egs/fame/v2/cmd.sh b/egs/fame/v2/cmd.sh new file mode 100644 index 000000000..23721d04c --- /dev/null +++ b/egs/fame/v2/cmd.sh @@ -0,0 +1,23 @@ +# you can change cmd.sh depending on what type of queue you are using. 
+# If you have no queueing system and want to run on a local machine, you +# can change all instances 'queue.pl' to run.pl (but be careful and run +# commands one by one: most recipes will exhaust the memory on your +# machine). queue.pl works with GridEngine (qsub). slurm.pl works +# with slurm. Different queues are configured differently, with different +# queue names and different ways of specifying things like memory; +# to account for these differences you can create and edit the file +# conf/queue.conf to match your queue's configuration. Search for +# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information, +# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl. + +export train_cmd=queue.pl +export decode_cmd="queue.pl --mem 2G" +# the use of cuda_cmd is deprecated, used only in 'nnet1', +export cuda_cmd="queue.pl --gpu 1" + +if [ "$(hostname -d)" == "fit.vutbr.cz" ]; then + queue_conf=$HOME/queue_conf/default.conf # see example /homes/kazi/iveselyk/queue_conf/default.conf, + export train_cmd="queue.pl --config $queue_conf --mem 2G --matylda 0.2" + export decode_cmd="queue.pl --config $queue_conf --mem 3G --matylda 0.1" + export cuda_cmd="queue.pl --config $queue_conf --gpu 1 --mem 10G --tmp 40G" +fi diff --git a/egs/fame/v2/conf/decode.config b/egs/fame/v2/conf/decode.config new file mode 100644 index 000000000..2f36bcc30 --- /dev/null +++ b/egs/fame/v2/conf/decode.config @@ -0,0 +1,3 @@ +beam=11.0 # beam for decoding. Was 13.0 in the scripts. +first_beam=8.0 # beam for 1st-pass decoding in SAT. +lattice_beam=6.0 diff --git a/egs/fame/v2/conf/decode_dnn.config b/egs/fame/v2/conf/decode_dnn.config new file mode 100644 index 000000000..ab8dcc1dc --- /dev/null +++ b/egs/fame/v2/conf/decode_dnn.config @@ -0,0 +1,2 @@ +beam=13.0 # beam for decoding. Was 13.0 in the scripts. +lattice_beam=8.0 # this has most effect on size of the lattices. 
diff --git a/egs/fame/v2/conf/mfcc_16k.conf b/egs/fame/v2/conf/mfcc_16k.conf new file mode 100644 index 000000000..2436c264f --- /dev/null +++ b/egs/fame/v2/conf/mfcc_16k.conf @@ -0,0 +1,4 @@ +--low-freq=20 # the default. +--high-freq=7600 # the default is zero meaning use the Nyquist (8k in this case). +--num-ceps=20 # higher than the default which is 12. +--snip-edges=false diff --git a/egs/fame/v2/conf/mfcc_asr.conf b/egs/fame/v2/conf/mfcc_asr.conf new file mode 100644 index 000000000..16205681e --- /dev/null +++ b/egs/fame/v2/conf/mfcc_asr.conf @@ -0,0 +1,2 @@ +--use-energy=false # only non-default option. +--snip-edges=false diff --git a/egs/fame/v2/conf/mfcc_hires_16k.conf b/egs/fame/v2/conf/mfcc_hires_16k.conf new file mode 100644 index 000000000..6dacd1c74 --- /dev/null +++ b/egs/fame/v2/conf/mfcc_hires_16k.conf @@ -0,0 +1,11 @@ +# config for high-resolution MFCC features, intended for neural network training +# Note: we keep all cepstra, so it has the same info as filterbank features, +# but MFCC is more easily compressible (because less correlated) which is why +# we prefer this method. +--use-energy=false # use average of log energy, not energy. +--num-mel-bins=40 # similar to Google's setup. +--num-ceps=40 # there is no dimensionality reduction. +--low-freq=20 # low cutoff frequency for mel bins... this is high-bandwidth data, so + # there might be some information at the low end. 
+--high-freq=7600 # high cutoff frequently, relative to Nyquist of 8000 (=7600) +--snip-edges=false diff --git a/egs/fame/v2/conf/vad.conf b/egs/fame/v2/conf/vad.conf new file mode 100644 index 000000000..a0ca2449b --- /dev/null +++ b/egs/fame/v2/conf/vad.conf @@ -0,0 +1,2 @@ +--vad-energy-threshold=5.5 +--vad-energy-mean-scale=0.5 diff --git a/egs/fame/v2/local b/egs/fame/v2/local new file mode 120000 index 000000000..ce1cbf907 --- /dev/null +++ b/egs/fame/v2/local @@ -0,0 +1 @@ +../v1/local \ No newline at end of file diff --git a/egs/fame/v2/path.sh b/egs/fame/v2/path.sh new file mode 100755 index 000000000..2d17b17a8 --- /dev/null +++ b/egs/fame/v2/path.sh @@ -0,0 +1,6 @@ +export KALDI_ROOT=`pwd`/../../.. +[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh +export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH +[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 +. $KALDI_ROOT/tools/config/common_path.sh +export LC_ALL=C diff --git a/egs/fame/v2/run.sh b/egs/fame/v2/run.sh new file mode 100755 index 000000000..17127a643 --- /dev/null +++ b/egs/fame/v2/run.sh @@ -0,0 +1,402 @@ +#!/bin/bash +# Copyright 2015-2016 David Snyder +# 2015 Johns Hopkins University (Author: Daniel Garcia-Romero) +# 2015 Johns Hopkins University (Author: Daniel Povey) +# 2017 Radboud University (Author: Emre Yilmaz) +# Apache 2.0. +# +# See README.txt for more info on data required. +# Results (EERs) are inline in comments below. +# +# This example script shows how to replace the GMM-UBM +# with a DNN trained for ASR. + +. cmd.sh +. path.sh +set -e +mfccdir=`pwd`/mfcc +vaddir=`pwd`/mfcc +nnet=exp/nnet2_online/nnet_ms_a/final.mdl +famecorpus=./corpus + +# Data preparation + +if [ -d $famecorpus ] ; then + echo "Fame corpus present. OK." +elif [ -f ./fame.tar.gz ] ; then + echo "Unpacking..." + tar xzf fame.tar.gz +elif [ ! -d $famecorpus ] && [ ! 
-f ./fame.tar.gz ] ; then + echo "The Fame! corpus is not present. Please register here: http://www.ru.nl/clst/datasets/ " + echo " and download the corpus and put it at $famecorpus" && exit 1 +fi + +# Train a DNN on about 10 hours of Frisian-Dutch speech. + +local/dnn/train_dnn.sh + +echo "Preparing data/train.." +local/prepare_train.sh $famecorpus/SC + +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + echo "Preparing data/fame_${task}_${subtask}_${sets}.." + trials_female=data/fame_${task}_${subtask}_${sets}_female/trials + trials_male=data/fame_${task}_${subtask}_${sets}_male/trials + trials=data/fame_${task}_${subtask}_${sets}/trials + local/make_fame_test.pl $famecorpus/SV data $task $subtask $sets + local/make_fame_train.pl $famecorpus/SV data $task $subtask $sets + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + echo "Preparing data/fame_${task}_${subtask}_${sets}${year}.." + trials_female=data/fame_${task}_${subtask}_${sets}${year}_female/trials + trials_male=data/fame_${task}_${subtask}_${sets}${year}_male/trials + trials=data/fame_${task}_${subtask}_${sets}${year}/trials + local/make_fame_test_year.pl $famecorpus/SV data $task $subtask $sets $year + local/make_fame_train_year.pl $famecorpus/SV data $task $subtask $sets $year + + done + done + done +done + +echo "Copying data/train.." + +cp -r data/train data/train_dnn + +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + echo "Copying data/fame_${task}_${subtask}_${sets}.." 
+ cp -r data/fame_${task}_${subtask}_${sets}_enroll data/fame_${task}_${subtask}_${sets}_enroll_dnn + cp -r data/fame_${task}_${subtask}_${sets}_test data/fame_${task}_${subtask}_${sets}_test_dnn + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + echo "Copying data/fame_${task}_${subtask}_${sets}${year}.." + cp -r data/fame_${task}_${subtask}_${sets}${year}_enroll data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn + cp -r data/fame_${task}_${subtask}_${sets}${year}_test data/fame_${task}_${subtask}_${sets}${year}_test_dnn + + done + done + done +done + +# MFCC extraction + +echo "Extracting MFCC features for data/train.." + +steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \ + data/train exp/make_mfcc $mfccdir +utils/fix_data_dir.sh data/train + +steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_16k.conf --nj 100 --cmd "$train_cmd" \ + data/train_dnn exp/make_mfcc $mfccdir +utils/fix_data_dir.sh data/train_dnn + + +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + echo "Extracting MFCC features for data/fame_${task}_${subtask}_${sets}.." 
+ steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}_enroll exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}_enroll + steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_16k.conf --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}_enroll_dnn exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}_enroll_dnn + + steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}_test exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}_test + steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_16k.conf --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}_test_dnn exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}_test_dnn + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + echo "Extracting MFCC features for data/fame_${task}_${subtask}_${sets}${year}.." 
+ steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}${year}_enroll exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}${year}_enroll + steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_16k.conf --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn + + steps/make_mfcc.sh --mfcc-config conf/mfcc_16k.conf --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}${year}_test exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}${year}_test + steps/make_mfcc.sh --mfcc-config conf/mfcc_hires_16k.conf --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}${year}_test_dnn exp/make_mfcc $mfccdir + utils/fix_data_dir.sh data/fame_${task}_${subtask}_${sets}${year}_test_dnn + + done + done + done +done + +# VAD computation + +echo "Computing VAD for data/train.." + +sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \ + data/train exp/make_vad $vaddir + +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + echo "Computing VAD for data/fame_${task}_${subtask}_${sets}.." + sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}_enroll exp/make_vad $vaddir + sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}_test exp/make_vad $vaddir + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + echo "Computing VAD for data/fame_${task}_${subtask}_${sets}${year}.." 
+ sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}${year}_enroll exp/make_vad $vaddir + sid/compute_vad_decision.sh --nj 100 --cmd "$train_cmd" \ + data/fame_${task}_${subtask}_${sets}${year}_test exp/make_vad $vaddir + + done + done + done +done + +echo "Copying VAD for data/train.." +cp data/train/vad.scp data/train_dnn/vad.scp +cp data/train/utt2spk data/train_dnn/utt2spk +cp data/train/spk2utt data/train_dnn/spk2utt + +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + echo "Copying VAD for data/fame_${task}_${subtask}_${sets}.." + cp data/fame_${task}_${subtask}_${sets}_enroll/vad.scp data/fame_${task}_${subtask}_${sets}_enroll_dnn/vad.scp + cp data/fame_${task}_${subtask}_${sets}_test/vad.scp data/fame_${task}_${subtask}_${sets}_test_dnn/vad.scp + cp data/fame_${task}_${subtask}_${sets}_enroll/utt2spk data/fame_${task}_${subtask}_${sets}_enroll_dnn/utt2spk + cp data/fame_${task}_${subtask}_${sets}_test/utt2spk data/fame_${task}_${subtask}_${sets}_test_dnn/utt2spk + cp data/fame_${task}_${subtask}_${sets}_enroll/spk2utt data/fame_${task}_${subtask}_${sets}_enroll_dnn/spk2utt + cp data/fame_${task}_${subtask}_${sets}_test/spk2utt data/fame_${task}_${subtask}_${sets}_test_dnn/spk2utt + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + echo "Copying VAD for data/fame_${task}_${subtask}_${sets}${year}.." 
+ cp data/fame_${task}_${subtask}_${sets}${year}_enroll/vad.scp data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn/vad.scp + cp data/fame_${task}_${subtask}_${sets}${year}_test/vad.scp data/fame_${task}_${subtask}_${sets}${year}_test_dnn/vad.scp + cp data/fame_${task}_${subtask}_${sets}${year}_enroll/utt2spk data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn/utt2spk + cp data/fame_${task}_${subtask}_${sets}${year}_test/utt2spk data/fame_${task}_${subtask}_${sets}${year}_test_dnn/utt2spk + cp data/fame_${task}_${subtask}_${sets}${year}_enroll/spk2utt data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn/spk2utt + cp data/fame_${task}_${subtask}_${sets}${year}_test/spk2utt data/fame_${task}_${subtask}_${sets}${year}_test_dnn/spk2utt + + done + done + done +done + +# Train UBM and i-vector extractor + +echo "Training DNN-UBM and the i-vector extractor.." + +sid/init_full_ubm_from_dnn.sh --cmd "$train_cmd" \ + data/train data/train_dnn $nnet exp/full_ubm + +sid/train_ivector_extractor_dnn.sh \ + --cmd "$train_cmd" \ + --min-post 0.015 \ + --ivector-dim 600 \ + --num-iters 5 exp/full_ubm/final.ubm $nnet \ + data/train \ + data/train_dnn \ + exp/extractor_dnn + +# Extract i-vectors. + +echo "Extracting i-vectors for data/train.." 
+ +sid/extract_ivectors_dnn.sh --cmd "$train_cmd" --nj 10 exp/extractor_dnn $nnet data/train data/train_dnn exp/ivectors_train_dnn + +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + echo "Extracting i-vectors for data/fame_${task}_${subtask}_${sets}" + sid/extract_ivectors_dnn.sh --cmd "$train_cmd" --nj 10 \ + exp/extractor_dnn \ + $nnet \ + data/fame_${task}_${subtask}_${sets}_enroll \ + data/fame_${task}_${subtask}_${sets}_enroll_dnn \ + exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_dnn + sid/extract_ivectors_dnn.sh --cmd "$train_cmd" --nj 10 \ + exp/extractor_dnn \ + $nnet \ + data/fame_${task}_${subtask}_${sets}_test \ + data/fame_${task}_${subtask}_${sets}_test_dnn \ + exp/ivectors_fame_${task}_${subtask}_${sets}_test_dnn + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + echo "Extracting i-vectors for data/fame_${task}_${subtask}_${sets}${year}" + sid/extract_ivectors_dnn.sh --cmd "$train_cmd" --nj 10 \ + exp/extractor_dnn \ + $nnet \ + data/fame_${task}_${subtask}_${sets}${year}_enroll \ + data/fame_${task}_${subtask}_${sets}${year}_enroll_dnn \ + exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_dnn + sid/extract_ivectors_dnn.sh --cmd "$train_cmd" --nj 10 \ + exp/extractor_dnn \ + $nnet \ + data/fame_${task}_${subtask}_${sets}${year}_test \ + data/fame_${task}_${subtask}_${sets}${year}_test_dnn \ + exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_dnn + + done + done + done +done + +# Calculate i-vector means used by the scoring scripts + +echo "Calculating i-vectors means.." 
+ +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + local/scoring_common.sh data/train data/fame_${task}_${subtask}_${sets}_enroll data/fame_${task}_${subtask}_${sets}_test \ + exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_dnn exp/ivectors_fame_${task}_${subtask}_${sets}_test_dnn + + trials_female=data/fame_${task}_${subtask}_${sets}_test_female/trials + trials_male=data/fame_${task}_${subtask}_${sets}_test_male/trials + trials=data/fame_${task}_${subtask}_${sets}_test/trials + + local/plda_scoring.sh data/train data/fame_${task}_${subtask}_${sets}_enroll data/fame_${task}_${subtask}_${sets}_test \ + exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_dnn exp/ivectors_fame_${task}_${subtask}_${sets}_test_dnn $trials local/scores_dnn_ind_pooled_${task}_${subtask}_${sets} + + local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}_enroll_female data/fame_${task}_${subtask}_${sets}_test_female \ + exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_dnn_female exp/ivectors_fame_${task}_${subtask}_${sets}_test_dnn_female $trials_female local/scores_dnn_ind_female_${task}_${subtask}_${sets} + + local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}_enroll_male data/fame_${task}_${subtask}_${sets}_test_male \ + exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}_enroll_dnn_male exp/ivectors_fame_${task}_${subtask}_${sets}_test_dnn_male $trials_male local/scores_dnn_ind_male_${task}_${subtask}_${sets} + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + local/scoring_common.sh data/train data/fame_${task}_${subtask}_${sets}${year}_enroll data/fame_${task}_${subtask}_${sets}${year}_test \ + exp/ivectors_train_dnn 
exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_dnn exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_dnn + + trials_female=data/fame_${task}_${subtask}_${sets}${year}_test_female/trials + trials_male=data/fame_${task}_${subtask}_${sets}${year}_test_male/trials + trials=data/fame_${task}_${subtask}_${sets}${year}_test/trials + + local/plda_scoring.sh data/train data/fame_${task}_${subtask}_${sets}${year}_enroll data/fame_${task}_${subtask}_${sets}${year}_test \ + exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_dnn exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_dnn $trials local/scores_dnn_ind_pooled_${task}_${subtask}_${sets}${year} + + local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}${year}_enroll_female data/fame_${task}_${subtask}_${sets}${year}_test_female \ + exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_dnn_female exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_dnn_female $trials_female local/scores_dnn_ind_female_${task}_${subtask}_${sets}${year} + + local/plda_scoring.sh --use-existing-models true data/train data/fame_${task}_${subtask}_${sets}${year}_enroll_male data/fame_${task}_${subtask}_${sets}${year}_test_male \ + exp/ivectors_train_dnn exp/ivectors_fame_${task}_${subtask}_${sets}${year}_enroll_dnn_male exp/ivectors_fame_${task}_${subtask}_${sets}${year}_test_dnn_male $trials_male local/scores_dnn_ind_male_${task}_${subtask}_${sets}${year} + + done + done + done +done + +# Calculating EER + +echo "Calculating EER.." 
+ +for task in complete ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + + trials=data/fame_${task}_${subtask}_${sets}_test/trials + echo "DNN EER for fame_${task}_${subtask}_${sets}" + for x in ind; do + for y in female male pooled; do + echo "python local/prepare_for_eer.py $trials local/scores_dnn_${x}_${y}_${task}_${subtask}_${sets}/plda_scores" + eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_dnn_${x}_${y}_${task}_${subtask}_${sets}/plda_scores) 2> /dev/null` + echo "${x} ${y}: $eer" + done + done + + done + done +done + +for task in ageing; do + for subtask in 3sec 10sec 30sec; do + for sets in eval; do + for year in _1t3 _4t10 _mt10; do + + trials=data/fame_${task}_${subtask}_${sets}${year}_test/trials + echo "DNN EER for fame_${task}_${subtask}_${sets}${year}" + for x in ind; do + for y in female male pooled; do + echo "python local/prepare_for_eer.py $trials local/scores_dnn_${x}_${y}_${task}_${subtask}_${sets}${year}/plda_scores" + eer=`compute-eer <(python local/prepare_for_eer.py $trials local/scores_dnn_${x}_${y}_${task}_${subtask}_${sets}${year}/plda_scores) 2> /dev/null` + echo "${x} ${y}: $eer" + done + done + + done + done + done +done diff --git a/egs/fame/v2/sid b/egs/fame/v2/sid new file mode 120000 index 000000000..893a12f30 --- /dev/null +++ b/egs/fame/v2/sid @@ -0,0 +1 @@ +../../sre08/v1/sid \ No newline at end of file diff --git a/egs/fame/v2/steps b/egs/fame/v2/steps new file mode 120000 index 000000000..6e99bf5b5 --- /dev/null +++ b/egs/fame/v2/steps @@ -0,0 +1 @@ +../../wsj/s5/steps \ No newline at end of file diff --git a/egs/fame/v2/utils b/egs/fame/v2/utils new file mode 120000 index 000000000..b24088521 --- /dev/null +++ b/egs/fame/v2/utils @@ -0,0 +1 @@ +../../wsj/s5/utils \ No newline at end of file -- 2.39.2