[egs] Update VoxCeleb Recipe (kaldi-asr#2403)

* [egs]: updating the voxceleb recipe so that it uses more of the available data, and uses a better performing wideband MFCC config * [egs]: fixing comment error in mfcc.conf * [egs] updating voxceleb/v1/run.sh results * [egs] changing url to download voxceleb1 test set from, updating READMEs * [egs] fixing comment in voxceleb/v2/run.sh * [egs] adding check that ffmpeg exists in voxceleb2 data prep
vimalmanohar · May 11, 2018 · 90363ea · 90363ea
1 parent 08b47be
commit 90363ea
Show file tree

Hide file tree

Showing 11 changed files with 268 additions and 194 deletions.
diff --git a/egs/sre16/v1/local/make_mx6_calls.pl b/egs/sre16/v1/local/make_mx6_calls.pl
@@ -39,6 +39,7 @@
 if (system("find $db_base/mx6_speech/data/ulaw_sphere/ -name '*.sph' > $tmp_dir/sph.list") != 0) {
   die "Error getting list of sph files";
 }
+
 open(SPHLIST, "<$tmp_dir/sph.list") or die "cannot open wav list";
 
 while(<SPHLIST>) {

diff --git a/egs/voxceleb/README.txt b/egs/voxceleb/README.txt
@@ -1,6 +1,13 @@
-This is a Kaldi recipe for speaker verification using the VoxCeleb1 and VoxCeleb2 corpora.
-See http://www.robots.ox.ac.uk/~vgg/data/voxceleb/ and 
-http://www.robots.ox.ac.uk/~vgg/data/voxceleb2/ for additional details and
-information on how to obtain them.
+
+ This is a Kaldi recipe for speaker verification using the VoxCeleb1 and
+ VoxCeleb2 corpora.  See http://www.robots.ox.ac.uk/~vgg/data/voxceleb/ and 
+ http://www.robots.ox.ac.uk/~vgg/data/voxceleb2/ for additional details and
+ information on how to obtain them.
 
-Note: This recipe requires ffmpeg to be installed and its location included in $PATH
+ Note: This recipe requires ffmpeg to be installed and its location included
+ in $PATH
+
+ The subdirectories "v1" and so on are different speaker recognition
+ recipes. The recipe in v1 demonstrates a standard approach using a
+ full-covariance GMM-UBM, iVectors, and a PLDA backend.  The example 
+ in v2 demonstrates DNN speaker embeddings with a PLDA backend.
diff --git a/egs/voxceleb/v1/conf/mfcc.conf b/egs/voxceleb/v1/conf/mfcc.conf
@@ -1,8 +1,7 @@
 --sample-frequency=16000
 --frame-length=25 # the default is 25
---num-mel-bins=40 #higher than the default which is 23
---num-ceps=20 # higher than the default which is 12.
---low-freq=50 # the default is 20.
---high-freq=7500 # the default is zero meaning use the Nyquist (8k in this case).
---num-ceps=20 # higher than the default which is 12.
+--low-freq=20 # the default.
+--high-freq=7600 # the default is zero meaning use the Nyquist (8k in this case).
+--num-mel-bins=30
+--num-ceps=24
 --snip-edges=false
diff --git a/egs/voxceleb/v1/local/make_voxceleb1.pl b/egs/voxceleb/v1/local/make_voxceleb1.pl
@@ -0,0 +1,113 @@
+#!/usr/bin/perl
+#
+# Copyright 2018  Ewald Enzinger
+#           2018  David Snyder
+#
+# Usage: make_voxceleb1.pl /export/voxceleb1 data/
+
+# Validate the command line: exactly two positional arguments are required,
+# the VoxCeleb1 root directory and the directory that receives the output.
+if (@ARGV != 2) {
+  print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-data-dir>\n";
+  print STDERR "e.g. $0 /export/voxceleb1 data/\n";
+  exit(1);
+}
+
+($data_base, $out_dir) = @ARGV;
+my $out_test_dir = "$out_dir/voxceleb1_test";
+my $out_train_dir = "$out_dir/voxceleb1_train";
+
+# Download the verification trial list if it is not already present.
+# wget -O creates its output file even when the transfer fails, so a failed
+# download is removed again; otherwise the leftover file would satisfy the
+# -e test on the next run and be read as an empty trial list.
+# The list form of system() bypasses the shell, so paths containing spaces or
+# shell metacharacters are passed through unchanged.
+my $trials_file = "$data_base/voxceleb1_test.txt";
+if (! -e $trials_file) {
+  if (system("wget", "-O", $trials_file, "http://www.openslr.org/resources/49/voxceleb1_test.txt") != 0) {
+    unlink $trials_file;
+    die "Error downloading the verification trials file $trials_file";
+  }
+}
+
+# Create both output directories (including any missing parents).
+foreach my $dir ($out_test_dir, $out_train_dir) {
+  if (system("mkdir", "-p", $dir) != 0) {
+    die "Error making directory $dir";
+  }
+}
+
+# Collect the per-speaker subdirectories of voxceleb1_wav, skipping "." and "..".
+opendir my $dh, "$data_base/voxceleb1_wav" or die "Cannot open directory: $!";
+my @spkr_dirs = grep {-d "$data_base/voxceleb1_wav/$_" && ! /^\.{1,2}$/} readdir($dh);
+closedir $dh;
+
+# Open the trial list for reading and every output file for writing.
+open(TRIAL_IN, "<", $trials_file) or die "Could not open the verification trials file $data_base/voxceleb1_test.txt";
+open(SPKR_TEST, ">", "$out_test_dir/utt2spk") or die "Could not open the output file $out_test_dir/utt2spk";
+open(WAV_TEST, ">", "$out_test_dir/wav.scp") or die "Could not open the output file $out_test_dir/wav.scp";
+open(SPKR_TRAIN, ">", "$out_train_dir/utt2spk") or die "Could not open the output file $out_train_dir/utt2spk";
+open(WAV_TRAIN, ">", "$out_train_dir/wav.scp") or die "Could not open the output file $out_train_dir/wav.scp";
+open(TRIAL_OUT, ">", "$out_test_dir/trials") or die "Could not open the output file $out_test_dir/trials";
+
+# Read the verification trial list.  Each line is expected to hold three
+# whitespace-separated fields: a label ("1" marks a target trial) followed by
+# two wav paths of the form <speaker-id>/<filename>.  Every utterance that
+# appears in a trial is recorded in %test_utts so it can later be routed to
+# the test set, and the trial is rewritten as "<utt1> <utt2> target|nontarget".
+my %test_utts = ();
+while (my $line = <TRIAL_IN>) {
+  chomp $line;
+  my ($tar_or_none, @paths) = split ' ', $line;
+
+  # Skip blank or truncated lines instead of emitting a bogus trial.
+  next unless @paths >= 2;
+
+  # Build the utterance id for the left-hand and right-hand side of the trial.
+  my @utt_ids;
+  foreach my $path (@paths[0, 1]) {
+    my ($spkr_id, $filename) = split('/', $path);
+    # The first 11 characters of the filename are the recording id; the 7
+    # characters starting at offset 12 are the segment number.
+    my $rec_id = substr($filename, 0, 11);
+    my $segment = substr($filename, 12, 7);
+    my $utt_id = "$spkr_id-$rec_id-$segment";
+    $test_utts{$utt_id} = 1;
+    push @utt_ids, $utt_id;
+  }
+
+  my $target = ($tar_or_none eq "1") ? "target" : "nontarget";
+  print TRIAL_OUT "$utt_ids[0] $utt_ids[1] $target\n";
+}
+
+# Walk every speaker directory and emit one wav.scp / utt2spk entry per .wav
+# file.  Utterances that appeared in the trial list go to the test set; all
+# remaining utterances go to the training set.
+for my $spkr_id (@spkr_dirs) {
+  opendir my $dh, "$data_base/voxceleb1_wav/$spkr_id/" or die "Cannot open directory: $!";
+  my @wav_names = grep {/\.wav$/} readdir($dh);
+  closedir $dh;
+
+  for my $wav_name (@wav_names) {
+    # Drop the extension to obtain the bare filename.
+    (my $filename = $wav_name) =~ s/\.[^.]+$//;
+
+    # Utterance id is <speaker>-<recording id>-<segment>.
+    my $utt_id = join("-", $spkr_id, substr($filename, 0, 11), substr($filename, 12, 7));
+    my $wav = "$data_base/voxceleb1_wav/$spkr_id/$filename.wav";
+
+    # Pick the output handles according to test-set membership.
+    my ($wav_out, $spkr_out) = exists $test_utts{$utt_id}
+      ? (\*WAV_TEST, \*SPKR_TEST)
+      : (\*WAV_TRAIN, \*SPKR_TRAIN);
+
+    print {$wav_out} "$utt_id $wav\n";
+    print {$spkr_out} "$utt_id $spkr_id\n";
+  }
+}
+
+# Close every handle.  Buffered write errors only surface at close time, so
+# each failure names the file involved and the system error.
+close(SPKR_TEST) or die "Error closing $out_test_dir/utt2spk: $!";
+close(WAV_TEST) or die "Error closing $out_test_dir/wav.scp: $!";
+close(SPKR_TRAIN) or die "Error closing $out_train_dir/utt2spk: $!";
+close(WAV_TRAIN) or die "Error closing $out_train_dir/wav.scp: $!";
+close(TRIAL_OUT) or die "Error closing $out_test_dir/trials: $!";
+close(TRIAL_IN) or die "Error closing $data_base/voxceleb1_test.txt: $!";
+
+# Finish the test directory first, then the training directory: derive
+# spk2utt from utt2spk, run the fix-up script, and validate the result.
+foreach my $dir ($out_test_dir, $out_train_dir) {
+  if (system(
+    "utils/utt2spk_to_spk2utt.pl $dir/utt2spk >$dir/spk2utt") != 0) {
+    die "Error creating spk2utt file in directory $dir";
+  }
+  # The fix-up step stays non-fatal (validation below is the real gate), but
+  # a failure is reported rather than silently ignored.
+  if (system("env LC_COLLATE=C utils/fix_data_dir.sh $dir") != 0) {
+    warn "Warning: utils/fix_data_dir.sh returned non-zero status for directory $dir\n";
+  }
+  if (system("env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $dir") != 0) {
+    die "Error validating directory $dir";
+  }
+}
diff --git a/egs/voxceleb/v1/local/make_voxceleb1_test.pl b/egs/voxceleb/v1/local/make_voxceleb1_test.pl
diff --git a/egs/voxceleb/v1/local/make_voxceleb2.pl b/egs/voxceleb/v1/local/make_voxceleb2.pl
@@ -12,6 +12,11 @@
   exit(1);
 }
 
+# Check that ffmpeg is installed.  The exit status of the POSIX "command -v"
+# builtin is tested instead of the text printed by "which", because some
+# "which" implementations print a message to stdout when nothing is found.
+if (system("command -v ffmpeg >/dev/null 2>&1") != 0) {
+  die "Error: this script requires that ffmpeg is installed.";
+}
+
 ($data_base, $dataset, $out_dir) = @ARGV;
 
 if ("$dataset" ne "dev" && "$dataset" ne "test") {
@@ -22,9 +27,10 @@
 my @spkr_dirs = grep {-d "$data_base/$dataset/aac/$_" && ! /^\.{1,2}$/} readdir($dh);
 closedir $dh;
 
-if (! -d "$out_dir") {
-  mkdir($out_dir) or die "Could not create directory $!";
+if (system("mkdir -p $out_dir") != 0) {
+  die "Error making directory $out_dir";
 }
+
 open(SPKR, ">", "$out_dir/utt2spk") or die "Could not open the output file $out_dir/utt2spk";
 open(WAV, ">", "$out_dir/wav.scp") or die "Could not open the output file $out_dir/wav.scp";
 

diff --git a/egs/voxceleb/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh b/egs/voxceleb/v1/local/nnet3/xvector/tuning/run_xvector_1a.sh
@@ -53,7 +53,7 @@ num_pdfs=$(awk '{print $2}' $data/utt2spk | sort | uniq -c | wc -l)
 # the number of archives and increases the number of examples per archive.
 # Decreasing this value increases the number of archives, while decreasing the
 # number of examples per archive.
-if [ $stage -le 4 ]; then
+if [ $stage -le 6 ]; then
   echo "$0: Getting neural network training egs";
   # dump egs.
   if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
@@ -68,11 +68,11 @@ if [ $stage -le 4 ]; then
     --min-frames-per-chunk 200 \
     --max-frames-per-chunk 400 \
     --num-diagnostic-archives 3 \
-    --num-repeats 35 \
+    --num-repeats 50 \
     "$data" $egs_dir
 fi
 
-if [ $stage -le 5 ]; then
+if [ $stage -le 7 ]; then
   echo "$0: creating neural net configs using the xconfig parser";
   num_targets=$(wc -w $egs_dir/pdf2num | awk '{print $1}')
   feat_dim=$(cat $egs_dir/info/feat_dim)
@@ -129,7 +129,7 @@ fi
 
 dropout_schedule='0,0@0.20,0.1@0.50,0'
 srand=123
-if [ $stage -le 6 ]; then
+if [ $stage -le 8 ]; then
   steps/nnet3/train_raw_dnn.py --stage=$train_stage \
     --cmd="$train_cmd" \
     --trainer.optimization.proportional-shrink 10 \