forked from kaldi-asr/kaldi
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[egs] Update VoxCeleb Recipe (kaldi-asr#2403)
* [egs]: updating the voxceleb recipe so that it uses more of the available data, and uses a better performing wideband MFCC config * [egs]: fixing comment error in mfcc.conf * [egs] updating voxceleb/v1/run.sh results * [egs] changing url to download voxceleb1 test set from, updating READMEs * [egs] fixing comment in voxceleb/v2/run.sh * [egs] adding check that ffmpeg exists in voxceleb2 data prep
- Loading branch information
1 parent
08b47be
commit 90363ea
Showing
11 changed files
with
268 additions
and
194 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,13 @@ | ||
This is a Kaldi recipe for speaker verification using the VoxCeleb1 and VoxCeleb2 corpora. | ||
See http://www.robots.ox.ac.uk/~vgg/data/voxceleb/ and | ||
http://www.robots.ox.ac.uk/~vgg/data/voxceleb2/ for additional details and | ||
information on how to obtain them. | ||
|
||
This is a Kaldi recipe for speaker verification using the VoxCeleb1 and | ||
VoxCeleb2 corpora. See http://www.robots.ox.ac.uk/~vgg/data/voxceleb/ and | ||
http://www.robots.ox.ac.uk/~vgg/data/voxceleb2/ for additional details and | ||
information on how to obtain them. | ||
|
||
Note: This recipe requires ffmpeg to be installed and its location included in $PATH | ||
Note: This recipe requires ffmpeg to be installed and its location included | ||
in $PATH | ||
|
||
The subdirectories "v1" and so on are different speaker recognition | ||
recipes. The recipe in v1 demonstrates a standard approach using a | ||
full-covariance GMM-UBM, iVectors, and a PLDA backend. The example | ||
in v2 demonstrates DNN speaker embeddings with a PLDA backend. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,7 @@ | ||
--sample-frequency=16000 | ||
--frame-length=25 # the default is 25 | ||
--num-mel-bins=40 #higher than the default which is 23 | ||
--num-ceps=20 # higher than the default which is 12. | ||
--low-freq=50 # the default is 20. | ||
--high-freq=7500 # the default is zero meaning use the Nyquist (8k in this case). | ||
--num-ceps=20 # higher than the default which is 12. | ||
--low-freq=20 # the default. | ||
--high-freq=7600 # the default is zero meaning use the Nyquist (8k in this case). | ||
--num-mel-bins=30 | ||
--num-ceps=24 | ||
--snip-edges=false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
#!/usr/bin/perl | ||
# | ||
# Copyright 2018 Ewald Enzinger | ||
# 2018 David Snyder | ||
# | ||
# Usage: make_voxceleb1.pl /export/voxceleb1 data/ | ||
|
||
use strict;
use warnings;

# The script takes exactly two positional arguments: the VoxCeleb1 corpus
# root and the directory under which the output data directories are made.
# Anything else prints the usage text to STDERR and exits with status 1.
if (@ARGV != 2) {
  print STDERR "Usage: $0 <path-to-voxceleb1> <path-to-data-dir>\n";
  print STDERR "e.g. $0 /export/voxceleb1 data/\n";
  exit(1);
}

# Declared lexically (previously undeclared package globals) so the script
# runs cleanly under "use strict".
my ($data_base, $out_dir) = @ARGV;
# Output directories for the test and train portions of VoxCeleb1.
my $out_test_dir = "$out_dir/voxceleb1_test";
my $out_train_dir = "$out_dir/voxceleb1_train";
|
||
# Fetch the verification trial list when the corpus directory does not already
# hold a non-empty copy.  The download goes to a temporary file first because
# "wget -O" creates its output file even when the transfer fails; writing
# straight to the final name would leave an empty file that later runs would
# mistake for a valid trial list.
my $trials_file = "$data_base/voxceleb1_test.txt";
if (! -s $trials_file) {
  my $trials_url = "http://www.openslr.org/resources/49/voxceleb1_test.txt";
  my $trials_tmp = "$trials_file.tmp";
  # List-form system() bypasses the shell, so unusual characters in
  # $data_base cannot alter the command.
  if (system("wget", "-O", $trials_tmp, $trials_url) != 0) {
    unlink $trials_tmp;
    die "Error downloading $trials_url to $trials_file";
  }
  rename($trials_tmp, $trials_file)
    or die "Error moving $trials_tmp to $trials_file: $!";
}
|
||
# Create the test and train output directories (and any missing parents),
# aborting as soon as one of the mkdir calls reports failure.
foreach my $dir ($out_test_dir, $out_train_dir) {
  system("mkdir -p $dir") == 0
    or die "Error making directory $dir";
}
|
||
# Collect the names of the subdirectories of voxceleb1_wav (one per speaker,
# as they are used as speaker IDs further down), leaving out "." and "..".
my $wav_root = "$data_base/voxceleb1_wav";
opendir(my $wav_root_dh, $wav_root) or die "Cannot open directory: $!";
my @spkr_dirs = grep { !/^\.{1,2}$/ && -d "$wav_root/$_" } readdir($wav_root_dh);
closedir($wav_root_dh);
|
||
# Open the trial list for reading and every output file for writing.  Each
# failure message includes $! so the underlying OS error (missing file,
# permissions, ...) is reported rather than just the file name.
open(TRIAL_IN, "<", "$data_base/voxceleb1_test.txt")
  or die "Could not open the verification trials file $data_base/voxceleb1_test.txt: $!";
open(SPKR_TEST, ">", "$out_test_dir/utt2spk")
  or die "Could not open the output file $out_test_dir/utt2spk: $!";
open(WAV_TEST, ">", "$out_test_dir/wav.scp")
  or die "Could not open the output file $out_test_dir/wav.scp: $!";
open(SPKR_TRAIN, ">", "$out_train_dir/utt2spk")
  or die "Could not open the output file $out_train_dir/utt2spk: $!";
open(WAV_TRAIN, ">", "$out_train_dir/wav.scp")
  or die "Could not open the output file $out_train_dir/wav.scp: $!";
open(TRIAL_OUT, ">", "$out_test_dir/trials")
  or die "Could not open the output file $out_test_dir/trials: $!";
|
||
# Build an utterance ID of the form "<speaker>-<recording>-<segment>".
#   $spkr_id  - speaker directory name
#   $filename - wav file name, with or without its extension
# The recording ID is the first 11 characters of the file name and the segment
# is the 7 characters starting at offset 12; the character at offset 11 is
# skipped (presumably a separator such as "_" - confirm against the corpus).
sub make_utt_id {
  my ($spkr_id, $filename) = @_;
  my $rec_id = substr($filename, 0, 11);
  my $segment = substr($filename, 12, 7);
  return "$spkr_id-$rec_id-$segment";
}

# Convert a trial-list path "<speaker>/<file>" into an utterance ID.
# Dies when the path has no "/" and therefore no file component.
sub path_to_utt_id {
  my ($path) = @_;
  my ($spkr_id, $filename) = split('/', $path);
  defined $filename
    or die "Unexpected trial path (expected <speaker>/<file>): $path";
  return make_utt_id($spkr_id, $filename);
}

# Pass 1: read the verification trials.  Each line holds a label and two wav
# paths; a label of "1" marks a target trial, anything else a nontarget one.
# Every utterance named in a trial is remembered as a test utterance, and the
# trial is rewritten in "<utt1> <utt2> <target|nontarget>" form.
my %test_utts = ();
while (my $line = <TRIAL_IN>) {
  chomp $line;
  # Tolerate blank lines (e.g. a trailing newline at the end of the list).
  next unless $line =~ /\S/;
  my ($tar_or_none, $path1, $path2) = split(' ', $line);
  if (!defined $path2) {
    die "Malformed trial on line $. of the verification trials file: $line";
  }

  my $utt_id1 = path_to_utt_id($path1);
  my $utt_id2 = path_to_utt_id($path2);
  $test_utts{$utt_id1} = 1;
  $test_utts{$utt_id2} = 1;

  my $target = ($tar_or_none eq "1") ? "target" : "nontarget";
  print TRIAL_OUT "$utt_id1 $utt_id2 $target\n";
}

# Pass 2: walk every speaker directory and route each .wav file to the test
# set when its utterance ID appeared in a trial, otherwise to the train set.
foreach my $spkr_id (@spkr_dirs) {
  my $spkr_dir = "$data_base/voxceleb1_wav/$spkr_id";
  opendir(my $dh, $spkr_dir) or die "Cannot open directory $spkr_dir: $!";
  # Keep only *.wav entries, with the extension stripped.
  my @files = map { /^(.*)\.wav$/ ? $1 : () } readdir($dh);
  closedir($dh);
  foreach my $filename (@files) {
    my $utt_id = make_utt_id($spkr_id, $filename);
    my $wav = "$spkr_dir/$filename.wav";
    if (exists $test_utts{$utt_id}) {
      print WAV_TEST "$utt_id $wav\n";
      print SPKR_TEST "$utt_id $spkr_id\n";
    } else {
      print WAV_TRAIN "$utt_id $wav\n";
      print SPKR_TRAIN "$utt_id $spkr_id\n";
    }
  }
}
|
||
# Close every handle and check the result: buffered write errors (for example
# a full disk) only surface at close time, so each failure names the file
# involved and the OS error instead of a bare "Died".
close(SPKR_TEST) or die "Error closing $out_test_dir/utt2spk: $!";
close(WAV_TEST) or die "Error closing $out_test_dir/wav.scp: $!";
close(SPKR_TRAIN) or die "Error closing $out_train_dir/utt2spk: $!";
close(WAV_TRAIN) or die "Error closing $out_train_dir/wav.scp: $!";
close(TRIAL_OUT) or die "Error closing $out_test_dir/trials: $!";
close(TRIAL_IN) or die "Error closing $data_base/voxceleb1_test.txt: $!";
|
||
# Finish each data directory in turn (test first, then train): derive spk2utt
# from utt2spk, run the fix script, then run the validation script.  The exit
# status of fix_data_dir.sh is not checked; the validation step that follows
# is the one that aborts the script on failure.
foreach my $dir ($out_test_dir, $out_train_dir) {
  my $spk2utt_cmd = "utils/utt2spk_to_spk2utt.pl $dir/utt2spk >$dir/spk2utt";
  system($spk2utt_cmd) == 0
    or die "Error creating spk2utt file in directory $dir";

  system("env LC_COLLATE=C utils/fix_data_dir.sh $dir");

  my $validate_cmd = "env LC_COLLATE=C utils/validate_data_dir.sh --no-text --no-feats $dir";
  system($validate_cmd) == 0
    or die "Error validating directory $dir";
}
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,7 +53,7 @@ num_pdfs=$(awk '{print $2}' $data/utt2spk | sort | uniq -c | wc -l) | |
# the number of archives and increases the number of examples per archive. | ||
# Decreasing this value increases the number of archives, while decreasing the | ||
# number of examples per archive. | ||
if [ $stage -le 4 ]; then | ||
if [ $stage -le 6 ]; then | ||
echo "$0: Getting neural network training egs"; | ||
# dump egs. | ||
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then | ||
|
@@ -68,11 +68,11 @@ if [ $stage -le 4 ]; then | |
--min-frames-per-chunk 200 \ | ||
--max-frames-per-chunk 400 \ | ||
--num-diagnostic-archives 3 \ | ||
--num-repeats 35 \ | ||
--num-repeats 50 \ | ||
"$data" $egs_dir | ||
fi | ||
|
||
if [ $stage -le 5 ]; then | ||
if [ $stage -le 7 ]; then | ||
echo "$0: creating neural net configs using the xconfig parser"; | ||
num_targets=$(wc -w $egs_dir/pdf2num | awk '{print $1}') | ||
feat_dim=$(cat $egs_dir/info/feat_dim) | ||
|
@@ -129,7 +129,7 @@ fi | |
|
||
dropout_schedule='0,0@0.20,0.1@0.50,0' | ||
srand=123 | ||
if [ $stage -le 6 ]; then | ||
if [ $stage -le 8 ]; then | ||
steps/nnet3/train_raw_dnn.py --stage=$train_stage \ | ||
--cmd="$train_cmd" \ | ||
--trainer.optimization.proportional-shrink 10 \ | ||
|
Oops, something went wrong.