diff --git a/audiomate/corpus/io/common_voice.py b/audiomate/corpus/io/common_voice.py index a830fa6..aef984a 100644 --- a/audiomate/corpus/io/common_voice.py +++ b/audiomate/corpus/io/common_voice.py @@ -97,7 +97,8 @@ def get_subset_ids(path): # We don't want to include the invalidated files # since there maybe corrupt files - if basename != 'invalidated': + # reported utterances also cause issues, e.g. empty entries + if basename not in ('invalidated', 'reported'): subset_ids.append(basename) return subset_ids