Skip to content

Commit

Permalink
fix: always validate permissions_obtained
Browse files Browse the repository at this point in the history
also add a unittest
  • Loading branch information
roedoejet committed Jun 21, 2024
1 parent 4555605 commit 919ab4a
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 7 deletions.
1 change: 1 addition & 0 deletions everyvoice/config/preprocessing_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class Dataset(PartialLoadConfig):
permissions_obtained: bool = Field(
False,
description="An attestation that permission has been obtained to use this data. You may not use EveryVoice to build a TTS system with data that you do not have permission to use and there are serious possible consequences for doing so. Finding data online does not constitute permission. The speaker should be aware and consent to their data being used in this way.",
validate_default=True,
)
data_dir: PossiblyRelativePath = Field(
Path("/please/create/a/path/to/your/dataset/data"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ source_data:
filelist: ../r-filelist.psv
filelist_loader: everyvoice.utils.generic_psv_filelist_reader
label: dataset_0
permissions_obtained: true
sox_effects:
- [channel, '1']
train_split: 0.9
7 changes: 6 additions & 1 deletion everyvoice/tests/test_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,12 @@ def test_shared_sox(self) -> None:
vocoder_config = VocoderConfig(
contact=self.contact,
preprocessing=PreprocessingConfig(
source_data=[Dataset(), Dataset(), Dataset(), Dataset()]
source_data=[
Dataset(permissions_obtained=True),
Dataset(permissions_obtained=True),
Dataset(permissions_obtained=True),
Dataset(permissions_obtained=True),
]
),
)
config: EveryVoiceConfig = EveryVoiceConfig(
Expand Down
24 changes: 18 additions & 6 deletions everyvoice/tests/test_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ def test_run_doctest(self):
def test_read_filelist(self):
    """Check that the first entry of the loaded filelist has the expected basename."""
    first_entry = self.filelist[0]
    self.assertEqual(first_entry["basename"], "LJ050-0269")

def test_no_permissions(self):
    """Building a config whose first dataset has no permissions must raise.

    The existing feature-prediction config is dumped to a plain dict, the
    first dataset's ``permissions_obtained`` flag is set to ``False``, and
    re-constructing the config from that dict is expected to raise
    ``ValueError``.
    """
    config_kwargs = self.fp_config.model_dump()
    # Mutate the dumped dict in place; only the first dataset is changed.
    first_dataset = config_kwargs["preprocessing"]["source_data"][0]
    first_dataset["permissions_obtained"] = False
    with self.assertRaises(ValueError):
        FeaturePredictionConfig(**config_kwargs)

def test_process_audio_for_alignment(self):
config = AlignerConfig(contact=self.contact)
for entry in self.filelist[1:]:
Expand Down Expand Up @@ -392,9 +400,9 @@ def test_text_processing(self):
preprocessed_dir.mkdir(parents=True, exist_ok=True)
output_filelist = preprocessed_dir / "preprocessed_filelist.psv"
shutil.copyfile(filelist_test_info["path"], output_filelist)
fp_config.preprocessing.source_data[0].filelist = (
filelist_test_info["path"]
)
fp_config.preprocessing.source_data[
0
].filelist = filelist_test_info["path"]
fp_config.preprocessing.save_dir = preprocessed_dir
preprocessor = Preprocessor(fp_config)
with capture_stdout() as output, mute_logger(
Expand Down Expand Up @@ -489,9 +497,13 @@ def test_incremental_preprocess(self):
with tempfile.TemporaryDirectory(
prefix="test_incremental_preprocess", dir="."
) as tmpdir:
fp_config, lj_filelist, full_filelist, partial_filelist, to_process = (
self.get_simple_config(tmpdir)
)
(
fp_config,
lj_filelist,
full_filelist,
partial_filelist,
to_process,
) = self.get_simple_config(tmpdir)

fp_config.preprocessing.source_data[0].filelist = partial_filelist
with capture_stdout() as output, mute_logger("everyvoice.preprocessor"):
Expand Down

0 comments on commit 919ab4a

Please sign in to comment.