Skip to content

Commit

Permalink
fix: set global cleaners as the union of dataset cleaners in the wizard
Browse files Browse the repository at this point in the history
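This is a temporary workaround: cleaners should really be dataset-specific rather than global, which should be fixed properly at a later date by resolving #359.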
  • Loading branch information
roedoejet committed Jun 21, 2024
1 parent feba08f commit 9ef98dd
Showing 1 changed file with 12 additions and 6 deletions.
18 changes: 12 additions & 6 deletions everyvoice/wizard/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,19 @@ def effect(self):
symbols = {}
multispeaker = False
multilingual = False
global_cleaners = (
[]
) # TODO: this should be fixed by https://github.com/roedoejet/EveryVoice/issues/359
for dataset in [key for key in self.state.keys() if key.startswith("dataset_")]:
dataset_state = self.state[dataset]
# Add Cleaners
# TODO: these should really be dataset-specific cleaners, not global cleaners
# so this should be fixed by https://github.com/roedoejet/EveryVoice/issues/359
if dataset_state.get(StepNames.text_processing_step):
global_cleaners += [
TextProcessingStep().process_lookup[x]["fn"]
for x in dataset_state[StepNames.text_processing_step]
]
# Gather Symbols for Text Configuration
# rename keys based on dataset name:
dataset_name = dataset_state[StepNames.dataset_name_step]
Expand Down Expand Up @@ -263,12 +274,7 @@ def effect(self):
)

text_config = TextConfig(symbols=Symbols(**symbols))
# Add Cleaners
if dataset_state.get(StepNames.text_processing_step):
text_config.cleaners += [
TextProcessingStep().process_lookup[x]["fn"]
for x in dataset_state[StepNames.text_processing_step]
]
text_config.cleaners += global_cleaners
text_config_path = Path(f"{TEXT_CONFIG_FILENAME_PREFIX}.{self.response}")
write_dict_to_config(
json.loads(text_config.model_dump_json(exclude_none=False)),
Expand Down

0 comments on commit 9ef98dd

Please sign in to comment.