From 4c4028c328ca36d95e3a161ceae490b963ea43f6 Mon Sep 17 00:00:00 2001
From: pere
Date: Wed, 30 Oct 2024 09:19:36 +0100
Subject: [PATCH] Update run_pseudo_labelling.py

Adding: token=training_args.hub_token

---
 training/run_pseudo_labelling.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/training/run_pseudo_labelling.py b/training/run_pseudo_labelling.py
index a10ae5a..9823766 100644
--- a/training/run_pseudo_labelling.py
+++ b/training/run_pseudo_labelling.py
@@ -770,7 +770,7 @@ def prepare_dataset(batch):
     else:
         repo_name = training_args.hub_model_id
     create_repo(repo_name, repo_type="dataset", exist_ok=True, token=training_args.hub_token)
-    snapshot_download(repo_id=repo_name, repo_type="dataset", local_dir=output_dir)
+    snapshot_download(repo_id=repo_name, repo_type="dataset", local_dir=output_dir, token=training_args.hub_token)
 
     # Ensure large txt files can be pushed to the Hub with git-lfs
     with open(os.path.join(output_dir, ".gitattributes"), "r+") as f:
@@ -920,6 +920,7 @@ def eval_step_with_save(split="eval"):
                     folder_path=output_dir,
                     repo_id=repo_name,
                     repo_type="dataset",
+                    token=training_args.hub_token,
                     commit_message=f"Saving transcriptions for split {split} step {step}.",
                 )
 
@@ -1008,12 +1009,13 @@ def add_concatenated_text(eval_preds, condition_on_prev):
                 folder_path=output_dir,
                 repo_id=repo_name,
                 repo_type="dataset",
+                token=training_args.hub_token,
                 commit_message=f"Saving final transcriptions for split {split.replace('.', '-').split('/')[-1]}",
             )
 
     if not data_args.streaming and accelerator.is_main_process:
         raw_datasets.save_to_disk(output_dir, num_proc=num_workers)
     if training_args.push_to_hub:
-        raw_datasets.push_to_hub(repo_name, config_name=data_args.dataset_config_name)
+        raw_datasets.push_to_hub(repo_name, token=training_args.hub_token, config_name=data_args.dataset_config_name)
 
     accelerator.end_training()