Skip to content

Commit

Permalink
bug fix
Browse files Browse the repository at this point in the history
Convert pathlib path objects to strings before passing them to load_from_disk and save_to_disk
  • Loading branch information
joey00072 committed Jun 10, 2024
1 parent 16941b3 commit d5f6dd3
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
2 changes: 1 addition & 1 deletion ohara/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def __init__(
fpath = str(
f"{self.dataset_name.replace('/','-')}--{self.tokenizer.name_or_path.replace('/','-')}"
)
fpath = path.joinpath(fpath).joinpath(split)
fpath = str(path.joinpath(fpath).joinpath(split))

self.ds = load_from_disk(fpath)
self.toks_cycle = cycle(self.ds)
Expand Down
3 changes: 2 additions & 1 deletion ohara/pretokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ def save_pre_tokenized_dataset(self, dataset, split):
fpath = str(
f"{self.dataset_name.replace('/','-')}--{self.tokenizer.name_or_path.replace('/','-')}"
)
dataset.save_to_disk(self.output_dir.joinpath(fpath).joinpath(split))
fpath = str(self.output_dir.joinpath(fpath).joinpath(split))
dataset.save_to_disk(fpath)

print(f"Dataset saved to {self.output_dir}")

Expand Down

0 comments on commit d5f6dd3

Please sign in to comment.