Skip to content

Commit

Permalink
add support for s3 checkpoint: need a fix on check_path_is_local
Browse files Browse the repository at this point in the history
  • Loading branch information
eliebak committed Aug 22, 2024
1 parent 03d67f2 commit 5fd1c07
Show file tree
Hide file tree
Showing 8 changed files with 642 additions and 49 deletions.
22 changes: 22 additions & 0 deletions src/nanotron/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
from dataclasses import dataclass, fields
from pathlib import Path
from datasets.download.streaming_download_manager import xPath
from typing import List, Optional, Type, Union

import dacite
Expand Down Expand Up @@ -91,6 +92,22 @@ def __post_init__(self):
self.hf_dataset_splits = "train"


@dataclass
class S3UploadArgs:
    """Configuration section for uploading checkpoints to S3."""

    # Upload destination; a plain string is wrapped in xPath by __post_init__.
    upload_s3_path: xPath
    remove_after_upload: bool
    s5cmd_numworkers: Optional[int]
    s5cmd_concurrency: Optional[int]
    # May be None; a plain string is wrapped in xPath by __post_init__.
    s5cmd_path: Optional[xPath]

    def __post_init__(self):
        """Wrap path-like fields that were supplied as plain strings in ``xPath``."""
        for attr_name in ("upload_s3_path", "s5cmd_path"):
            raw_value = getattr(self, attr_name)
            if isinstance(raw_value, str):
                setattr(self, attr_name, xPath(raw_value))

@dataclass
class NanosetDatasetsArgs:
dataset_folder: Union[str, dict, List[str]]
Expand Down Expand Up @@ -338,13 +355,18 @@ class Config:
data_stages: Optional[List[DatasetStageArgs]] = None
profiler: Optional[ProfilerArgs] = None
lighteval: Optional[LightEvalConfig] = None
s3_upload : Optional[S3UploadArgs] = None

@classmethod
def create_empty(cls):
    """Build an instance of ``cls`` with every dataclass field set to ``None``."""
    # dict.fromkeys maps each field name to None, its default value.
    empty_kwargs = dict.fromkeys(field.name for field in fields(cls))
    return cls(**empty_kwargs)

def __post_init__(self):

if self.s3_upload is not None:
self.s3_upload.__post_init__()

# Some final sanity checks across separate arguments sections:
if self.profiler is not None and self.profiler.profiler_export_path is not None:
assert self.tokens.train_steps < 10
Expand Down
3 changes: 2 additions & 1 deletion src/nanotron/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ def _vocab_size_with_padding(orig_vocab_size: int, pg_size: int, make_vocab_size

multiple = make_vocab_size_divisible_by * pg_size
after = int(ceil(orig_vocab_size / multiple) * multiple)

print("hello")
if after != orig_vocab_size:
print("i'm in")
log_rank(
f"[Vocab Size Padding] Padded vocab (size: {orig_vocab_size}) with {after - orig_vocab_size} dummy tokens (new size: {after})",
logger=logger,
Expand Down
4 changes: 4 additions & 0 deletions src/nanotron/s3_checkpoints/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .fsspec import check_path_is_local, fs_copy, fs_open
from .s3_mover import S3Mover

__all__ = ["S3Mover", "fs_open", "fs_copy", "check_path_is_local"]
38 changes: 38 additions & 0 deletions src/nanotron/s3_checkpoints/fsspec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import contextlib
from pathlib import Path
from typing import Tuple, Union

import fsspec
from fsspec.implementations import local


def get_filesystem_and_path(path: Path, storage_options=None) -> Tuple[fsspec.AbstractFileSystem, str]:
    """Resolve ``path`` into an fsspec filesystem and its path string.

    ``path`` is converted with ``str()`` and handed to
    ``fsspec.core.get_fs_token_paths`` together with ``storage_options``.
    Exactly one resolved path is expected (enforced with an assertion).

    Returns:
        A ``(filesystem, path)`` tuple.
    """
    # Only filesystems known to `fsspec` are supported. To add another one,
    # register it through `fsspec.registry.register_implementation`.
    # DO NOT pass the `mode` argument here: with `mode="w"` it appends a
    # `0.part` suffix to the path.
    filesystem, _token, resolved = fsspec.core.get_fs_token_paths(
        str(path),
        storage_options=storage_options,
    )
    assert len(resolved) == 1
    return filesystem, resolved[0]


@contextlib.contextmanager
def fs_open(file: Union[str, Path], mode="r"):
    """Context manager that opens ``file`` on the filesystem resolved for it.

    The filesystem and path come from ``get_filesystem_and_path``; the object
    produced by that filesystem's ``open`` is yielded and released when the
    ``with`` block exits.
    """
    # TODO @thomasw21: pass storage options
    filesystem, resolved_path = get_filesystem_and_path(file)
    with filesystem.open(resolved_path, mode=mode) as handle:
        yield handle


def fs_copy(
    input_file: Union[str, Path],
    output_file: Union[str, Path],
):
    """Copy file from input to output (possibly on s3/other fs).

    Both files are opened in binary mode through ``fs_open`` and the content
    is streamed in fixed-size chunks, so the whole file is never held in
    memory at once.

    Args:
        input_file: Path of the file to read.
        output_file: Path of the file to write.
    """
    # Function-scope import keeps this block self-contained.
    import shutil

    with fs_open(input_file, mode="rb") as src, fs_open(output_file, mode="wb") as dst:
        # Chunked copy instead of `dst.write(src.read())`: checkpoint files can
        # be far larger than what is reasonable to buffer in RAM.
        shutil.copyfileobj(src, dst, length=16 * 1024 * 1024)


def check_path_is_local(path: Path, storage_options=None) -> bool:
    """Return ``True`` when the filesystem resolved for ``path`` is a ``LocalFileSystem``."""
    filesystem, _resolved_path = get_filesystem_and_path(path=path, storage_options=storage_options)
    return isinstance(filesystem, local.LocalFileSystem)
Loading

0 comments on commit 5fd1c07

Please sign in to comment.