From 2414e062fadd7a86665f09ac8f93962e67fd5a7a Mon Sep 17 00:00:00 2001 From: yanghua Date: Fri, 1 Nov 2024 16:15:10 +0800 Subject: [PATCH] Bug: isdir API when not found should list it to check --- tosfs/compatible.py | 78 +++++++++++++++++++++++++++++++++++++++++++++ tosfs/core.py | 19 +++++++++++ 2 files changed, 97 insertions(+) diff --git a/tosfs/compatible.py b/tosfs/compatible.py index ea8175b..8523eda 100644 --- a/tosfs/compatible.py +++ b/tosfs/compatible.py @@ -13,9 +13,23 @@ # limitations under the License. """The compatible module about AbstractFileSystem in fsspec.""" +import re from typing import Any, Optional from fsspec import AbstractFileSystem +from fsspec.utils import other_paths + +magic_check_bytes = re.compile(b"([*?[])") +magic_check = re.compile("([*?[])") + + +def has_magic(s: str) -> bool: + """Check if a string has glob characters.""" + if isinstance(s, bytes): + match = magic_check_bytes.search(s) + else: + match = magic_check.search(s) + return match is not None class FsspecCompatibleFS(AbstractFileSystem): @@ -178,3 +192,67 @@ def find( # noqa # return names else: return {name: out[name] for name in names} + + def put( + self, + lpath: str, + rpath: str, + recursive: bool = False, + callback: Any = None, + maxdepth: Optional[int] = None, + **kwargs: Any, + ) -> None: + """Copy file(s) from local. + + Copies a specific file or tree of files (if recursive=True). If rpath + ends with a "/", it will be assumed to be a directory, and target files + will go within. + + Calls put_file for each source. + """ + if isinstance(lpath, list) and isinstance(rpath, list): + # No need to expand paths when both source and destination + # are provided as lists + rpaths = rpath + lpaths = lpath + else: + from fsspec.implementations.local import ( + LocalFileSystem, + make_path_posix, + trailing_sep, + ) + + source_is_str = isinstance(lpath, str) + if source_is_str: + lpath = make_path_posix(lpath) + fs = LocalFileSystem() + lpaths = fs.expand_path(lpath, recursive=recursive, maxdepth=maxdepth) + if source_is_str and (not recursive or maxdepth is not None): + # Non-recursive glob does not copy directories + lpaths = [p for p in lpaths if not (trailing_sep(p) or fs.isdir(p))] + if not lpaths: + return + + source_is_file = len(lpaths) == 1 + dest_is_dir = isinstance(rpath, str) and ( + trailing_sep(rpath) or self.isdir(rpath) + ) + + rpath = ( + self._strip_protocol(rpath) + if isinstance(rpath, str) + else [self._strip_protocol(p) for p in rpath] + ) + exists = source_is_str and ( + (has_magic(lpath) and source_is_file) + or (not has_magic(lpath) and dest_is_dir and not trailing_sep(lpath)) + ) + rpaths = other_paths( + lpaths, + rpath, + exists=exists, + flatten=not source_is_str, + ) + + for lpath, rpath in zip(lpaths, rpaths): + self.put_file(lpath, rpath, **kwargs) diff --git a/tosfs/core.py b/tosfs/core.py index 9d3c10e..7a47c43 100644 --- a/tosfs/core.py +++ b/tosfs/core.py @@ -889,6 +889,25 @@ def isfile(self, path: str) -> bool: except Exception as e: raise TosfsError(f"Tosfs failed with unknown error: {e}") from e + def put( + self, + lpath: str, + rpath: str, + recursive: bool = False, + callback: Any = None, + maxdepth: Optional[int] = None, + **kwargs: Any, + ) -> None: + """Copy file(s) from local. + + Copies a specific file or tree of files (if recursive=True). If rpath + ends with a "/", it will be assumed to be a directory, and target files + will go within. + + Calls put_file for each source. + """ + super().put(lpath, rpath, recursive=recursive, **kwargs) + def put_file( self, lpath: str,