Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug: isdir API when not found should list it to check #261

Merged
merged 2 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added tosfs/.DS_Store
Binary file not shown.
78 changes: 78 additions & 0 deletions tosfs/compatible.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,23 @@
# limitations under the License.

"""The compatible module about AbstractFileSystem in fsspec."""
import re
from typing import Any, Optional

from fsspec import AbstractFileSystem
from fsspec.utils import other_paths

magic_check_bytes = re.compile(b"([*?[])")
magic_check = re.compile("([*?[])")


def has_magic(s: str) -> bool:
    """Report whether ``s`` contains a glob wildcard character.

    The wildcard characters are ``*``, ``?`` and ``[``. A ``bytes`` value is
    searched with the bytes pattern; any other value is searched with the
    text pattern.
    """
    pattern = magic_check_bytes if isinstance(s, bytes) else magic_check
    return pattern.search(s) is not None


class FsspecCompatibleFS(AbstractFileSystem):
Expand Down Expand Up @@ -178,3 +192,67 @@ def find( # noqa #
return names
else:
return {name: out[name] for name in names}

def put(
    self,
    lpath: str,
    rpath: str,
    recursive: bool = False,
    callback: Any = None,
    maxdepth: Optional[int] = None,
    **kwargs: Any,
) -> None:
    """Upload local file(s) to this filesystem.

    A single file is copied, or a whole tree when ``recursive=True``. An
    ``rpath`` ending in "/" is treated as a directory and the uploaded
    files are placed inside it.

    When ``lpath`` and ``rpath`` are both lists they are paired up as
    given; otherwise the local path is expanded and the matching remote
    paths are derived with ``other_paths``. ``put_file`` is then called
    once per (source, destination) pair, receiving ``**kwargs``.

    ``callback`` is accepted but not used by this implementation.
    """
    if isinstance(lpath, list) and isinstance(rpath, list):
        # Explicit source and destination lists need no path expansion.
        sources, targets = lpath, rpath
    else:
        from fsspec.implementations.local import (
            LocalFileSystem,
            make_path_posix,
            trailing_sep,
        )

        source_is_str = isinstance(lpath, str)
        if source_is_str:
            lpath = make_path_posix(lpath)

        local_fs = LocalFileSystem()
        sources = local_fs.expand_path(
            lpath, recursive=recursive, maxdepth=maxdepth
        )
        if source_is_str and (not recursive or maxdepth is not None):
            # Non-recursive glob does not copy directories
            sources = [
                candidate
                for candidate in sources
                if not trailing_sep(candidate) and not local_fs.isdir(candidate)
            ]
            if not sources:
                return

        single_source = len(sources) == 1
        # Evaluated against rpath as passed in, before the protocol is
        # stripped from it below.
        dest_is_dir = isinstance(rpath, str) and (
            trailing_sep(rpath) or self.isdir(rpath)
        )

        if isinstance(rpath, str):
            rpath = self._strip_protocol(rpath)
        else:
            rpath = [self._strip_protocol(target) for target in rpath]

        # "exists" flag handed to other_paths for building destinations.
        if not source_is_str:
            exists = False
        elif has_magic(lpath):
            exists = single_source
        else:
            exists = dest_is_dir and not trailing_sep(lpath)

        targets = other_paths(
            sources,
            rpath,
            exists=exists,
            flatten=not source_is_str,
        )

    for source, target in zip(sources, targets):
        self.put_file(source, target, **kwargs)
30 changes: 29 additions & 1 deletion tosfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -839,7 +839,16 @@ def isdir(self, path: str) -> bool:
raise e
except TosServerError as e:
if e.status_code == TOS_SERVER_STATUS_CODE_NOT_FOUND:
return False
out = retryable_func_executor(
lambda: self.tos_client.list_objects_type2(
bucket,
prefix=key,
delimiter="/",
max_keys=1,
),
max_retry_num=self.max_retry_num,
)
return out.key_count > 0
else:
raise e
except Exception as e:
Expand Down Expand Up @@ -880,6 +889,25 @@ def isfile(self, path: str) -> bool:
except Exception as e:
raise TosfsError(f"Tosfs failed with unknown error: {e}") from e

def put(
    self,
    lpath: str,
    rpath: str,
    recursive: bool = False,
    callback: Any = None,
    maxdepth: Optional[int] = None,
    **kwargs: Any,
) -> None:
    """Copy file(s) from local.

    Copies a specific file or tree of files (if recursive=True). If rpath
    ends with a "/", it will be assumed to be a directory, and target files
    will go within.

    This override only delegates to the parent class's ``put``, passing
    along ``lpath``, ``rpath``, ``recursive``, ``maxdepth`` and any extra
    keyword arguments. ``callback`` is passed along only when the caller
    supplied one.

    NOTE(review): the parent is presumably ``FsspecCompatibleFS.put``,
    which calls ``put_file`` for each source - confirm against the class
    definition.
    """
    # Forward an explicit callback only, so that the parent's own default
    # stays in effect when the caller did not provide one.
    if callback is not None:
        kwargs["callback"] = callback

    # ``maxdepth`` must be forwarded; dropping it would turn a depth-limited
    # recursive upload into an unlimited one.
    super().put(
        lpath,
        rpath,
        recursive=recursive,
        maxdepth=maxdepth,
        **kwargs,
    )

def put_file(
self,
lpath: str,
Expand Down
21 changes: 21 additions & 0 deletions tosfs/tests/test_tosfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,27 @@ def test_ls_dir(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) ->

assert tosfs.ls(f"{bucket}/{temporary_workspace}/nonexistent", detail=False) == []

path = f"{bucket}/{temporary_workspace}/a/b/c/d"
bucket, key, _ = tosfs._split_path(path)
tosfs.tos_client.put_object(bucket=bucket, key=key, content="")
assert tosfs.isdir(f"{bucket}/{temporary_workspace}/a")
assert not tosfs.isfile(f"{bucket}/{temporary_workspace}/a")
assert tosfs.info(f"{bucket}/{temporary_workspace}/a")["type"] == "directory"
assert tosfs.exists(f"{bucket}/{temporary_workspace}/a")
assert not tosfs.isdir(f"{bucket}/{temporary_workspace}/b")
assert not tosfs.isfile(f"{bucket}/{temporary_workspace}/b")
assert tosfs.isdir(f"{bucket}/{temporary_workspace}/a/b")
assert not tosfs.isfile(f"{bucket}/{temporary_workspace}/a/b")
assert tosfs.info(f"{bucket}/{temporary_workspace}/a/b")["type"] == "directory"
assert tosfs.exists(f"{bucket}/{temporary_workspace}/a/b")
assert tosfs.isdir(f"{bucket}/{temporary_workspace}/a/b/c")
assert not tosfs.isfile(f"{bucket}/{temporary_workspace}/a/b/c")
assert tosfs.info(f"{bucket}/{temporary_workspace}/a/b/c")["type"] == "directory"
assert tosfs.exists(f"{bucket}/{temporary_workspace}/a/b/c")
assert not tosfs.isdir(f"{bucket}/{temporary_workspace}/a/b/c/d")
assert tosfs.isfile(f"{bucket}/{temporary_workspace}/a/b/c/d")
assert tosfs.info(f"{bucket}/{temporary_workspace}/a/b/c/d")["type"] == "file"


def test_ls_iterate(
tosfs: TosFileSystem, bucket: str, temporary_workspace: str
Expand Down