Skip to content

Commit

Permalink
Core: Implement expand_path API (#56)
Browse files Browse the repository at this point in the history
  • Loading branch information
yanghua authored Sep 6, 2024
1 parent 9b403e3 commit abe0d1f
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 17 deletions.
83 changes: 74 additions & 9 deletions tosfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import mimetypes
import os
import time
from glob import has_magic
from typing import Any, BinaryIO, Generator, List, Optional, Tuple, Union

import tos
Expand Down Expand Up @@ -497,6 +498,9 @@ def isfile(self, path: str) -> bool:
return False

bucket, key, _ = self._split_path(path)
if not key:
return False

try:
# Attempt to get the object metadata
self.tos_client.head_object(bucket, key)
Expand Down Expand Up @@ -776,6 +780,68 @@ def find(
else:
return [o["name"] for o in out]

def expand_path(
    self,
    path: Union[str, List[str]],
    recursive: bool = False,
    maxdepth: Optional[int] = None,
    **kwargs: Any,
) -> List[str]:
    """Expand one or more paths (which may be globs) into a sorted path list.

    Every input path is first normalised with ``_strip_protocol``. Paths
    containing glob magic characters are expanded with ``glob``; plain
    paths are expanded with ``find`` when ``recursive`` is true.

    Parameters
    ----------
    path : str or List[str]
        A single path or a list of paths to expand.
    recursive : bool, optional
        Whether to expand recursively (default is False).
    maxdepth : int, optional
        The maximum depth to expand to (default is None, i.e. unlimited).
        Must be at least 1 when given.
    **kwargs : Any, optional
        Additional arguments forwarded to ``glob`` and ``find``.

    Returns
    -------
    List[str]
        The sorted, de-duplicated list of expanded paths.

    Raises
    ------
    ValueError
        If ``maxdepth`` is given and is smaller than 1.
    FileNotFoundError
        If the expansion yields no path at all.

    """
    if maxdepth is not None and maxdepth < 1:
        raise ValueError("maxdepth must be at least 1")

    candidates = [path] if isinstance(path, str) else path
    stripped = [self._strip_protocol(p) for p in candidates]

    out = set()
    for p in stripped:  # can gather here
        if has_magic(p):
            matched = set(self.glob(p, maxdepth=maxdepth, **kwargs))
            out |= matched
            # The glob call above already expanded one level, so the
            # recursive call gets one level less. With maxdepth <= 1 there
            # is no depth budget left and the recursion is skipped.
            if recursive and (maxdepth is None or maxdepth > 1):
                out |= set(
                    self.expand_path(
                        list(matched),
                        recursive=recursive,
                        maxdepth=None if maxdepth is None else maxdepth - 1,
                        **kwargs,
                    )
                )
            continue

        if recursive:
            # detail=False is requested explicitly: the result is stored
            # in a set of path strings.
            out |= set(
                self.find(
                    p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs
                )
            )

        # The root itself is added unconditionally for a non-recursive
        # expansion, and only when it exists for a recursive one.
        if p not in out and (recursive is False or self.exists(p)):
            out.add(p)

    if not out:
        raise FileNotFoundError(stripped)
    return sorted(out)

def cp_file(
self,
path1: str,
Expand Down Expand Up @@ -992,9 +1058,9 @@ def _find_file_dir(
par = self._parent(o["name"])
if len(path) <= len(par):
d = {
"Key": self._split_path(par)[1],
"Key": self._split_path(par)[1].rstrip("/"),
"Size": 0,
"name": par,
"name": par.rstrip("/"),
"type": "directory",
}
dirs.append(d)
Expand Down Expand Up @@ -1204,13 +1270,10 @@ def exists(self, path: str, **kwargs: Any) -> bool:
# if the path is a bucket
if not key:
return self._exists_bucket(bucket)
elif self.isfile(path):
return self._exists_object(bucket, key, path, version_id)
else:
object_exists = self._exists_object(bucket, key, path, version_id)
if not object_exists:
return self._exists_object(
bucket, key.rstrip("/") + "/", path, version_id
)
return object_exists
return self._exists_object(bucket, key.rstrip("/") + "/", path, version_id)

def _exists_bucket(self, bucket: str) -> bool:
"""Check if a bucket exists in the TOS.
Expand Down Expand Up @@ -1561,7 +1624,9 @@ def _split_path(self, path: str) -> Tuple[str, str, Optional[str]]:
def _fill_dir_info(
bucket: str, common_prefix: Optional[CommonPrefixInfo], key: str = ""
) -> dict:
name = "/".join([bucket, common_prefix.prefix[:-1] if common_prefix else key])
name = "/".join(
[bucket, common_prefix.prefix[:-1] if common_prefix else key]
).rstrip("/")
return {
"name": name,
"Key": name,
Expand Down
48 changes: 40 additions & 8 deletions tosfs/tests/test_tosfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,15 +422,10 @@ def test_find(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> No
)
assert len(result) == len([bucket, f"{bucket}/{temporary_workspace}/"])
assert (
result[f"{bucket}/{temporary_workspace}/"]["name"]
== f"{bucket}/{temporary_workspace}/"
result[f"{bucket}/{temporary_workspace}"]["name"]
== f"{bucket}/{temporary_workspace}"
)
assert result[f"{bucket}/{temporary_workspace}/"]["type"] == "directory"

result = tosfs.find(
f"{bucket}/{temporary_workspace}", withdirs=True, maxdepth=1, detail=True
)
assert len(result) == 1
assert result[f"{bucket}/{temporary_workspace}"]["type"] == "directory"

dir_name = random_str()
sub_dir_name = random_str()
Expand Down Expand Up @@ -503,6 +498,43 @@ def test_cp_file(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) ->
assert tosfs.info(dest_path_with_etag)["ETag"] == tosfs.info(dest_path)["ETag"]


def test_expand_path(
    tosfs: TosFileSystem, bucket: str, temporary_workspace: str
) -> None:
    """Check expand_path on a bucket, a workspace directory and its contents."""
    workspace = f"{bucket}/{temporary_workspace}"

    # Bucket and directory paths are returned without a trailing slash.
    assert tosfs.expand_path(bucket) == [bucket]
    assert tosfs.expand_path(f"{bucket}/") == [bucket]
    assert tosfs.expand_path(f"{bucket}/{temporary_workspace}/") == [workspace]

    # A recursive expansion lists the directory itself plus the file in it.
    top_file = f"{bucket}/{temporary_workspace}/file"
    tosfs.touch(top_file)
    expanded = tosfs.expand_path(workspace, recursive=True)
    assert expanded == [workspace, top_file]

    # Add a nested directory holding its own file.
    sub_dir_name = random_str()
    sub_dir = f"{bucket}/{temporary_workspace}/{sub_dir_name}"
    nested_file = f"{bucket}/{temporary_workspace}/{sub_dir_name}/file"
    tosfs.mkdir(sub_dir)
    tosfs.touch(nested_file)

    # maxdepth=1 stops at the direct children of the workspace.
    shallow = tosfs.expand_path(workspace, recursive=True, maxdepth=1)
    assert shallow == sorted([workspace, top_file, sub_dir])

    # Without a depth limit the nested file is reported as well.
    deep = tosfs.expand_path(workspace, recursive=True)
    assert deep == sorted([workspace, sub_dir, top_file, nested_file])


###########################################################
# File operation tests #
###########################################################
Expand Down

0 comments on commit abe0d1f

Please sign in to comment.