Skip to content

Commit

Permalink
Support delimiter for ls_iterate API
Browse files Browse the repository at this point in the history
  • Loading branch information
yanghua committed Nov 1, 2024
1 parent faa597f commit a4d6883
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 22 deletions.
15 changes: 9 additions & 6 deletions tosfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,9 +401,9 @@ def ls_iterate(
detail: bool = False,
versions: bool = False,
batch_size: int = LS_OPERATION_DEFAULT_MAX_ITEMS,
delimiter: str = "/",
recursive: bool = False,
**kwargs: Union[str, bool, float, None],
) -> Generator[Union[dict, str], None, None]:
) -> Generator[Union[List[dict], List[str]], None, None]:
"""List objects under the given path in batches then returns an iterator.
Parameters
Expand All @@ -416,8 +416,8 @@ def ls_iterate(
Whether to list object versions (default is False).
batch_size : int, optional
The number of items to fetch in each batch (default is 1000).
delimiter : str, optional
The delimiter to use for the list operation (default is '/').
recursive : bool, optional
Whether to list objects recursively (default is False).
**kwargs : dict, optional
Additional arguments.
Expand Down Expand Up @@ -453,7 +453,7 @@ def _call_list_objects_type2(
bucket,
prefix,
start_after=prefix,
delimiter=delimiter,
delimiter=None if recursive else "/",
max_keys=batch_size,
continuation_token=continuation_token,
)
Expand All @@ -467,6 +467,7 @@ def _call_list_objects_type2(
continuation_token = resp.next_continuation_token
results = resp.contents + resp.common_prefixes

batch = []
for obj in results:
if isinstance(obj, CommonPrefixInfo):
info = self._fill_dir_info(bucket, obj)
Expand All @@ -475,7 +476,9 @@ def _call_list_objects_type2(
else:
info = self._fill_file_info(obj, bucket, versions)

yield info if detail else info["name"]
batch.append(info if detail else info["name"])

yield batch

def info(
self,
Expand Down
42 changes: 26 additions & 16 deletions tosfs/tests/test_tosfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,43 +73,53 @@ def test_ls_iterate(
)

# Test listing without detail
result = list(tosfs.ls_iterate(f"{bucket}/{temporary_workspace}"))
result = [
item
for batch in tosfs.ls_iterate(f"{bucket}/{temporary_workspace}")
for item in batch
]
assert f"{bucket}/{temporary_workspace}/{dir_name}" in result

# Test listing with detail
result = list(tosfs.ls_iterate(f"{bucket}/{temporary_workspace}", detail=True))
result = [
item
for batch in tosfs.ls_iterate(f"{bucket}/{temporary_workspace}", detail=True)
for item in batch
]
assert any(
item["name"] == f"{bucket}/{temporary_workspace}/{dir_name}" for item in result
)

# Test iterating over the listing directly
for item in tosfs.ls_iterate(f"{bucket}/{temporary_workspace}", detail=True):
assert item["name"] in sorted(
[
f"{bucket}/{temporary_workspace}/{dir_name}",
f"{bucket}/{temporary_workspace}/{another_dir_name}",
]
)
for batch in tosfs.ls_iterate(f"{bucket}/{temporary_workspace}", detail=True):
for item in batch:
assert item["name"] in sorted(
[
f"{bucket}/{temporary_workspace}/{dir_name}",
f"{bucket}/{temporary_workspace}/{another_dir_name}",
]
)

# Test listing with a batch size of 1 so that more than one batch is fetched
result = []
for batch in tosfs.ls_iterate(f"{bucket}/{temporary_workspace}", batch_size=1):
result.append(batch)
for item in batch:
result.append(item)
assert len(result) == len([dir_name, another_dir_name])

# test list recursively
# Test list recursively
expected = [
f"{bucket}/{temporary_workspace}/{dir_name}",
f"{bucket}/{temporary_workspace}/{dir_name}/{file_name}",
f"{bucket}/{temporary_workspace}/{dir_name}/{sub_dir_name}",
f"{bucket}/{temporary_workspace}/{dir_name}/{sub_dir_name}/{sub_file_name}",
f"{bucket}/{temporary_workspace}/{another_dir_name}",
]
result = list(
tosfs.ls_iterate(
f"{bucket}/{temporary_workspace}", delimiter="", recursive=True
)
)
result = [
item
for batch in tosfs.ls_iterate(f"{bucket}/{temporary_workspace}", recursive=True)
for item in batch
]
assert sorted(result) == sorted(expected)


Expand Down

0 comments on commit a4d6883

Please sign in to comment.