Skip to content

Commit

Permalink
Missed check logic in some APIs (#171)
Browse files Browse the repository at this point in the history
  • Loading branch information
yanghua authored Oct 9, 2024
1 parent 82d810b commit 9662ce9
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 18 deletions.
42 changes: 26 additions & 16 deletions tosfs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ def info(
if info := self._object_info(bucket, key, version_id):
return info

return self._try_dir_info(bucket, key, path, fullpath)
return self._get_dir_info(bucket, key, path, fullpath)

def exists(self, path: str, **kwargs: Any) -> bool:
"""Check if a path exists in the TOS.
Expand Down Expand Up @@ -1108,6 +1108,9 @@ def find(
if not bucket:
raise ValueError("Cannot access all of TOS without specify a bucket.")

if maxdepth is not None and maxdepth < 1:
raise ValueError("maxdepth must be at least 1")

if maxdepth and prefix:
raise ValueError(
"Can not specify 'prefix' option alongside 'maxdepth' options."
Expand Down Expand Up @@ -1596,6 +1599,7 @@ def _find_file_dir(
self._parent(o["name"]): {
"Key": self._parent(o["name"]).rstrip("/"),
"Size": 0,
"size": 0,
"name": self._parent(o["name"]).rstrip("/"),
"type": "directory",
}
Expand Down Expand Up @@ -1745,7 +1749,7 @@ def _object_info(

return {}

def _try_dir_info(self, bucket: str, key: str, path: str, fullpath: str) -> dict:
def _get_dir_info(self, bucket: str, key: str, path: str, fullpath: str) -> dict:
try:
# We check to see if the path is a directory by attempting to list its
# contexts. If anything is found, it is indeed a directory
Expand All @@ -1762,9 +1766,10 @@ def _try_dir_info(self, bucket: str, key: str, path: str, fullpath: str) -> dict
if out.key_count > 0 or out.contents or out.common_prefixes:
return {
"name": fullpath,
"type": "directory",
"Key": fullpath,
"Size": 0,
"size": 0,
"StorageClass": "DIRECTORY",
"type": "directory",
}

raise FileNotFoundError(path)
Expand Down Expand Up @@ -2023,6 +2028,7 @@ def _fill_dir_info(
"name": name,
"Key": name,
"Size": 0,
"size": 0,
"type": "directory",
}

Expand Down Expand Up @@ -2228,18 +2234,6 @@ def __init__(
):
"""Instantiate a TOS file."""
bucket, key, path_version_id = fs._split_path(path)
self._check_init_params(key, path, mode, block_size)

if "r" not in mode:
self.multipart_uploader = MultipartUploader(
fs=fs,
bucket=bucket,
key=key,
part_size=fs.multipart_size,
thread_pool_size=fs.multipart_thread_pool_size,
staging_buffer_size=fs.multipart_staging_buffer_size,
multipart_threshold=fs.multipart_threshold,
)

super().__init__(
fs,
Expand All @@ -2260,6 +2254,19 @@ def __init__(
self.append_block = False
self.buffer: Optional[io.BytesIO] = io.BytesIO()

self._check_init_params(key, path, mode, block_size)

if "r" not in mode:
self.multipart_uploader = MultipartUploader(
fs=fs,
bucket=bucket,
key=key,
part_size=fs.multipart_size,
thread_pool_size=fs.multipart_thread_pool_size,
staging_buffer_size=fs.multipart_staging_buffer_size,
multipart_threshold=fs.multipart_threshold,
)

if "a" in mode:
try:
head = retryable_func_executor(
Expand Down Expand Up @@ -2291,6 +2298,9 @@ def _check_init_params(
if not key:
raise ValueError("Attempt to open non key-like path: %s" % path)

if "r" in mode and self.fs.isdir(path):
raise IsADirectoryError("Can not read a directory path: %s" % path)

if "r" not in mode and int(block_size) < MPU_PART_SIZE_THRESHOLD:
raise ValueError(
f"Block size must be >= {MPU_PART_SIZE_THRESHOLD // (2**20)}MB."
Expand Down
57 changes: 56 additions & 1 deletion tosfs/tests/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,9 @@ def test_du(fsspecfs: Any, bucket: str, temporary_workspace: str):
}
assert sizes == expected_sizes, f"Expected sizes {expected_sizes}, got {sizes}"

with pytest.raises(ValueError, match="maxdepth must be at least 1"):
fsspecfs.du(f"{bucket}/{temporary_workspace}", total=False, maxdepth=0)

# Test maxdepth
sizes_maxdepth_1 = fsspecfs.du(
f"{bucket}/{temporary_workspace}", total=False, maxdepth=2
Expand Down Expand Up @@ -681,11 +684,19 @@ def test_cat(fsspecfs: Any, bucket: str, temporary_workspace: str):

# Test Case 3: Error handling with on_error="omit"
non_existent_path = f"{subdir_path}/nonexistent.txt"
result = fsspecfs.cat([file1_path, non_existent_path], on_error="omit")
result = fsspecfs.cat(
[file1_path, non_existent_path], recursive=True, on_error="omit"
)
assert result == {
fsspecfs._strip_protocol(file1_path): b"Hello, World!"
}, "Error handling with omit failed"

result = fsspecfs.cat(f"{subdir_path}/*", recursive=True, on_error="omit")
assert result == {
fsspecfs._strip_protocol(file1_path): b"Hello, World!",
fsspecfs._strip_protocol(file2_path): b"Goodbye, World!",
}, "Error handling with omit failed"

# Test Case 4: Error handling with on_error="return"
result = fsspecfs.cat([file1_path, non_existent_path], on_error="return")
file1_in_result = fsspecfs._strip_protocol(file1_path) in result
Expand Down Expand Up @@ -822,3 +833,47 @@ def test_put(fsspecfs: Any, bucket: str, temporary_workspace: str):
assert (
downloaded_data == test_data
), "The downloaded file's contents should match the original test data."


def test_fs_read_block(fsspecfs: Any, bucket: str, temporary_workspace: str):
file_name = random_str()
path = f"{bucket}/{temporary_workspace}/{file_name}.txt"
data = b"ab\n" + b"a" * (1 * 2**20) + b"\nab"
with fsspecfs.open(path, "wb") as f:
f.write(data)

assert fsspecfs.read_block(path, 0, 3) == b"ab\n"

non_exist_path = f"{bucket}/{temporary_workspace}/non_exist.txt"
with pytest.raises(FileNotFoundError):
fsspecfs.read_block(non_exist_path, 0, 3)

dir_name = random_str()
path = f"{bucket}/{temporary_workspace}/{dir_name}"
fsspecfs.mkdir(path)
assert fsspecfs.exists(path)

with pytest.raises(IsADirectoryError):
fsspecfs.read_block(path, 0, 3)


def test_tail(fsspecfs: Any, bucket: str, temporary_workspace: str):
file_name = random_str()
path = f"{bucket}/{temporary_workspace}/{file_name}.txt"
data = b"ab\n" + b"a" * (1 * 2**20) + b"\nab"
with fsspecfs.open(path, "wb") as f:
f.write(data)

assert fsspecfs.tail(path, 3) == b"\nab"

non_exist_path = f"{bucket}/{temporary_workspace}/non_exist.txt"
with pytest.raises(FileNotFoundError):
fsspecfs.tail(non_exist_path, 3)

dir_name = random_str()
path = f"{bucket}/{temporary_workspace}/{dir_name}"
fsspecfs.mkdir(path)
assert fsspecfs.exists(path)

with pytest.raises(IsADirectoryError):
fsspecfs.tail(path, 3)
3 changes: 2 additions & 1 deletion tosfs/tests/test_tosfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,10 @@ def test_info(tosfs: TosFileSystem, bucket: str, temporary_workspace: str) -> No
}
assert tosfs.info(f"{bucket}/{temporary_workspace}") == {
"name": f"{bucket}/{temporary_workspace}",
"Key": f"{bucket}/{temporary_workspace}",
"type": "directory",
"size": 0,
"StorageClass": "DIRECTORY",
"Size": 0,
}
tosfs.touch(f"{bucket}/{temporary_workspace}/file")
file_info = tosfs.info(f"{bucket}/{temporary_workspace}/file")
Expand Down

0 comments on commit 9662ce9

Please sign in to comment.