diff --git a/adlfs/spec.py b/adlfs/spec.py index 9e904078..ff3e3f53 100644 --- a/adlfs/spec.py +++ b/adlfs/spec.py @@ -1036,7 +1036,7 @@ async def _find(self, path, withdirs=False, prefix="", with_parent=False, **kwar return {name: files[name] for name in names} async def _glob_find(self, path, maxdepth=None, withdirs=False, **kwargs): - """List all files below path in a recusrsive manner. + """List all files below path in a recursive manner. Like posix ``find`` command without conditions Parameters ---------- @@ -1049,6 +1049,9 @@ async def _glob_find(self, path, maxdepth=None, withdirs=False, **kwargs): kwargs are passed to ``ls``. """ # TODO: allow equivalent of -name parameter + + path = path.rstrip('*') + path = path.rstrip('/') path = self._strip_protocol(path) out = dict() detail = kwargs.pop("detail", False) @@ -1511,7 +1514,7 @@ async def _expand_path(self, path, recursive=False, maxdepth=None, **kwargs): ) # Sets whether to return the parent dir if isinstance(path, list): - path = [f"{p.strip('/')}" for p in path if not p.endswith("*")] + path = [f"{p.strip('/')}" if not p.endswith("*") else p for p in path] else: if not path.endswith("*"): path = f"{path.strip('/')}" @@ -1527,7 +1530,7 @@ async def _expand_path(self, path, recursive=False, maxdepth=None, **kwargs): bit = set(await self._glob(p)) out |= bit if recursive: - bit2 = set(await self._expand_path(p)) + bit2 = set(await self._glob_find(p, withdirs=True)) out |= bit2 continue elif recursive: @@ -1567,7 +1570,7 @@ async def _put_file( container_name, path = self.split_path(rpath, delimiter=delimiter) if os.path.isdir(lpath): - self.makedirs(rpath, exist_ok=True) + return else: try: with open(lpath, "rb") as f1: diff --git a/adlfs/tests/test_spec.py b/adlfs/tests/test_spec.py index 99bf8760..126bad18 100644 --- a/adlfs/tests/test_spec.py +++ b/adlfs/tests/test_spec.py @@ -1348,3 +1348,41 @@ def test_find_with_prefix(storage): assert test_1s == [test_bucket_name + "/prefixes/test_1"] + [ test_bucket_name + f"/prefixes/test_{cursor}" for cursor in range(10, 20) ] + + +def test_expand_path(storage): + test_bucket = "data" + test_dir = f"{test_bucket}/testexpandpath" + sub_dir_1 = f"{test_dir}/subdir1" + sub_dir_2 = f"{sub_dir_1}/subdir2" + test_blobs = [ + f"{test_dir}/blob1", + f"{test_dir}/blob2", + f"{test_dir}/subdir1/blob3", + f"{test_dir}/subdir1/blob4", + f"{test_dir}/subdir1/subdir2/blob5", + ] + + expected_dirs_w_trailing_slash = test_blobs.copy() + expected_dirs_w_trailing_slash.append(test_dir) + expected_dirs_w_trailing_slash.append(sub_dir_1 + "/") + expected_dirs_w_trailing_slash.append(sub_dir_2 + "/") + + expected_dirs_wo_trailing_slash = test_blobs.copy() + expected_dirs_wo_trailing_slash.append(sub_dir_1) + expected_dirs_wo_trailing_slash.append(sub_dir_2) + + fs = AzureBlobFileSystem( + account_name=storage.account_name, connection_string=CONN_STR + ) + for blob in test_blobs: + fs.touch(blob) + + result_without_slash = fs.expand_path(test_dir, recursive=True) + assert sorted(result_without_slash) == sorted(expected_dirs_w_trailing_slash) + + result_with_slash = fs.expand_path(test_dir + "/", recursive=True) + assert sorted(result_with_slash) == sorted(expected_dirs_w_trailing_slash) + + result_glob = fs.expand_path(test_dir + "/*", recursive=True) + assert sorted(result_glob) == sorted(expected_dirs_wo_trailing_slash)