From f2b7b00af564509aac21dd55b318a0eb28219205 Mon Sep 17 00:00:00 2001 From: skshetry <18718008+skshetry@users.noreply.github.com> Date: Thu, 29 Aug 2024 18:51:21 +0545 Subject: [PATCH] dvcfilesystem.get: prefer downloading from repo.fs before dvc.fs (#10543) --- dvc/fs/dvc.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/dvc/fs/dvc.py b/dvc/fs/dvc.py index e1a74faee9..e5f7774736 100644 --- a/dvc/fs/dvc.py +++ b/dvc/fs/dvc.py @@ -558,25 +558,29 @@ def _get( # noqa: C901 for d in _dirs: os.mkdir(d) + repo_fs = self.repo.fs + def _get_file(arg): dvc_fs, src, dest, info = arg dvc_info = info.get("dvc_info") - if dvc_info and dvc_fs: + fs_info = info.get("fs_info") + if dvc_fs and dvc_info and not fs_info: dvc_path = dvc_info["name"] dvc_fs.get_file( dvc_path, dest, callback=callback, info=dvc_info, **kwargs ) else: - self.get_file(src, dest, callback=callback, **kwargs) + fs_path = fs_info["name"] + repo_fs.get_file(fs_path, dest, callback=callback, **kwargs) return src, dest, info with ThreadPoolExecutor(max_workers=batch_size) as executor: return list(executor.imap_unordered(_get_file, _files)) def get_file(self, rpath, lpath, **kwargs): + dvc_info = kwargs.pop("info", {}).pop("dvc_info", None) key = self._get_key_from_relative(rpath) fs_path = self._from_key(key) - dirpath = os.path.dirname(lpath) if dirpath: # makedirs raises error if the string is empty @@ -590,7 +594,7 @@ def get_file(self, rpath, lpath, **kwargs): raise dvc_path = _get_dvc_path(dvc_fs, subkey) - return dvc_fs.get_file(dvc_path, lpath, **kwargs) + return dvc_fs.get_file(dvc_path, lpath, info=dvc_info, **kwargs) def du(self, path, total=True, maxdepth=None, withdirs=False, **kwargs): if maxdepth is not None: