From a535b4bec313cfbf9046b4e96027c970aaae91c5 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Fri, 22 Dec 2023 23:02:36 +0200 Subject: [PATCH] index: md5: handle missing files and cloud versioned files missing version_id --- src/dvc_data/index/index.py | 14 +++++++++++--- src/dvc_data/index/save.py | 18 +++++++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/dvc_data/index/index.py b/src/dvc_data/index/index.py index a975adba..f89d93b8 100644 --- a/src/dvc_data/index/index.py +++ b/src/dvc_data/index/index.py @@ -259,9 +259,17 @@ def get(self, entry: "DataIndexEntry") -> Tuple["FileSystem", str]: assert entry.key is not None assert entry.key[: len(self.prefix)] == self.prefix path = self.fs.join(self.path, *entry.key[len(self.prefix) :]) - if self.fs.version_aware and entry.meta and entry.meta.version_id: - path = self.fs.version_path(path, entry.meta.version_id) - return self.fs, path + + if not self.fs.version_aware: + return self.fs, path + + if not entry.meta or entry.meta.isdir: + return self.fs, path + + if entry.meta and entry.meta.version_id: + return self.fs, self.fs.version_path(path, entry.meta.version_id) + + raise ValueError(f"Missing version_id for {path}") def exists(self, entry: "DataIndexEntry", refresh: bool = False) -> bool: if self.index is None: diff --git a/src/dvc_data/index/save.py b/src/dvc_data/index/save.py index 9981979b..21ad764b 100644 --- a/src/dvc_data/index/save.py +++ b/src/dvc_data/index/save.py @@ -35,7 +35,10 @@ def md5( if entry.hash_info and entry.hash_info.name in ("md5", "md5-dos2unix"): continue - fs, path = index.storage_map.get_storage(entry, storage) + try: + fs, path = index.storage_map.get_storage(entry, storage) + except ValueError: + continue info = None if check_meta: @@ -48,7 +51,11 @@ def md5( if entry.meta != meta: continue - meta, hash_info = hash_file(path, fs, name, state=state, info=info) + try: + _, hash_info = hash_file(path, fs, name, state=state, info=info) + except FileNotFoundError: + continue + entries[key] = DataIndexEntry( key=entry.key, meta=entry.meta, @@ -126,7 +133,12 @@ def save( if entry.meta and entry.meta.isdir: dir_entries.append(key) continue - fs, path = index.storage_map.get_storage(entry, storage) + + try: + fs, path = index.storage_map.get_storage(entry, storage) + except ValueError: + continue + if entry.hash_info: cache = odb or index.storage_map.get_cache_odb(entry) assert cache