Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

index: check dep.hash_name for imports #10270

Merged
merged 1 commit into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions dvc/repo/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def is_out_or_ignored(root, directory):
dirs[:] = [d for d in dirs if not is_out_or_ignored(root, d)]


def _load_data_from_tree(index, prefix, ws, key, tree):
def _load_data_from_tree(index, prefix, ws, key, tree, hash_name):
from dvc_data.index import DataIndexEntry, Meta

parents = set()
Expand All @@ -117,7 +117,7 @@ def _load_data_from_tree(index, prefix, ws, key, tree):
index[(*prefix, ws, *fkey)] = DataIndexEntry(
key=fkey,
meta=ometa,
hash_info=ohi,
hash_info=ohi if (ohi and ohi.name == hash_name) else None,
)

for parent in parents:
Expand Down Expand Up @@ -151,7 +151,7 @@ def _load_data_from_outs(index, prefix, outs):
tree = out.get_obj()

if tree is not None:
_load_data_from_tree(index, prefix, ws, key, tree)
_load_data_from_tree(index, prefix, ws, key, tree, out.hash_name)

entry = DataIndexEntry(
key=key,
Expand Down Expand Up @@ -193,16 +193,22 @@ def _load_storage_from_import(storage_map, key, out):
return

dep = out.stage.deps[0]
if not out.hash_info and (
not dep.hash_info or dep.hash_info.name != storage_map[key].cache.odb.hash_name
):
# partial import
if not out.hash_info or dep.fs.version_aware:
if dep.meta and dep.meta.isdir:
meta_token = dep.hash_info.value
else:
meta_token = tokenize(dep.meta.to_dict())

fs_cache = out.repo.cache.fs_cache
storage_map.add_cache(
FileStorage(
key,
fs_cache.fs,
fs_cache.fs.join(fs_cache.path, dep.fs.protocol, tokenize(dep.fs_path)),
fs_cache.fs.join(
fs_cache.path,
dep.fs.protocol,
tokenize(dep.fs_path, meta_token),
),
)
)

Expand Down
2 changes: 2 additions & 0 deletions dvc/testing/workspace_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def test_import_file(self, tmp_dir, dvc, remote_version_aware):
(remote_version_aware / "file").write_text("modified")
assert dvc.status().get("file.dvc") == [
{"changed deps": {"remote://upstream/file": "update available"}},
{"changed outs": {"file": "not in cache"}},
Copy link
Contributor Author

@efiop efiop Feb 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dvc status doesn't yet work with the fs-based cache, and that limitation happens to surface here, so this assertion will have to wait for #9333 to be resolved.

]
dvc.update(str(tmp_dir / "file.dvc"))
assert (tmp_dir / "file").read_text() == "modified"
Expand Down Expand Up @@ -137,6 +138,7 @@ def test_import_dir(self, tmp_dir, dvc, remote_version_aware):
(remote_version_aware / "data_dir" / "new_file").write_text("new")
assert dvc.status().get("data_dir.dvc") == [
{"changed deps": {"remote://upstream/data_dir": "modified"}},
{"changed outs": {"data_dir": "not in cache"}},
]
dvc.update(str(tmp_dir / "data_dir.dvc"))
assert (tmp_dir / "data_dir" / "subdir" / "file").read_text() == "modified"
Expand Down
Loading