Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

studio: add subdir to live metrics post messages to support live experiments in monorepos #10303

Merged
merged 1 commit into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
)
from dvc.ignore import DvcIgnoreFilter
from dvc.log import logger
from dvc.utils import as_posix
from dvc.utils.objects import cached_property

if TYPE_CHECKING:
Expand Down Expand Up @@ -351,16 +350,6 @@ def fs(self, fs: "FileSystem"):
# fs.
self._reset()

@property
def subrepo_relpath(self) -> str:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[F] Kept the implementation the same as in #10291, but moved it off `Repo` because it doesn't fit nicely there.

from dvc.fs import GitFileSystem

scm_root_dir = "/" if isinstance(self.fs, GitFileSystem) else self.scm.root_dir

relpath = as_posix(self.fs.relpath(self.root_dir, scm_root_dir))

return "" if relpath == "." else relpath

@property
def data_index(self) -> "DataIndex":
from dvc_data.index import DataIndex
Expand Down
6 changes: 5 additions & 1 deletion dvc/repo/experiments/executor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@
from dvc.stage.serialize import to_lockfile
from dvc.utils import dict_sha256, env2bool, relpath
from dvc.utils.fs import remove
from dvc.utils.studio import env_to_config
from dvc.utils.studio import (
env_to_config,
get_subrepo_relpath,
)

if TYPE_CHECKING:
from queue import Queue
Expand Down Expand Up @@ -624,6 +627,7 @@ def _repro_dvc(
params=to_studio_params(dvc.params.show()),
dvc_studio_config=dvc_studio_config,
message=message,
subdir=get_subrepo_relpath(dvc),
)
logger.debug("Running repro in '%s'", os.getcwd())
yield dvc
Expand Down
14 changes: 14 additions & 0 deletions dvc/utils/studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,14 @@
DVC_STUDIO_URL,
)
from dvc.log import logger
from dvc.utils import as_posix

if TYPE_CHECKING:
from requests import Response

from dvc.repo import Repo


logger = logger.getChild(__name__)

STUDIO_URL = "https://studio.iterative.ai"
Expand Down Expand Up @@ -111,3 +115,13 @@ def env_to_config(env: dict[str, Any]) -> dict[str, Any]:
if DVC_STUDIO_URL in env:
config["url"] = env[DVC_STUDIO_URL]
return config


def get_subrepo_relpath(repo: "Repo") -> str:
    """Return the POSIX-style path of ``repo.root_dir`` relative to the SCM root.

    When ``repo.fs`` is a ``GitFileSystem`` the root is taken to be ``"/"``;
    otherwise ``repo.scm.root_dir`` is used. An empty string is returned when
    the relative path is ``"."``.
    """
    from dvc.fs import GitFileSystem

    if isinstance(repo.fs, GitFileSystem):
        base_dir = "/"
    else:
        base_dir = repo.scm.root_dir

    subdir = as_posix(repo.fs.relpath(repo.root_dir, base_dir))
    if subdir == ".":
        return ""
    return subdir
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ dependencies = [
"dvc-data>=3.13,<3.14",
"dvc-http>=2.29.0",
"dvc-render>=1.0.1,<2",
"dvc-studio-client>=0.19,<1",
"dvc-studio-client>=0.20,<1",
"dvc-task>=0.3.0,<1",
"flatten_dict<1,>=0.4.1",
# https://github.com/iterative/dvc/issues/9654
Expand Down
113 changes: 103 additions & 10 deletions tests/integration/test_studio_live_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@
DVC_STUDIO_TOKEN,
DVC_STUDIO_URL,
)
from dvc.repo import Repo
from dvc.testing.scripts import COPY_SCRIPT
from dvc.utils.studio import get_subrepo_relpath


@pytest.mark.studio
@pytest.mark.parametrize("tmp", [True, False])
@pytest.mark.parametrize("offline", [True, False])
def test_post_to_studio(
Expand Down Expand Up @@ -66,10 +70,45 @@ def test_post_to_studio(
}


@pytest.mark.studio
@pytest.mark.parametrize("tmp", [True, False])
def test_post_to_studio_custom_message(
tmp_dir, dvc, scm, exp_stage, mocker, monkeypatch, tmp
):
def test_post_to_studio_subdir(tmp_dir, scm, mocker, monkeypatch, tmp):
live_exp_subdir = "project_a"

tmp_dir.scm_gen(
{
live_exp_subdir: {
"params.yaml": "foo: 1",
"metrics.yaml": "foo: 1",
"copy.py": COPY_SCRIPT.encode("utf-8"),
},
},
commit="git init",
)

project_a_dvc = Repo.init(tmp_dir / live_exp_subdir, subdir=True)
with monkeypatch.context() as m:
m.chdir(project_a_dvc.root_dir)

exp_stage = project_a_dvc.run(
cmd="python copy.py params.yaml metrics.yaml",
metrics_no_cache=["metrics.yaml"],
params=["foo"],
name="copy-file",
)

scm.add(
[
".gitignore",
"copy.py",
"dvc.lock",
"dvc.yaml",
"metrics.yaml",
"params.yaml",
]
)
scm.commit("dvc init project_a")

valid_response = mocker.MagicMock()
valid_response.status_code = 200
mocked_post = mocker.patch("requests.post", return_value=valid_response)
Expand All @@ -79,12 +118,16 @@ def test_post_to_studio_custom_message(
monkeypatch.setenv(env.STUDIO_TOKEN, "STUDIO_TOKEN")

baseline_sha = scm.get_rev()
exp_rev = first(
dvc.experiments.run(
exp_stage.addressing, params=["foo=1"], tmp_dir=tmp, message="foo"
with monkeypatch.context() as m:
m.chdir(project_a_dvc.root_dir)
exp_rev = first(
project_a_dvc.experiments.run(
exp_stage.addressing, params=["foo=24"], tmp_dir=tmp
)
)
)
name = dvc.experiments.get_exact_name([exp_rev])[exp_rev]

name = project_a_dvc.experiments.get_exact_name([exp_rev])[exp_rev]
project_a_dvc.close()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to self: without calling `close()` on the repo, `test_build_full_outs` will fail when `tmp` is True. Example here.

assert mocked_post.call_count == 2

start_call = mocked_post.call_args_list[0]
Expand All @@ -94,7 +137,57 @@ def test_post_to_studio_custom_message(
"repo_url": "STUDIO_REPO_URL",
"baseline_sha": baseline_sha,
"name": name,
"params": {"params.yaml": {"foo": 1}},
"params": {"params.yaml": {"foo": 24}},
"subdir": live_exp_subdir,
"client": "dvc",
"message": "foo",
}


@pytest.mark.studio
def test_monorepo_relpath(tmp_dir, scm):
    """Check ``get_subrepo_relpath`` for a root-level repo and two subdir repos."""
    from dvc.repo.destroy import destroy

    tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})

    # A DVC project initialised directly in ``tmp_dir`` has no sub-path.
    root_repo = Repo.init(tmp_dir)
    assert get_subrepo_relpath(root_repo) == ""

    # Remove the root-level DVC project before creating the nested ones.
    destroy(root_repo)

    project_a = Repo.init(tmp_dir / "project_a", subdir=True)
    assert get_subrepo_relpath(project_a) == "project_a"

    project_b = Repo.init(tmp_dir / "subdir" / "project_b", subdir=True)
    # Nested directories are reported with forward slashes.
    assert get_subrepo_relpath(project_b) == "subdir/project_b"


@pytest.mark.studio
def test_virtual_monorepo_relpath(tmp_dir, scm):
    """Check ``get_subrepo_relpath`` when the repo's ``fs`` is a ``GitFileSystem``."""
    from dvc.fs.git import GitFileSystem
    from dvc.repo.destroy import destroy

    tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})
    scm.commit("initial commit")
    git_fs = GitFileSystem(scm=scm, rev="master")

    # Root-level project: swap in the git-backed fs and a "/" root.
    root_repo = Repo.init(tmp_dir)
    root_repo.fs = git_fs
    root_repo.root_dir = "/"
    assert get_subrepo_relpath(root_repo) == ""

    # Remove the root-level DVC project before creating the nested ones.
    destroy(root_repo)

    project_a = Repo.init(tmp_dir / "project_a", subdir=True)
    project_a.fs = git_fs
    project_a.root_dir = "/project_a"
    assert get_subrepo_relpath(project_a) == "project_a"

    project_b = Repo.init(tmp_dir / "subdir" / "project_b", subdir=True)
    project_b.fs = git_fs
    project_b.root_dir = "/subdir/project_b"
    assert get_subrepo_relpath(project_b) == "subdir/project_b"
48 changes: 0 additions & 48 deletions tests/unit/repo/test_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,51 +135,3 @@ def test_dynamic_cache_initialization(tmp_dir, scm):
dvc.close()

Repo(str(tmp_dir)).close()


def test_monorepo_relpath(tmp_dir, scm):
    """Check ``Repo.subrepo_relpath`` for a root-level repo and two subdir repos."""
    from dvc.repo.destroy import destroy

    tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})

    # A DVC project initialised directly in ``tmp_dir`` has no sub-path.
    root_repo = Repo.init(tmp_dir)
    assert root_repo.subrepo_relpath == ""

    # Remove the root-level DVC project before creating the nested ones.
    destroy(root_repo)

    project_a = Repo.init(tmp_dir / "project_a", subdir=True)
    assert project_a.subrepo_relpath == "project_a"

    project_b = Repo.init(tmp_dir / "subdir" / "project_b", subdir=True)
    # Nested directories are reported with forward slashes.
    assert project_b.subrepo_relpath == "subdir/project_b"


def test_virtual_monorepo_relpath(tmp_dir, scm):
    """Check ``Repo.subrepo_relpath`` when the repo's ``fs`` is a ``GitFileSystem``."""
    from dvc.fs.git import GitFileSystem
    from dvc.repo.destroy import destroy

    tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})
    scm.commit("initial commit")
    git_fs = GitFileSystem(scm=scm, rev="master")

    # Root-level project: swap in the git-backed fs and a "/" root.
    root_repo = Repo.init(tmp_dir)
    root_repo.fs = git_fs
    root_repo.root_dir = "/"
    assert root_repo.subrepo_relpath == ""

    # Remove the root-level DVC project before creating the nested ones.
    destroy(root_repo)

    project_a = Repo.init(tmp_dir / "project_a", subdir=True)
    project_a.fs = git_fs
    project_a.root_dir = "/project_a"
    assert project_a.subrepo_relpath == "project_a"

    project_b = Repo.init(tmp_dir / "subdir" / "project_b", subdir=True)
    project_b.fs = git_fs
    project_b.root_dir = "/subdir/project_b"
    assert project_b.subrepo_relpath == "subdir/project_b"
Loading