Skip to content

Commit

Permalink
studio: add subdir to live metrics post messages to support live expe…
Browse files Browse the repository at this point in the history
…riments in monorepos (iterative#10303)
  • Loading branch information
mattseddon authored and BradyJ27 committed Apr 22, 2024
1 parent 8eaf1c8 commit 0030eb5
Show file tree
Hide file tree
Showing 6 changed files with 123 additions and 71 deletions.
11 changes: 0 additions & 11 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
)
from dvc.ignore import DvcIgnoreFilter
from dvc.log import logger
from dvc.utils import as_posix
from dvc.utils.objects import cached_property

if TYPE_CHECKING:
Expand Down Expand Up @@ -351,16 +350,6 @@ def fs(self, fs: "FileSystem"):
# fs.
self._reset()

@property
def subrepo_relpath(self) -> str:
    """Path of this repo's ``root_dir`` relative to the SCM root.

    The relative path is computed with ``self.fs.relpath`` and passed
    through ``as_posix``. An empty string is returned when that result
    is ``"."``.
    """
    from dvc.fs import GitFileSystem

    # On a GitFileSystem the SCM root is taken to be "/"; otherwise the
    # root directory reported by the SCM object is used.
    if isinstance(self.fs, GitFileSystem):
        scm_root = "/"
    else:
        scm_root = self.scm.root_dir

    rel = as_posix(self.fs.relpath(self.root_dir, scm_root))
    if rel == ".":
        return ""
    return rel

@property
def data_index(self) -> "DataIndex":
from dvc_data.index import DataIndex
Expand Down
6 changes: 5 additions & 1 deletion dvc/repo/experiments/executor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@
from dvc.stage.serialize import to_lockfile
from dvc.utils import dict_sha256, env2bool, relpath
from dvc.utils.fs import remove
from dvc.utils.studio import env_to_config
from dvc.utils.studio import (
env_to_config,
get_subrepo_relpath,
)

if TYPE_CHECKING:
from queue import Queue
Expand Down Expand Up @@ -624,6 +627,7 @@ def _repro_dvc(
params=to_studio_params(dvc.params.show()),
dvc_studio_config=dvc_studio_config,
message=message,
subdir=get_subrepo_relpath(dvc),
)
logger.debug("Running repro in '%s'", os.getcwd())
yield dvc
Expand Down
14 changes: 14 additions & 0 deletions dvc/utils/studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,14 @@
DVC_STUDIO_URL,
)
from dvc.log import logger
from dvc.utils import as_posix

if TYPE_CHECKING:
from requests import Response

from dvc.repo import Repo


logger = logger.getChild(__name__)

STUDIO_URL = "https://studio.iterative.ai"
Expand Down Expand Up @@ -111,3 +115,13 @@ def env_to_config(env: dict[str, Any]) -> dict[str, Any]:
if DVC_STUDIO_URL in env:
config["url"] = env[DVC_STUDIO_URL]
return config


def get_subrepo_relpath(repo: "Repo") -> str:
    """Return ``repo.root_dir`` relative to the SCM root.

    The relative path is computed with ``repo.fs.relpath`` and passed
    through ``as_posix``. An empty string is returned when that result
    is ``"."``.
    """
    from dvc.fs import GitFileSystem

    # On a GitFileSystem the SCM root is taken to be "/"; otherwise the
    # root directory reported by the SCM object is used.
    if isinstance(repo.fs, GitFileSystem):
        scm_root = "/"
    else:
        scm_root = repo.scm.root_dir

    rel = as_posix(repo.fs.relpath(repo.root_dir, scm_root))
    if rel == ".":
        return ""
    return rel
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ dependencies = [
"dvc-data>=3.13,<3.14",
"dvc-http>=2.29.0",
"dvc-render>=1.0.1,<2",
"dvc-studio-client>=0.19,<1",
"dvc-studio-client>=0.20,<1",
"dvc-task>=0.3.0,<1",
"flatten_dict<1,>=0.4.1",
# https://github.com/iterative/dvc/issues/9654
Expand Down
113 changes: 103 additions & 10 deletions tests/integration/test_studio_live_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@
DVC_STUDIO_TOKEN,
DVC_STUDIO_URL,
)
from dvc.repo import Repo
from dvc.testing.scripts import COPY_SCRIPT
from dvc.utils.studio import get_subrepo_relpath


@pytest.mark.studio
@pytest.mark.parametrize("tmp", [True, False])
@pytest.mark.parametrize("offline", [True, False])
def test_post_to_studio(
Expand Down Expand Up @@ -66,10 +70,45 @@ def test_post_to_studio(
}


@pytest.mark.studio
@pytest.mark.parametrize("tmp", [True, False])
def test_post_to_studio_custom_message(
tmp_dir, dvc, scm, exp_stage, mocker, monkeypatch, tmp
):
def test_post_to_studio_subdir(tmp_dir, scm, mocker, monkeypatch, tmp):
live_exp_subdir = "project_a"

tmp_dir.scm_gen(
{
live_exp_subdir: {
"params.yaml": "foo: 1",
"metrics.yaml": "foo: 1",
"copy.py": COPY_SCRIPT.encode("utf-8"),
},
},
commit="git init",
)

project_a_dvc = Repo.init(tmp_dir / live_exp_subdir, subdir=True)
with monkeypatch.context() as m:
m.chdir(project_a_dvc.root_dir)

exp_stage = project_a_dvc.run(
cmd="python copy.py params.yaml metrics.yaml",
metrics_no_cache=["metrics.yaml"],
params=["foo"],
name="copy-file",
)

scm.add(
[
".gitignore",
"copy.py",
"dvc.lock",
"dvc.yaml",
"metrics.yaml",
"params.yaml",
]
)
scm.commit("dvc init project_a")

valid_response = mocker.MagicMock()
valid_response.status_code = 200
mocked_post = mocker.patch("requests.post", return_value=valid_response)
Expand All @@ -79,12 +118,16 @@ def test_post_to_studio_custom_message(
monkeypatch.setenv(env.STUDIO_TOKEN, "STUDIO_TOKEN")

baseline_sha = scm.get_rev()
exp_rev = first(
dvc.experiments.run(
exp_stage.addressing, params=["foo=1"], tmp_dir=tmp, message="foo"
with monkeypatch.context() as m:
m.chdir(project_a_dvc.root_dir)
exp_rev = first(
project_a_dvc.experiments.run(
exp_stage.addressing, params=["foo=24"], tmp_dir=tmp
)
)
)
name = dvc.experiments.get_exact_name([exp_rev])[exp_rev]

name = project_a_dvc.experiments.get_exact_name([exp_rev])[exp_rev]
project_a_dvc.close()
assert mocked_post.call_count == 2

start_call = mocked_post.call_args_list[0]
Expand All @@ -94,7 +137,57 @@ def test_post_to_studio_custom_message(
"repo_url": "STUDIO_REPO_URL",
"baseline_sha": baseline_sha,
"name": name,
"params": {"params.yaml": {"foo": 1}},
"params": {"params.yaml": {"foo": 24}},
"subdir": live_exp_subdir,
"client": "dvc",
"message": "foo",
}


@pytest.mark.studio
def test_monorepo_relpath(tmp_dir, scm):
    """Check get_subrepo_relpath for a root-level repo and two subdir repos."""
    from dvc.repo.destroy import destroy

    tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})

    # Repo initialised at the top of tmp_dir: expected relpath is empty.
    root_repo = Repo.init(tmp_dir)
    assert get_subrepo_relpath(root_repo) == ""

    destroy(root_repo)

    # Repo initialised one level below tmp_dir.
    project_a_path = tmp_dir / "project_a"
    repo_a = Repo.init(project_a_path, subdir=True)
    assert get_subrepo_relpath(repo_a) == "project_a"

    # Repo initialised two levels below tmp_dir.
    project_b_path = tmp_dir / "subdir" / "project_b"
    repo_b = Repo.init(project_b_path, subdir=True)
    assert get_subrepo_relpath(repo_b) == "subdir/project_b"


@pytest.mark.studio
def test_virtual_monorepo_relpath(tmp_dir, scm):
    """Check get_subrepo_relpath when each repo's fs is a GitFileSystem."""
    from dvc.fs.git import GitFileSystem
    from dvc.repo.destroy import destroy

    tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})
    scm.commit("initial commit")
    git_fs = GitFileSystem(scm=scm, rev="master")

    # Root-level repo, re-pointed at the git filesystem with root "/".
    root_repo = Repo.init(tmp_dir)
    root_repo.fs = git_fs
    root_repo.root_dir = "/"
    assert get_subrepo_relpath(root_repo) == ""

    destroy(root_repo)

    # Subdir repo one level deep.
    project_a_path = tmp_dir / "project_a"
    repo_a = Repo.init(project_a_path, subdir=True)
    repo_a.fs = git_fs
    repo_a.root_dir = "/project_a"
    assert get_subrepo_relpath(repo_a) == "project_a"

    # Subdir repo two levels deep.
    project_b_path = tmp_dir / "subdir" / "project_b"
    repo_b = Repo.init(project_b_path, subdir=True)
    repo_b.fs = git_fs
    repo_b.root_dir = "/subdir/project_b"
    assert get_subrepo_relpath(repo_b) == "subdir/project_b"
48 changes: 0 additions & 48 deletions tests/unit/repo/test_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,51 +135,3 @@ def test_dynamic_cache_initialization(tmp_dir, scm):
dvc.close()

Repo(str(tmp_dir)).close()


def test_monorepo_relpath(tmp_dir, scm):
    """Check Repo.subrepo_relpath for a root-level repo and two subdir repos."""
    from dvc.repo.destroy import destroy

    tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})

    # Repo initialised at the top of tmp_dir: expected relpath is empty.
    root_repo = Repo.init(tmp_dir)
    assert root_repo.subrepo_relpath == ""

    destroy(root_repo)

    # Repo initialised one level below tmp_dir.
    project_a_path = tmp_dir / "project_a"
    repo_a = Repo.init(project_a_path, subdir=True)
    assert repo_a.subrepo_relpath == "project_a"

    # Repo initialised two levels below tmp_dir.
    project_b_path = tmp_dir / "subdir" / "project_b"
    repo_b = Repo.init(project_b_path, subdir=True)
    assert repo_b.subrepo_relpath == "subdir/project_b"


def test_virtual_monorepo_relpath(tmp_dir, scm):
    """Check Repo.subrepo_relpath when each repo's fs is a GitFileSystem."""
    from dvc.fs.git import GitFileSystem
    from dvc.repo.destroy import destroy

    tmp_dir.gen({"project_a": {}, "subdir/project_b": {}})
    scm.commit("initial commit")
    git_fs = GitFileSystem(scm=scm, rev="master")

    # Root-level repo, re-pointed at the git filesystem with root "/".
    root_repo = Repo.init(tmp_dir)
    root_repo.fs = git_fs
    root_repo.root_dir = "/"
    assert root_repo.subrepo_relpath == ""

    destroy(root_repo)

    # Subdir repo one level deep.
    project_a_path = tmp_dir / "project_a"
    repo_a = Repo.init(project_a_path, subdir=True)
    repo_a.fs = git_fs
    repo_a.root_dir = "/project_a"
    assert repo_a.subrepo_relpath == "project_a"

    # Subdir repo two levels deep.
    project_b_path = tmp_dir / "subdir" / "project_b"
    repo_b = Repo.init(project_b_path, subdir=True)
    repo_b.fs = git_fs
    repo_b.root_dir = "/subdir/project_b"
    assert repo_b.subrepo_relpath == "subdir/project_b"

0 comments on commit 0030eb5

Please sign in to comment.