Skip to content

Commit

Permalink
plots: support x-dict in nested dvc.yaml (#10318)
Browse files Browse the repository at this point in the history
  • Loading branch information
mattseddon authored Feb 26, 2024
1 parent 84a1750 commit 9f66897
Show file tree
Hide file tree
Showing 3 changed files with 227 additions and 5 deletions.
16 changes: 11 additions & 5 deletions dvc/repo/plots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,11 +423,17 @@ def _id_is_path(plot_props=None):

def _adjust_sources(fs, plot_props, config_dir):
    """Return a copy of ``plot_props`` whose source paths are prefixed with ``config_dir``.

    The ``x`` and ``y`` entries are treated as mappings keyed by file path.
    Each key is joined onto ``config_dir`` with ``fs.join`` and passed through
    ``_normpath``; the mapped values are kept unchanged.

    ``x`` is left untouched when it is absent or given as a plain string,
    since there is then no path key to rewrite. ``y`` is always rewritten and
    ends up as an empty dict when it was absent.

    Args:
        fs: filesystem object providing ``join``.
        plot_props: plot definition properties; not modified.
        config_dir: directory that the paths in ``plot_props`` are joined onto.

    Returns:
        A deep copy of ``plot_props`` with the rewritten ``x``/``y`` mappings.
    """
    new_plot_props = deepcopy(plot_props)
    for axis in ["x", "y"]:
        x_is_inferred = axis == "x" and (
            axis not in new_plot_props or isinstance(new_plot_props[axis], str)
        )
        if x_is_inferred:
            continue
        # Each axis is rewritten exactly once: joining an already-joined key
        # with ``config_dir`` again would duplicate the directory prefix.
        old = new_plot_props.pop(axis, {})
        new_plot_props[axis] = {
            _normpath(fs.join(config_dir, filepath)): val
            for filepath, val in old.items()
        }
    return new_plot_props


Expand Down
81 changes: 81 additions & 0 deletions tests/func/plots/test_show.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,87 @@ def test_plots_show_overlap(tmp_dir, dvc, run_copy_metrics, clear_before_run):
)


def test_plots_show_nested_x_dict(tmp_dir, dvc, scm):
    """Check ``dvc.plots.show()`` for a nested dvc.yaml using dict ``x`` and ``y``.

    The dvc.yaml written under ``pipelines/data-increment`` names its sources
    relative to its own directory. The definition and the sources reported by
    ``show`` are expected to be keyed by those paths with the pipeline
    directory prepended.
    """
    pipeline_subpath = "pipelines/data-increment"
    metrics_subpath = "dvclive/plots/metrics"
    metrics_from_root = f"{pipeline_subpath}/{metrics_subpath}"

    pipeline_root = tmp_dir / pipeline_subpath
    metrics_root = pipeline_root / metrics_subpath
    metrics_root.mkdir(parents=True)

    def _plot_definition(base: str) -> dict:
        # The same plot, with both source files addressed relative to ``base``.
        x_source = f"{base}/Max_Leaf_Nodes.tsv"
        y_source = f"{base}/Error.tsv"
        return {
            "template": "simple",
            "x": {x_source: "Max_Leaf_Nodes"},
            "y": {y_source: "Error"},
        }

    dvcyaml = {
        "plots": [{"Error vs max_leaf_nodes": _plot_definition(metrics_subpath)}]
    }
    (pipeline_root / "dvc.yaml").dump(dvcyaml)

    metrics_root.gen(
        {
            "Error.tsv": "step\tError\n0\t0.11\n1\t0.22\n2\t0.44\n",
            "Max_Leaf_Nodes.tsv": "step\tMax_Leaf_Nodes\n0\t5\n1\t50\n2\t500\n",
        }
    )

    scm.commit("add dvc.yaml and dvclive metrics")

    # TSV cells are reported as strings, one row per step.
    steps = ("0", "1", "2")
    errors = ("0.11", "0.22", "0.44")
    leaf_nodes = ("5", "50", "500")

    expected_definitions = {
        "data": {
            f"{pipeline_subpath}/dvc.yaml": {
                "data": {
                    "Error vs max_leaf_nodes": _plot_definition(metrics_from_root)
                },
            }
        }
    }
    expected_sources = {
        "data": {
            f"{metrics_from_root}/Error.tsv": {
                "data": [
                    {"Error": error, "step": step}
                    for step, error in zip(steps, errors)
                ],
                "props": {},
            },
            f"{metrics_from_root}/Max_Leaf_Nodes.tsv": {
                "data": [
                    {"Max_Leaf_Nodes": leaves, "step": step}
                    for step, leaves in zip(steps, leaf_nodes)
                ],
                "props": {},
            },
        }
    }

    assert dvc.plots.show() == {
        "workspace": {
            "definitions": expected_definitions,
            "sources": expected_sources,
        }
    }


def test_dir_plots(tmp_dir, dvc, run_copy_metrics):
subdir = tmp_dir / "subdir"
subdir.mkdir()
Expand Down
135 changes: 135 additions & 0 deletions tests/integration/plots/test_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -481,3 +481,138 @@ def test_repo_with_dvclive_plots(tmp_dir, capsys, repo_with_dvclive_plots):
}
assert json_result == expected_result
assert split_json_result == expected_result


@pytest.mark.vscode
def test_nested_x_defn_collection(tmp_dir, dvc, scm, capsys):
    """Check the ``diff`` JSON output for plots defined in a nested dvc.yaml.

    Four plot definitions are written to ``pipelines/data-increment/dvc.yaml``:
    one with ``x`` and ``y`` in separate files, one with ``x`` and several
    ``y`` fields in a single file, and two without ``x`` (``y`` given as a
    list and as a dict). The datapoints of each resulting plot are compared
    with the expected rows.
    """
    pipeline_subpath = "pipelines/data-increment"
    metrics_subpath = "dvclive/plots/metrics"
    logger_subpath = "other/logger"

    metrics_from_root = f"{pipeline_subpath}/{metrics_subpath}"
    logger_from_root = f"{pipeline_subpath}/{logger_subpath}"

    pipeline_root = tmp_dir / pipeline_subpath
    metrics_root = pipeline_root / metrics_subpath
    metrics_root.mkdir(parents=True)
    logger_root = pipeline_root / logger_subpath
    logger_root.mkdir(parents=True)

    # Source paths as written in dvc.yaml, i.e. relative to the pipeline dir.
    error_tsv = f"{metrics_subpath}/Error.tsv"
    leaf_nodes_tsv = f"{metrics_subpath}/Max_Leaf_Nodes.tsv"
    multiple_metrics_json = f"{logger_subpath}/multiple_metrics.json"

    plot_definitions = [
        {
            "Error vs max_leaf_nodes": {
                "template": "simple",
                "x": {leaf_nodes_tsv: "Max_Leaf_Nodes"},
                "y": {error_tsv: "Error"},
            }
        },
        {multiple_metrics_json: {"x": "x", "y": ["y1", "y2"]}},
        {error_tsv: {"y": ["Error"]}},
        {"max leaf nodes": {"y": {leaf_nodes_tsv: "Max_Leaf_Nodes"}}},
    ]
    (pipeline_root / "dvc.yaml").dump({"plots": plot_definitions})

    metrics_root.gen(
        {
            "Error.tsv": "step\tError\n0\t0.11\n1\t0.22\n2\t0.44\n",
            "Max_Leaf_Nodes.tsv": "step\tMax_Leaf_Nodes\n0\t5\n1\t50\n2\t500\n",
        }
    )
    logged_points = [
        {"x": 0, "y1": 0.1, "y2": 10},
        {"x": 1, "y1": 0.2, "y2": 22},
    ]
    (logger_root / "multiple_metrics.json").dump(logged_points)

    scm.commit("add dvc.yaml and metrics")

    _, _, split_json_result = call(capsys, subcommand="diff")
    assert len(split_json_result.keys()) == 1
    assert len(split_json_result["data"].keys()) == 4

    def _datapoints(plot_id):
        # Datapoints of the first entry recorded for ``plot_id``.
        first_entry = split_json_result["data"][plot_id][0]
        return first_entry["anchor_definitions"]["<DVC_METRIC_DATA>"]

    # Values read from the TSV files are compared as strings.
    steps = ("0", "1", "2")
    errors = ("0.11", "0.22", "0.44")
    leaf_nodes = ("5", "50", "500")

    # x and y come from two different files.
    assert _datapoints("Error vs max_leaf_nodes") == [
        {"Error": error, "Max_Leaf_Nodes": leaves, "step": step, "rev": "workspace"}
        for step, error, leaves in zip(steps, errors, leaf_nodes)
    ]

    # x and both y fields come from one file: every logged point is expected
    # once per y field, all "y1" rows first and then all "y2" rows.
    assert _datapoints(f"{logger_from_root}/multiple_metrics.json") == [
        {
            **point,
            "dvc_inferred_y_value": point[field],
            "field": field,
            "rev": "workspace",
        }
        for field in ("y1", "y2")
        for point in logged_points
    ]

    # No x in the definition (y given as a list): "step" is expected as an
    # integer counting from 0.
    assert _datapoints(f"{metrics_from_root}/Error.tsv") == [
        {"step": index, "Error": error, "rev": "workspace"}
        for index, error in enumerate(errors)
    ]

    # No x in the definition (y given as a dict): same integer "step".
    assert _datapoints("max leaf nodes") == [
        {"step": index, "Max_Leaf_Nodes": leaves, "rev": "workspace"}
        for index, leaves in enumerate(leaf_nodes)
    ]

0 comments on commit 9f66897

Please sign in to comment.