diff --git a/dvc/exceptions.py b/dvc/exceptions.py
index 7623c31a44..ba3f8467e0 100644
--- a/dvc/exceptions.py
+++ b/dvc/exceptions.py
@@ -1,9 +1,12 @@
 """Exceptions raised by the dvc."""
 import errno
-from typing import Dict, List
+from typing import TYPE_CHECKING, Dict, List, Set
 
 from dvc.utils import format_link
 
+if TYPE_CHECKING:
+    from dvc.stage import Stage
+
 
 class DvcException(Exception):
     """Base class for all dvc exceptions."""
@@ -31,24 +34,25 @@ class OutputDuplicationError(DvcException):
         stages (list): list of paths to stages.
     """
 
-    def __init__(self, output, stages):
+    def __init__(self, output: str, stages: Set["Stage"]):
         from funcy import first
 
         assert isinstance(output, str)
         assert all(hasattr(stage, "relpath") for stage in stages)
-        msg = ""
-        stage_names = [s.addressing for s in stages]
-        stages_str = " ".join(stage_names)
         if len(stages) == 1:
-            stage_name = first(stages)
-            msg = f"output '{output}' is already specified in {stage_name}."
+            stage = first(stages)
+            msg = (
+                f"output '{output}' is already specified in {stage}."
+                f"\nUse `dvc remove {stage.addressing}` to stop tracking the "
+                "overlapping output."
+            )
         else:
-            msg = "output '{}' is already specified in stages:\n{}".format(
-                output, "\n".join(f"\t- {s}" for s in stage_names)
+            stage_names = "\n".join(["\t- " + s.addressing for s in stages])
+            msg = (
+                f"output '{output}' is specified in:\n{stage_names}"
+                "\nUse `dvc remove` with any of the above targets to stop tracking the "
+                "overlapping output."
             )
-        msg += (
-            f"\nUse `dvc remove {stages_str}` to stop tracking the overlapping output."
-        )
         super().__init__(msg)
         self.stages = stages
         self.output = output
diff --git a/dvc/repo/add.py b/dvc/repo/add.py
index 07cf83f7b8..4a1c580824 100644
--- a/dvc/repo/add.py
+++ b/dvc/repo/add.py
@@ -120,7 +120,7 @@ def translate_graph_error(stages: List["Stage"]) -> Iterator[None]:
         )
     except OutputDuplicationError as exc:
         raise OutputDuplicationError(  # noqa: B904
-            exc.output, list(set(exc.stages) - set(stages))
+            exc.output, set(exc.stages) - set(stages)
         )
 
 
diff --git a/dvc/repo/trie.py b/dvc/repo/trie.py
index 63b329e65a..f33aa34d56 100644
--- a/dvc/repo/trie.py
+++ b/dvc/repo/trie.py
@@ -14,7 +14,7 @@ def build_outs_trie(stages):
             # Check for dup outs
             if out_key in outs:
                 dup_stages = [stage, outs[out_key].stage]
-                raise OutputDuplicationError(str(out), dup_stages)
+                raise OutputDuplicationError(str(out), set(dup_stages))
 
             # Check for overlapping outs
             if outs.has_subtrie(out_key):
diff --git a/tests/func/test_add.py b/tests/func/test_add.py
index e340e64220..8503045bf8 100644
--- a/tests/func/test_add.py
+++ b/tests/func/test_add.py
@@ -12,7 +12,11 @@
 from dvc.cli import main
 from dvc.config import ConfigError
 from dvc.dvcfile import DVC_FILE_SUFFIX
-from dvc.exceptions import DvcException, OverlappingOutputPathsError
+from dvc.exceptions import (
+    DvcException,
+    OutputDuplicationError,
+    OverlappingOutputPathsError,
+)
 from dvc.fs import LocalFileSystem, system
 from dvc.output import (
     OutputAlreadyTrackedError,
@@ -22,7 +26,7 @@
 from dvc.stage import Stage
 from dvc.stage.exceptions import StageExternalOutputsError, StagePathNotFoundError
 from dvc.utils.fs import path_isin
-from dvc.utils.serialize import YAMLFileCorruptedError
+from dvc.utils.serialize import YAMLFileCorruptedError, dump_yaml
 from dvc_data.hashfile.hash import file_md5
 from dvc_data.hashfile.hash_info import HashInfo
 from tests.utils import get_gitignore_content
@@ -656,6 +660,25 @@ def test_try_adding_pipeline_tracked_output(tmp_dir, dvc, run_copy):
         dvc.add("bar")
 
 
+def test_try_adding_multiple_overlaps(tmp_dir, dvc):
+    tmp_dir.dvc_gen("foo", "foo")
+    dvcyaml_content = {
+        "stages": {
+            "echo-foo": {
+                "cmd": "echo foo > foo",
+                "outs": ["foo"],
+            }
+        }
+    }
+    dump_yaml("dvc.yaml", dvcyaml_content)
+    msg = (
+        "\nUse `dvc remove` with any of the above targets to stop tracking the "
+        "overlapping output."
+    )
+    with pytest.raises(OutputDuplicationError, match=msg):
+        dvc.add("foo")
+
+
 def test_add_pipeline_file(tmp_dir, dvc, run_copy):
     from dvc.dvcfile import PROJECT_FILE
 