Merge branch 'iterative:main' into api_fix_config_no_scm
rgoya authored Nov 17, 2024
2 parents 3895cc0 + 1e08cc5 commit 5cb475f
Showing 14 changed files with 110 additions and 84 deletions.
8 changes: 4 additions & 4 deletions .github/.test_durations
@@ -835,9 +835,9 @@
"tests/func/test_dataset.py::test_dvc": 0.46001330000012786,
"tests/func/test_dataset.py::test_dvc_dataset_pipeline": 0.7948393000000351,
"tests/func/test_dataset.py::test_dvc_dump": 0.17996560000005957,
"tests/func/test_dataset.py::test_dvcx": 0.22482930000001033,
"tests/func/test_dataset.py::test_dvcx_dataset_pipeline": 0.43405649999999696,
"tests/func/test_dataset.py::test_dvcx_dump": 0.1858166000000665,
"tests/func/test_dataset.py::test_datachain": 0.22482930000001033,
"tests/func/test_dataset.py::test_datachain_dataset_pipeline": 0.43405649999999696,
"tests/func/test_dataset.py::test_datachain_dump": 0.1858166000000665,
"tests/func/test_dataset.py::test_invalidation": 0.16291839999996682,
"tests/func/test_dataset.py::test_parametrized": 0.19277699999997822,
"tests/func/test_dataset.py::test_pipeline_when_not_in_sync": 0.2063928999999689,
@@ -1627,7 +1627,7 @@
"tests/unit/command/test_data_sync.py::test_pull": 0.17549940000003517,
"tests/unit/command/test_data_sync.py::test_push": 0.16576090000000931,
"tests/unit/command/test_dataset.py::test_add[spec0-lock0-Adding ds (url:/path @ main)\\n]": 0.22054290000005494,
"tests/unit/command/test_dataset.py::test_add[spec1-lock1-Adding mydataset (dvcx://dataset @ v1)\\n]": 0.22979979999990974,
"tests/unit/command/test_dataset.py::test_add[spec1-lock1-Adding mydataset (dc://dataset @ v1)\\n]": 0.22979979999990974,
"tests/unit/command/test_dataset.py::test_add[spec2-lock2-Adding mydataset (s3://bucket/path)\\n]": 0.20264819999988504,
"tests/unit/command/test_dataset.py::test_add_already_exists": 0.2456899000001158,
"tests/unit/command/test_dataset.py::test_update[spec0-old_lock0-new_lock0-expected_outputs0-missing]": 0.2354315999998562,
2 changes: 1 addition & 1 deletion .github/workflows/tests.yaml
@@ -104,7 +104,7 @@ jobs:
pytest ${{ matrix.pytestargs }} -n=logical --dist=worksteal --timeout=300 --durations=0
--cov --cov-report=xml --cov-report=term --durations-path=./.github/.test_durations
- name: upload coverage report
uses: codecov/codecov-action@v3
uses: codecov/codecov-action@v5
with:
file: ./coverage.xml
fail_ci_if_error: false
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -22,7 +22,7 @@ repos:
- id: sort-simple-yaml
- id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: 'v0.7.1'
rev: 'v0.7.3'
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
12 changes: 6 additions & 6 deletions dvc/api/dataset.py
@@ -1,8 +1,8 @@
from typing import Literal, TypedDict, Union


class DVCXDataset(TypedDict):
type: Literal["dvcx"]
class DatachainDataset(TypedDict):
type: Literal["dc"]
name: str
version: int

@@ -20,7 +20,7 @@ class URLDataset(TypedDict):
path: str


def get(name: str) -> Union[DVCXDataset, DVCDataset, URLDataset]:
def get(name: str) -> Union[DatachainDataset, DVCDataset, URLDataset]:
from difflib import get_close_matches

from dvc.fs import get_cloud_fs
@@ -46,9 +46,9 @@ def get(name: str) -> Union[DVCXDataset, DVCDataset, URLDataset]:
path=dataset.lock.path,
sha=dataset.lock.rev_lock,
)
if dataset.type == "dvcx":
return DVCXDataset(
type="dvcx", name=dataset.name_version[0], version=dataset.lock.version
if dataset.type == "dc":
return DatachainDataset(
type="dc", name=dataset.name_version[0], version=dataset.lock.version
)
if dataset.type == "url":
fs_cls, _, path = get_cloud_fs(repo.config, url=dataset.lock.url)
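
For context, a minimal sketch of how the renamed API might be consumed, assuming `dvc/api/dataset.py` is importable as `dvc.api.dataset` and that a dataset named "dogs" is registered with the new `dc` type; the keys `type`, `name`, and `version` come from the `DatachainDataset` TypedDict above, everything else is hypothetical:

```python
# Sketch only: assumes a DVC repo with a "dc"-type dataset named "dogs" in dvc.yaml.
from dvc.api import dataset

ds = dataset.get("dogs")
if ds["type"] == "dc":  # DatachainDataset (formerly the "dvcx" type)
    print(ds["name"], ds["version"])
```
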
12 changes: 6 additions & 6 deletions dvc/commands/dataset.py
@@ -34,7 +34,7 @@ def display(cls, name: str, dataset: "Dataset", action: str = "Adding"):

url = dataset.spec.url
ver: str = ""
if dataset.type == "dvcx":
if dataset.type == "dc":
ver = f"v{dataset.lock.version}"
if dataset.type == "dvc":
if dataset.lock.path:
@@ -55,7 +55,7 @@ def run(self):
raise DvcException("--path can't be used without --dvc")

d = vars(self.args)
for key in ["dvc", "dvcx", "url"]:
for key in ["dvc", "dc", "url"]:
if url := d.pop(key, None):
d.update({"type": key, "url": url})
break
@@ -88,8 +88,8 @@ def display(self, name: str, dataset: "Dataset", new: "Dataset"):
assert new.lock

v: Optional[tuple[str, str]] = None
if dataset.type == "dvcx":
assert new.type == "dvcx"
if dataset.type == "dc":
assert new.type == "dc"
if new.lock.version < dataset.lock.version:
action = "Downgrading"

@@ -168,7 +168,7 @@ def add_parser(subparsers, parent_parser):

url_exclusive_group = ds_add_parser.add_mutually_exclusive_group(required=True)
url_exclusive_group.add_argument(
"--dvcx", metavar="name", help="Name of the dvcx dataset to track"
"--dc", metavar="name", help="Name of the DataChain dataset to track"
)
url_exclusive_group.add_argument(
"--dvc",
@@ -219,7 +219,7 @@ def add_parser(subparsers, parent_parser):
"--rev",
"--version",
nargs="?",
help="DVCX dataset version or Git revision (e.g. SHA, branch, tag)",
help="DataChain dataset version or Git revision (e.g. SHA, branch, tag)",
metavar="<version>",
)
ds_update_parser.set_defaults(func=CmdDatasetUpdate)
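
To make the flag rename concrete, here is a small self-contained sketch of the option-to-spec mapping performed by the loop in `run()` above; the helper name and sample values are made up for illustration:

```python
# Mirrors the loop in CmdDatasetAdd.run(): the "--dc" flag replaces "--dvcx".
def spec_from_args(args: dict) -> dict:
    d = dict(args)
    for key in ["dvc", "dc", "url"]:
        if url := d.pop(key, None):
            d.update({"type": key, "url": url})
            break
    return d

assert spec_from_args({"name": "dogs", "dc": "dc://dogs", "dvc": None, "url": None}) == {
    "name": "dogs",
    "type": "dc",
    "url": "dc://dogs",
}
```
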
13 changes: 13 additions & 0 deletions dvc/dirs.py
@@ -21,6 +21,19 @@ def global_config_dir():


def site_cache_dir():
from platformdirs import PlatformDirs
from platformdirs.unix import Unix

if issubclass(Unix, PlatformDirs):
# Return the cache directory shared by users, e.g. `/var/tmp/$appname`
# NOTE: platformdirs>=5 changed `site_cache_dir` to return /var/cache/$appname,
# as the following path is considered insecure.
# For details, see: https://github.com/tox-dev/platformdirs/pull/239

# FIXME: keeping the old behavior temporarily to avoid dependency conflict.
# In the future, consider migrating to a more secure directory.
return f"/var/tmp/{APPNAME}" # noqa: S108

return os.getenv(env.DVC_SITE_CACHE_DIR) or platformdirs.site_cache_dir(
APPNAME, APPAUTHOR, opinion=True
)
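
For comparison, this is roughly what an unpinned platformdirs call computes on a Linux host versus the path the patched function keeps; the app name/author values and the printed paths are assumptions, not asserted by this diff:

```python
# Assumes a Linux host and DVC's app name/author constants ("dvc"/"iterative").
import platformdirs

print(platformdirs.site_cache_dir("dvc", "iterative", opinion=True))
# newer platformdirs releases -> something like '/var/cache/dvc'
# the patched site_cache_dir() above keeps '/var/tmp/dvc' for now
```
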
43 changes: 23 additions & 20 deletions dvc/repo/datasets.py
@@ -16,7 +16,7 @@
from dvc_data.hashfile.meta import Meta

if TYPE_CHECKING:
from dvcx.dataset import DatasetRecord, DatasetVersion # type: ignore[import]
from datachain.dataset import DatasetRecord, DatasetVersion # type: ignore[import]
from typing_extensions import Self

from dvc.repo import Repo
@@ -29,10 +29,10 @@ def _get_dataset_record(name: str) -> "DatasetRecord":
from dvc.exceptions import DvcException

try:
from dvcx.catalog import get_catalog # type: ignore[import]
from datachain.catalog import get_catalog # type: ignore[import]

except ImportError as exc:
raise DvcException("dvcx is not installed") from exc
raise DvcException("datachain is not installed") from exc

catalog = get_catalog()
return catalog.get_remote_dataset(name)
@@ -86,7 +86,7 @@ def from_dict(cls, d: dict[str, Any]) -> "Self":
class DatasetSpec(SerDe):
name: str
url: str
type: Literal["dvc", "dvcx", "url"]
type: Literal["dvc", "dc", "url"]


@frozen(kw_only=True)
@@ -108,7 +108,7 @@ class DVCDatasetLock(DVCDatasetSpec):


@frozen(kw_only=True)
class DVCXDatasetLock(DatasetSpec):
class DatachainDatasetLock(DatasetSpec):
version: int
created_at: datetime = field(converter=to_datetime)

@@ -160,13 +160,13 @@ def update(self, repo, rev: Optional[str] = None, **kwargs) -> "Self":


@frozen(kw_only=True)
class DVCXDataset:
class DatachainDataset:
manifest_path: str
spec: "DatasetSpec"
lock: "Optional[DVCXDatasetLock]" = field(default=None)
lock: "Optional[DatachainDatasetLock]" = field(default=None)
_invalidated: bool = field(default=False, eq=False, repr=False)

type: ClassVar[Literal["dvcx"]] = "dvcx"
type: ClassVar[Literal["dc"]] = "dc"

@property
def pinned(self) -> bool:
@@ -193,7 +193,7 @@ def update(
name, _version = self.name_version
version = version if version is not None else _version
version_info = _get_dataset_info(name, record=record, version=version)
lock = DVCXDatasetLock(
lock = DatachainDatasetLock(
**self.spec.to_dict(),
version=version_info.version,
created_at=version_info.created_at,
@@ -226,9 +226,9 @@ def update(self, repo, **kwargs):
return evolve(self, lock=lock)


Lock = Union[DVCDatasetLock, DVCXDatasetLock, URLDatasetLock]
Lock = Union[DVCDatasetLock, DatachainDatasetLock, URLDatasetLock]
Spec = Union[DatasetSpec, DVCDatasetSpec]
Dataset = Union[DVCDataset, DVCXDataset, URLDataset]
Dataset = Union[DVCDataset, DatachainDataset, URLDataset]


class DatasetNotFoundError(DvcException, KeyError):
@@ -307,13 +307,13 @@ def _spec_from_info(spec: dict[str, Any]) -> Spec:
raise ValueError("type should be present in spec")
if typ == "dvc":
return DVCDatasetSpec.from_dict(spec)
if typ in {"dvcx", "url"}:
if typ in {"dc", "url"}:
return DatasetSpec.from_dict(spec)
raise ValueError(f"unknown dataset type: {spec.get('type', '')}")

@staticmethod
def _lock_from_info(lock: Optional[dict[str, Any]]) -> Optional[Lock]:
kl = {"dvc": DVCDatasetLock, "dvcx": DVCXDatasetLock, "url": URLDatasetLock}
kl = {"dvc": DVCDatasetLock, "dc": DatachainDatasetLock, "url": URLDatasetLock}
if lock and (cls := kl.get(lock.get("type", ""))): # type: ignore[assignment]
return cls.from_dict(lock) # type: ignore[attr-defined]
return None
@@ -356,9 +356,9 @@ def _build_dataset(
lock=lock,
invalidated=_invalidated,
)
if spec.type == "dvcx":
assert lock is None or isinstance(lock, DVCXDatasetLock)
return DVCXDataset(
if spec.type == "dc":
assert lock is None or isinstance(lock, DatachainDatasetLock)
return DatachainDataset(
manifest_path=manifest_path,
spec=spec,
lock=lock,
@@ -374,7 +374,7 @@ def add(
manifest_path: StrPath = "dvc.yaml",
**kwargs: Any,
) -> Dataset:
assert type in {"dvc", "dvcx", "url"}
assert type in {"dvc", "dc", "url"}
kwargs.update({"name": name, "url": url, "type": type})
dataset = self._build_dataset(os.path.abspath(manifest_path), kwargs)
dataset = dataset.update(self.repo)
@@ -389,13 +389,16 @@ def update(self, name, **kwargs) -> tuple[Dataset, Dataset]:

if dataset.type == "url" and (version or kwargs.get("rev")):
raise ValueError("cannot update version/revision for a url")
if dataset.type == "dvcx" and version is not None:
if dataset.type == "dc" and version is not None:
if not isinstance(version, int):
raise TypeError(
f"dvcx version has to be an integer, got {type(version).__name__!r}"
"DataChain dataset version has to be an integer, "
f"got {type(version).__name__!r}"
)
if version < 1:
raise ValueError(f"dvcx version should be >=1, got {version}")
raise ValueError(
f"DataChain dataset version should be >=1, got {version}"
)

new = dataset.update(self.repo, **kwargs)

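
A self-contained sketch of the type-keyed dispatch that `_lock_from_info` performs, showing that the old "dvcx" spelling is no longer recognized; the class names here are shortened stand-ins, not the real lock classes:

```python
from typing import Optional

class DVCLock: ...
class DatachainLock: ...
class URLLock: ...

# Same shape as the `kl` mapping in _lock_from_info above.
KL = {"dvc": DVCLock, "dc": DatachainLock, "url": URLLock}

def lock_cls(lock: Optional[dict]) -> Optional[type]:
    if lock and (cls := KL.get(lock.get("type", ""))):
        return cls
    return None

assert lock_cls({"type": "dc"}) is DatachainLock
assert lock_cls({"type": "dvcx"}) is None  # old spelling no longer recognized
```
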
5 changes: 3 additions & 2 deletions dvc/testing/benchmarks/fixtures.py
@@ -150,7 +150,8 @@ def _make_bench(name):
import pytest_benchmark.plugin

# hack from https://github.com/ionelmc/pytest-benchmark/issues/166
bench = pytest_benchmark.plugin.benchmark.__pytest_wrapped__.obj(request)
bench_gen = pytest_benchmark.plugin.benchmark.__pytest_wrapped__.obj(request)
bench = next(bench_gen)

suffix = f"-{name}"

Expand All @@ -172,7 +173,7 @@ def _bench_dvc(*args, **kwargs):
name = kwargs.pop("name", None)
name = f"-{name}" if name else ""
bench = make_bench(args[0] + name)
if request.config.getoption("--benchmark-cprofile-dump") or kwargs.pop(
if request.config.getoption("--dvc-benchmark-cprofile-dump") or kwargs.pop(
"cprofile", False
):
cprofile_results = request.config.invocation_params.dir / "prof"
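
The added `next(...)` call appears to account for newer pytest-benchmark releases exposing `benchmark` as a generator-based fixture, so the unwrapped function yields the fixture value instead of returning it. A minimal stand-in illustrating the pattern (the fixture body here is hypothetical):

```python
# Hypothetical generator-style fixture, mirroring the unwrap-and-next pattern above.
def fixture_impl(request):
    value = object()   # setup
    yield value        # what the test receives
    # teardown code would run here after the test

gen = fixture_impl(request=None)
bench = next(gen)      # same shape as `bench = next(bench_gen)` in the diff
```
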
2 changes: 1 addition & 1 deletion dvc/testing/benchmarks/plugin.py
@@ -73,7 +73,7 @@ def pytest_addoption(parser):
)

parser.addoption(
"--benchmark-cprofile-dump",
"--dvc-benchmark-cprofile-dump",
action="store_true",
default=False,
help="Save cprofile results",
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -57,7 +57,7 @@ dependencies = [
"omegaconf",
"packaging>=19",
"pathspec>=0.10.3",
"platformdirs<4,>=3.1.1",
"platformdirs<5,>=3.1.1",
"psutil>=5.8",
"pydot>=1.2.4",
"pygtrie>=2.3.2",
@@ -99,7 +99,7 @@ s3 = ["dvc-s3>=3.2.0,<4"]
ssh = ["dvc-ssh>=4,<5"]
ssh_gssapi = ["dvc-ssh[gssapi]>=4,<5"]
testing = [
"pytest-benchmark[histogram]",
"pytest-benchmark[histogram]>=5,<6",
"pytest-test-utils",
"uv",
]
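
A quick, illustrative way to sanity-check the new bounds with the `packaging` library (already listed as a dependency above); the concrete version numbers are arbitrary examples:

```python
from packaging.specifiers import SpecifierSet
from packaging.version import Version

assert Version("4.3.6") in SpecifierSet("<5,>=3.1.1")        # platformdirs stays below 5
assert Version("5.0.0") not in SpecifierSet("<5,>=3.1.1")
assert Version("5.1.0") in SpecifierSet(">=5,<6")            # pytest-benchmark pinned to 5.x
```
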
6 changes: 3 additions & 3 deletions tests/func/parsing/test_top_level.py
@@ -121,7 +121,7 @@ def test_artifacts(tmp_dir, dvc):
def test_datasets(tmp_dir, dvc):
template = {
"datasets": [
{"name": "${ds1.name}", "url": "${ds1.url}", "type": "dvcx"},
{"name": "${ds1.name}", "url": "${ds1.url}", "type": "dc"},
{
"name": "${ds2.name}",
"url": "${ds2.url}",
@@ -138,7 +138,7 @@ def test_datasets(tmp_dir, dvc):

(tmp_dir / "params.yaml").dump(
{
"ds1": {"name": "dogs", "url": "dvcx://dogs"},
"ds1": {"name": "dogs", "url": "dc://dogs"},
"ds2": {
"name": "example-get-started",
"url": "[email protected]:iterative/example-get-started.git",
@@ -153,7 +153,7 @@

resolver = DataResolver(dvc, tmp_dir, template)
assert resolver.resolve_datasets() == [
{"name": "dogs", "url": "dvcx://dogs", "type": "dvcx"},
{"name": "dogs", "url": "dc://dogs", "type": "dc"},
{
"name": "example-get-started",
"url": "[email protected]:iterative/example-get-started.git",
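
The test relies on DVC's `${...}` parameter interpolation. As a rough illustration only (DVC's real `DataResolver` handles nesting, escaping, and error reporting), the substitution for a flat `section.key` reference looks like this:

```python
import re

params = {"ds1": {"name": "dogs", "url": "dc://dogs"}}

def interpolate(value: str) -> str:
    # Replace ${section.key} with the corresponding params.yaml value.
    return re.sub(
        r"\$\{(\w+)\.(\w+)\}",
        lambda m: str(params[m.group(1)][m.group(2)]),
        value,
    )

assert interpolate("${ds1.url}") == "dc://dogs"
assert interpolate("${ds1.name}") == "dogs"
```
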