From 0f4dbc937f30e6c6f5b90376c1e4581f6a8afba4 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Fri, 18 Aug 2023 01:01:51 +0200 Subject: [PATCH 001/117] feature: New classes for handling pipelines --- client/verta/docs/python.rst | 1 + client/verta/tests/conftest.py | 2 +- client/verta/tests/unit_tests/conftest.py | 143 +++++++++++- .../pipeline/test_pipeline_graph.py | 103 +++++++++ .../unit_tests/pipeline/test_pipeline_step.py | 134 +++++++++++ .../pipeline/test_registered_pipeline.py | 206 +++++++++++++++++ client/verta/tests/unit_tests/strategies.py | 101 ++++++++- client/verta/verta/pipeline/__init__.py | 16 ++ .../verta/verta/pipeline/_pipeline_graph.py | 89 ++++++++ client/verta/verta/pipeline/_pipeline_step.py | 213 ++++++++++++++++++ .../verta/pipeline/_registered_pipeline.py | 174 ++++++++++++++ 11 files changed, 1170 insertions(+), 12 deletions(-) create mode 100644 client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py create mode 100644 client/verta/tests/unit_tests/pipeline/test_pipeline_step.py create mode 100644 client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py create mode 100644 client/verta/verta/pipeline/__init__.py create mode 100644 client/verta/verta/pipeline/_pipeline_graph.py create mode 100644 client/verta/verta/pipeline/_pipeline_step.py create mode 100644 client/verta/verta/pipeline/_registered_pipeline.py diff --git a/client/verta/docs/python.rst b/client/verta/docs/python.rst index d5e2383e6a..5f4a02d3f9 100644 --- a/client/verta/docs/python.rst +++ b/client/verta/docs/python.rst @@ -29,6 +29,7 @@ Verta endpoint environment integrations + pipeline registry runtime tracking diff --git a/client/verta/tests/conftest.py b/client/verta/tests/conftest.py index bcb9ad8286..0ef7ba5c7e 100644 --- a/client/verta/tests/conftest.py +++ b/client/verta/tests/conftest.py @@ -503,7 +503,7 @@ def class_created_entities(): entity.delete() -@pytest.fixture +@pytest.fixture(scope="session") def model_version(registered_model): 
yield registered_model.get_or_create_version() diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index c7806ee4ac..5b4ae79c83 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -2,17 +2,24 @@ """Pytest fixtures for use in client unit tests.""" +import json import os +import random +from typing import Any, Callable, Dict, List from unittest.mock import patch +import hypothesis.strategies as st import pytest import responses +from tests.unit_tests.strategies import mock_pipeline_definition from verta._internal_utils._utils import Configuration, Connection from verta._protos.public.registry import RegistryService_pb2 as _RegistryService from verta.client import Client from verta.credentials import EmailCredentials from verta.endpoint import Endpoint +from verta.endpoint.resources import NvidiaGPU, NvidiaGPUModel, Resources +from verta.pipeline import PipelineGraph, PipelineStep from verta.registry.entities import RegisteredModelVersion @@ -64,15 +71,137 @@ def __repr__(self): # avoid network calls when displaying test results @pytest.fixture(scope="session") -def mock_registered_model_version(mock_conn, mock_config): - """Return a mocked object of the RegisteredModelVersion class for use in tests""" +def mock_simple_pipeline_definition() -> Dict[str, Any]: + """Return a mocked pipeline definition for use in tests""" + + def simple_pipeline_definition(id: int) -> Dict[str, Any]: + return { + "graph": [ + {"predecessors": [], "name": "step1"}, + {"predecessors": ["step1"], "name": "step2"}, + ], + "pipeline_version_id": id, + "steps": [ + { + "model_version_id": 1, + "name": "step1", + }, + { + "model_version_id": 2, + "name": "step2", + }, + ], + } + + return simple_pipeline_definition + + +@pytest.fixture(scope="session") +def make_mock_registered_model_version( + mock_conn, mock_config, mock_simple_pipeline_definition +) -> Callable: + """Return a callable function 
for creating mocked objects of the + RegisteredModelVersion class for use in tests that require multiple + unique instances. + """ class MockRegisteredModelVersion(RegisteredModelVersion): def __repr__(self): # avoid network calls when displaying test results return object.__repr__(self) - return MockRegisteredModelVersion( - mock_conn, - mock_config, - _RegistryService.ModelVersion(id=555, registered_model_id=123), - ) + def _get_artifact(self, key=None, artifact_type=None): + return json.dumps(mock_simple_pipeline_definition(id=self.id)).encode( + "utf-8" + ) + + def _make_mock_registered_model_version(): + """Return a mocked ``RegisteredModelVersion``. + + ``id`` and ``registered_model_id`` will be random and unique for the + test session. + + """ + model_ver_id = random.randint(1, 1000000) + reg_model_id = random.randint(1, 1000000) + + return MockRegisteredModelVersion( + mock_conn, + mock_config, + _RegistryService.ModelVersion( + id=model_ver_id, + registered_model_id=reg_model_id, + version="test_version_name", + ), + ) + + return _make_mock_registered_model_version + + +@pytest.fixture(scope="session") +def make_mock_pipeline_step(make_mock_registered_model_version) -> Callable: + """ + Return a callable function for creating mocked objects of the PipelineStep + class for use in tests that require multiple unique instances. + """ + + class MockPipelineStep(PipelineStep): + def __repr__(self): + return object.__repr__(self) + + def _make_mock_pipeline_step(): + return MockPipelineStep( + model_version=make_mock_registered_model_version(), + name=st.text(min_size=1), + predecessors=[], + ) + + return _make_mock_pipeline_step + + +@pytest.fixture(scope="session") +def make_mock_pipeline_graph(make_mock_pipeline_step) -> Callable: + """ + Return a callable function for creating mocked objects of the PipelineGraph + class for use in tests that require multiple unique instances. 
+ """ + + class MockPipelineGraph(PipelineGraph): + def __repr__(self): + return object.__repr__(self) + + def _make_mock_pipeline_graph(): + step1 = make_mock_pipeline_step() + step1.set_name("step1") + step2 = make_mock_pipeline_step() + step2.set_name("step2") + step3 = make_mock_pipeline_step() + step3.set_name("step3") + return MockPipelineGraph(steps=[step1, step2, step3]) + + return _make_mock_pipeline_graph + + +@pytest.fixture(scope="session") +def make_mock_step_resources() -> Callable: + """ + Return a callable function for generating a list of mocked resources for + a given list of step names. + """ + + def _make_mock_step_resources(step_names: List[str]) -> Dict[str, Any]: + res = dict() + for name in step_names: + res.update( + { + name: Resources( + cpu=random.randint(1, 10), + memory="5Gi", + nvidia_gpu=NvidiaGPU( + model=NvidiaGPUModel.T4, number=random.randint(1, 10) + ), + ), + } + ) + return res + + return _make_mock_step_resources diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py new file mode 100644 index 0000000000..a84d8abd6d --- /dev/null +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for the PipelineGraph class +""" + +from hypothesis import given, HealthCheck, settings + +from tests.unit_tests.strategies import mock_pipeline_definition +from verta.pipeline import PipelineGraph + + +def test_set_steps(make_mock_pipeline_step) -> None: + """ + Test that the steps of a PipelineGraph can be set + """ + step_1 = make_mock_pipeline_step() + step_2 = make_mock_pipeline_step() + graph = PipelineGraph(steps=[]) + graph.set_steps([step_1, step_2]) + assert set(graph.steps) == set([step_1, step_2]) + graph.set_steps([]) + assert not graph.steps + + +@given(pipeline_definition=mock_pipeline_definition()) +@settings( + suppress_health_check=[HealthCheck.function_scoped_fixture], + 
deadline=None, +) +def test_from_definition( + mocked_responses, pipeline_definition, mock_conn, mock_config +) -> None: + """ + Test that a PipelineGraph object can be constructed from a pipeline + specification. The model version is fetched for each step, so a response + is mocked for each. In depth testing of each step is handled in + test_pipeline_step.test_steps_from_pipeline_spec. + """ + for step in pipeline_definition["steps"]: + mocked_responses.get( + f"https://test_socket/api/v1/registry/model_versions/{step['model_version_id']}", + json={"name": "test"}, + status=200, + ) + mocked_responses.get( + f"https://test_socket/api/v1/registry/registered_models/0", + json={}, + status=200, + ) + graph = PipelineGraph._from_definition( + pipeline_definition=pipeline_definition, conn=mock_conn, conf=mock_config + ) + assert isinstance(graph, PipelineGraph) + assert len(graph.steps) == len(pipeline_definition["steps"]) + + +def test_to_graph_definition(make_mock_pipeline_step) -> None: + """ + Test that a pipeline graph specification can be constructed from a + PipelineGraph object + """ + step_1 = make_mock_pipeline_step() + step_2 = make_mock_pipeline_step() + step_3 = make_mock_pipeline_step() + step_2.set_predecessors([step_1]) + step_3.set_predecessors([step_2]) + graph = PipelineGraph(steps=[step_1, step_2, step_3]) + graph_spec = graph._to_graph_definition() + assert graph_spec == [ + { + "name": step_1.name, + "predecessors": [], + }, + { + "name": step_2.name, + "predecessors": [step_1.name], + }, + { + "name": step_3.name, + "predecessors": [step_2.name], + }, + ] + + +def test_to_step_definition(make_mock_pipeline_step) -> None: + """ + Test that a pipeline steps specification can be constructed from a + PipelineGraph object. 
+ """ + step_1 = make_mock_pipeline_step() + step_2 = make_mock_pipeline_step() + graph = PipelineGraph(steps=[step_1, step_2]) + step_specs = graph._to_steps_definition() + assert step_specs == [ + { + "name": step_1.name, + "model_version_id": step_1.model_version.id, + }, + { + "name": step_2.name, + "model_version_id": step_2.model_version.id, + }, + ] diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py new file mode 100644 index 0000000000..1a01b334fd --- /dev/null +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for the PipelineStep class +""" + +import random + +from hypothesis import given, HealthCheck, settings + +from tests.unit_tests.strategies import mock_pipeline_definition +from verta.pipeline import PipelineStep + + +@given(pipeline_definition=mock_pipeline_definition()) +@settings( + suppress_health_check=[HealthCheck.function_scoped_fixture], + deadline=None, +) +def test_steps_from_pipeline_definition( + pipeline_definition, + mock_conn, + mock_config, + mocked_responses, +) -> None: + """ + Test that a list of PipelineStep objects can be constructed and returned from + a pipeline definition. The registered model, model version, and environment + is fetched for each step, so a response is mocked for each call. 
+ """ + graph = pipeline_definition["graph"] + for step in pipeline_definition["steps"]: + mocked_responses.get( + f"https://test_socket/api/v1/registry/model_versions/{step['model_version_id']}", + json={"model_version": step["model_version_id"]}, + status=200, + ) + mocked_responses.get( + f"https://test_socket/api/v1/registry/registered_models/0", + json={}, + status=200, + ) + generated_steps = PipelineStep._steps_from_pipeline_definition( + pipeline_definition=pipeline_definition, + conn=mock_conn, + conf=mock_config, + ) + # we have the same number of steps as in the pipeline definition + assert len(generated_steps) == len(pipeline_definition["steps"]) + for spec_step, gen_step in zip(pipeline_definition["steps"], generated_steps): + # each step is converted to a PipelineStep object + assert isinstance(gen_step, PipelineStep) + # the names are the same for the steps and their definitions + assert gen_step.name == spec_step["name"] + # predecessors for each step are also converted to PipelineStep objects + for i in gen_step.predecessors: + assert isinstance(i, PipelineStep) + # the predecessors for each step are the same as in the definition + assert set([i.name for i in gen_step.predecessors]) == set( + [s["predecessors"] for s in graph if gen_step.name == s["name"]][0] + ) + + +def test_to_step_spec(make_mock_registered_model_version) -> None: + """Test that a PipelineStep object can be converted to a step specification""" + mod_version = make_mock_registered_model_version() + step = PipelineStep( + model_version=mod_version, + name="test_name", + predecessors=[], # predecessors not included in step spec + ) + assert step._to_step_spec() == { + "name": "test_name", + "model_version_id": mod_version.id, + } + + +def test_to_graph_spec( + make_mock_registered_model_version, make_mock_pipeline_step +) -> None: + """Test that a PipelineStep object can be converted to a step specification""" + predecessors = [make_mock_pipeline_step() for _ in 
range(random.randint(1, 5))] + step = PipelineStep( + model_version=make_mock_registered_model_version(), + name="test_name", + predecessors=predecessors, + ) + assert step._to_graph_spec() == { + "name": "test_name", + "predecessors": [s.name for s in predecessors], + } + + +def test_set_predecessors_add( + make_mock_registered_model_version, make_mock_pipeline_step +) -> None: + """Test that predecessors can be added to a PipelineStep object""" + predecessor_1 = make_mock_pipeline_step() + predecessor_2 = make_mock_pipeline_step() + step = PipelineStep( + model_version=make_mock_registered_model_version(), + name="test_name", + predecessors=[predecessor_1], + ) + step.set_predecessors(step.predecessors + [predecessor_2]) + assert set(step.predecessors) == {predecessor_1, predecessor_2} + + +def test_set_predecessors_remove( + make_mock_registered_model_version, make_mock_pipeline_step +) -> None: + """Test that predecessors can be removed from a PipelineStep object""" + predecessors = [make_mock_pipeline_step() for _ in range(random.randint(2, 10))] + steps_to_remain = predecessors[: len(predecessors) // 2] + step = PipelineStep( + model_version=make_mock_registered_model_version(), + name="test_name", + predecessors=predecessors, + ) + step.set_predecessors(steps_to_remain) + assert set(step.predecessors) == set(steps_to_remain) + + +def test_change_model_version(make_mock_registered_model_version) -> None: + """Test that a PipelineStep object can have its model version changed""" + model_ver_1 = make_mock_registered_model_version() + model_ver_2 = make_mock_registered_model_version() + step = PipelineStep( + model_version=model_ver_1, + name="test_name", + predecessors=[], + ) + assert step.model_version == model_ver_1 + step.set_model_version(model_ver_2) + assert step.model_version == model_ver_2 diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py new file mode 
100644 index 0000000000..5d65617f92 --- /dev/null +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -0,0 +1,206 @@ +# -*- coding: utf-8 -*- +""" +Unit tests for the RegisteredPipeline class +""" + +import pytest +from hypothesis import given, HealthCheck, settings + +from tests.unit_tests.strategies import mock_pipeline_definition +from verta.pipeline import RegisteredPipeline + + +def test_copy_graph( + make_mock_pipeline_graph, make_mock_registered_model_version +) -> None: + """Test that the graph of a RegisteredPipeline can be copied""" + graph = make_mock_pipeline_graph() + pipeline = RegisteredPipeline( + pipeline_graph=graph, + registered_model_version=make_mock_registered_model_version(), + ) + copied_graph = pipeline.copy_graph() + assert copied_graph.steps == graph.steps # same steps + assert copied_graph != graph # different objects + + +@given(pipeline_definition=mock_pipeline_definition()) +@settings( + suppress_health_check=[HealthCheck.function_scoped_fixture], + deadline=None, +) +def test_log_pipeline_definition_artifact( + pipeline_definition, + mocked_responses, + make_mock_pipeline_graph, + make_mock_registered_model_version, +) -> None: + """ + Verify the expected sequence of calls when a pipeline definition + is logged as an artifact to the pipeline's model version. 
+ """ + pipeline = RegisteredPipeline( + pipeline_graph=make_mock_pipeline_graph(), + registered_model_version=make_mock_registered_model_version(), + ) + # Fetch the model + mocked_responses.get( + f"https://test_socket/api/v1/registry/model_versions/{pipeline.id}", + json={}, + status=200, + ) + # Fetch the model version + mocked_responses.put( + f"https://test_socket/api/v1/registry/registered_models/0/model_versions/{pipeline.id}", + json={}, + status=200, + ) + # Fetch the artifact upload URL + mocked_responses.post( + f"https://test_socket/api/v1/registry/model_versions/{pipeline.id}/getUrlForArtifact", + json={ + "url": f"https://account.s3.amazonaws.com/development/ModelVersionEntity/" + f"{pipeline.id}/pipeline.json" + }, + status=200, + ) + # Upload the artifact + mocked_responses.put( + f"https://account.s3.amazonaws.com/development/ModelVersionEntity/{pipeline.id}/pipeline.json", + json={}, + status=200, + ) + pipeline._log_pipeline_definition_artifact() + + +def test_to_pipeline_definition( + make_mock_pipeline_graph, make_mock_registered_model_version +) -> None: + """ + Test that a pipeline definition can be constructed from a + RegisteredPipeline object. In depth testing of the `_to_graph_definition` + and `to_steps_definition` functions are handled in unit tests for + PipelineGraph. 
+ """ + graph = make_mock_pipeline_graph() + pipeline = RegisteredPipeline( + pipeline_graph=graph, + registered_model_version=make_mock_registered_model_version(), + ) + pipeline_definition = pipeline._to_pipeline_definition() + assert pipeline_definition == { + "pipeline_version_id": pipeline.id, + "graph": graph._to_graph_definition(), + "predecessors": graph._to_steps_definition(), + } + + +def test_to_pipeline_configuration_valid( + make_mock_pipeline_graph, + make_mock_registered_model_version, + make_mock_step_resources, +) -> None: + """ + Test that a valid pipeline configuration can be constructed from a + RegisteredPipeline object and a valid list of pipeline resources. + """ + graph = make_mock_pipeline_graph() + step_names = [step.name for step in graph.steps] + mock_res = make_mock_step_resources(step_names) + pipeline = RegisteredPipeline( + pipeline_graph=graph, + registered_model_version=make_mock_registered_model_version(), + ) + + pipeline_configuration = pipeline._to_pipeline_configuration( + pipeline_resources=mock_res + ) + assert pipeline_configuration["pipeline_version_id"] == pipeline.id + for graph_step, config_step in zip(graph.steps, pipeline_configuration["steps"]): + # All steps are included in the configuration + assert graph_step.name == config_step["name"] + # All steps in the config have resources + assert "resources" in config_step.keys() + + +def test_to_pipeline_configuration_invalid_resources( + make_mock_pipeline_graph, + make_mock_registered_model_version, + make_mock_step_resources, +) -> None: + """ + Test that a ValueError is raised when an invalid step name is included + in the provided pipeline resources. 
(Does not match a step name in the + pipeline's graph) + """ + graph = make_mock_pipeline_graph() + step_names = [step.name for step in graph.steps] + mock_res = make_mock_step_resources(step_names) + mock_res["invalid_step_name"] = make_mock_step_resources(["invalid_step_name"]) + pipeline = RegisteredPipeline( + pipeline_graph=graph, + registered_model_version=make_mock_registered_model_version(), + ) + + with pytest.raises(ValueError): + pipeline._to_pipeline_configuration(pipeline_resources=mock_res) + + +def test_to_pipeline_configuration_no_resources( + make_mock_pipeline_graph, make_mock_registered_model_version +) -> None: + """ + Test that a pipeline configuration can be constructed from a + RegisteredPipeline object without providing pipeline resources. + """ + graph = make_mock_pipeline_graph() + pipeline = RegisteredPipeline( + pipeline_graph=graph, + registered_model_version=make_mock_registered_model_version(), + ) + + pipeline_configuration = pipeline._to_pipeline_configuration() + assert pipeline_configuration["pipeline_version_id"] == pipeline.id + for graph_step, config_step in zip(graph.steps, pipeline_configuration["steps"]): + # All steps are included in the configuration + assert graph_step.name == config_step["name"] + # All steps in the config have resources + assert "resources" not in config_step.keys() + + +def test_from_pipeline_definition( + make_mock_registered_model_version, + mock_conn, + mock_config, + mocked_responses, +) -> None: + """ + Test that a RegisteredPipeline object can be constructed from a pipeline + definition. The model version's `_get_artifact` function is mocked to + return a simple, consistent pipeline definition. Calls relates to the + fetching of the artifact are mocked. 
+ """ + mocked_responses.get( + "https://test_socket/api/v1/registry/model_versions/1", + json={}, + status=200, + ) + mocked_responses.get( + "https://test_socket/api/v1/registry/model_versions/2", + json={}, + status=200, + ) + mocked_responses.get( + "https://test_socket/api/v1/registry/registered_models/0", + json={}, + status=200, + ) + + rmv = make_mock_registered_model_version() + pipeline = RegisteredPipeline._from_pipeline_definition( + registered_model_version=rmv, + conn=mock_conn, + conf=mock_config, + ) + assert isinstance(pipeline, RegisteredPipeline) + assert pipeline.id == rmv.id diff --git a/client/verta/tests/unit_tests/strategies.py b/client/verta/tests/unit_tests/strategies.py index 6241e0aee7..015928582d 100644 --- a/client/verta/tests/unit_tests/strategies.py +++ b/client/verta/tests/unit_tests/strategies.py @@ -1,18 +1,16 @@ # -*- coding: utf-8 -*- """Hypothesis composite strategies for use in client unit tests.""" - from string import ascii_letters, ascii_lowercase, hexdigits from typing import Any, Dict, Optional import hypothesis.strategies as st +from tests.strategies import json_strategy from verta._internal_utils._utils import _VALID_FLAT_KEY_CHARS, python_to_val_proto from verta._protos.public.common import CommonService_pb2 from verta._protos.public.modeldb.versioning import Code_pb2, Dataset_pb2 -from verta.endpoint import KafkaSettings, build - -from tests.strategies import json_strategy +from verta.endpoint import build, KafkaSettings @st.composite @@ -262,3 +260,98 @@ def mock_workspace(draw): ) ) return workspace + + +@st.composite +def mock_pipeline_definition(draw): + """Generate a mocked pipeline specification dictionary""" + + # step names in a pipeline must be unique + step_names = draw( + st.lists(st.text(min_size=5, max_size=25), min_size=5, max_size=5, unique=True) + ) + model_versions = draw( + st.lists( + st.text(alphabet=["1", "2", "3", "4", "5"], min_size=1), + min_size=5, + max_size=5, + unique=True, + ) + ) + + 
return { + "graph": [ + {"predecessors": [], "name": step_names[0]}, + {"predecessors": [step_names[0]], "name": step_names[1]}, + {"predecessors": [step_names[1]], "name": step_names[2]}, + {"predecessors": [step_names[1]], "name": step_names[3]}, + {"predecessors": [step_names[2], step_names[1]], "name": step_names[4]}, + ], + "pipeline_version_id": draw(st.integers(min_value=1, max_value=1000)), + "steps": [ + { + "model_version_id": model_versions[0], + "name": step_names[0], + }, + { + "model_version_id": model_versions[1], + "name": step_names[1], + }, + { + "model_version_id": model_versions[2], + "name": step_names[2], + }, + { + "model_version_id": model_versions[3], + "name": step_names[3], + }, + { + "model_version_id": model_versions[4], + "name": step_names[4], + }, + ], + } + + +@st.composite +def mock_pipeline_resources_dict(draw): + """Generate a mocked pipeline resources dictionary""" + return { + "resources": { + "cpu_millis": draw(st.integers(min_value=1)), + "memory": draw(st.text(min_size=1)), + "nvidia_gpu": { + "model": draw(st.enums("T4", "V100")), + "number": draw(st.integers(min_value=1, max_value=1000)), + }, + } + } + + +@st.composite +def mock_pipeline_step_configuration(draw): + """Generate a mocked pipeline step configuration dictionary""" + return { + "build_id": draw(st.integers(min_value=1)), + "env": draw( + st.dictionaries( + keys=st.text(min_size=1), + values=st.text(min_size=1), + min_size=1, + ) + ), + "resources": mock_pipeline_resources_dict(), + "name": draw(st.text(min_size=1)), + } + + +@st.composite +def mock_pipeline_configuration(draw): + """Generate a mocked pipeline step configuration dictionary with Kafka settings""" + return { + "pipeline_version_id": draw(st.integers(min_value=1)), + "steps": [ + mock_pipeline_step_configuration(), + mock_pipeline_step_configuration(), + ], + } diff --git a/client/verta/verta/pipeline/__init__.py b/client/verta/verta/pipeline/__init__.py new file mode 100644 index 
0000000000..af64740fcb --- /dev/null +++ b/client/verta/verta/pipeline/__init__.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +"""Utilities for defining and interacting with pipelines.""" + +from verta._internal_utils import documentation +from ._pipeline_graph import PipelineGraph +from ._pipeline_step import PipelineStep +from ._registered_pipeline import RegisteredPipeline + +documentation.reassign_module( + [ + PipelineGraph, + PipelineStep, + RegisteredPipeline, + ], + module_name=__name__, +) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py new file mode 100644 index 0000000000..5789fb6809 --- /dev/null +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -0,0 +1,89 @@ +# -*- coding: utf-8 -*- + +from typing import Any, Dict, List + +from verta._internal_utils._utils import Configuration, Connection +from ._pipeline_step import PipelineStep + + +class PipelineGraph: + """ + A collection of PipelineSteps to be run as a single inference pipeline. + + Parameters + ---------- + steps : list of :class:`~verta.pipeline.PipelineStep` + List of all possible steps of the pipeline. Ordering of steps in the pipeline + itself is determined by the predecessors provided to each step, thus ordering + of this list is irrelevant. + + Attributes + ---------- + steps: list of :class:`~verta.pipeline.PipelineStep` + List of PipelineSteps comprising all possible steps in the PipelineGraph. 
+ """ + + def __init__(self, steps: List[PipelineStep]): + self._steps = steps + + def __repr__(self): + return "\n".join((f"\nPipelineGraph steps:\n{self._format_steps()}",)) + + def _format_steps(self): + """Format steps for improved readability in __repr__() function.""" + return "\n".join([str(s) for s in self._steps]) + + @property + def steps(self): + return self._steps + + @steps.setter + def steps(self, value): + raise AttributeError("cannot set attribute 'steps'; please use set_steps()") + + def set_steps(self, steps: List[str]) -> None: + """ + Set the list of steps for this PipelineGraph. + + Parameters + ---------- + steps : list of :class:`~verta.pipeline.PipelineStep`, optional + List of all possible steps of the pipeline graph. Order does not matter. + """ + if not isinstance(steps, list): + raise TypeError("steps must be a list of PipelineStep objects") + for step in steps: + if not isinstance(step, PipelineStep): + raise TypeError("steps must be a list of PipelineStep objects") + steps = list(set(steps)) + self._steps = steps + + @classmethod + def _from_definition( + cls, pipeline_definition: Dict[str, Any], conn: Connection, conf: Configuration + ) -> "PipelineGraph": + """ + Create a PipelineGraph instance from a specification dict. + + Parameters + ---------- + pipeline_definition : dict + Specification dict from which to create the Pipeline. + """ + return cls( + steps=PipelineStep._steps_from_pipeline_definition( + pipeline_definition, conn, conf + ), + ) + + def _to_graph_definition(self) -> List[Dict[str, Any]]: + """ + Convert this PipelineGraph to a graph dict formatted for a pipeline definition. + """ + return [step._to_graph_spec() for step in self.steps] + + def _to_steps_definition(self) -> List[Dict[str, Any]]: + """ + Convert this PipelineGraph to a dict formatted for a pipeline definition. 
+        """
+        return [step._to_step_spec() for step in self.steps]
diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py
new file mode 100644
index 0000000000..c403011d23
--- /dev/null
+++ b/client/verta/verta/pipeline/_pipeline_step.py
@@ -0,0 +1,213 @@
+# -*- coding: utf-8 -*-
+
+from typing import Any, Dict, List, Optional
+
+from verta._internal_utils._utils import Configuration, Connection
+from verta.registry.entities import RegisteredModel, RegisteredModelVersion
+
+
+class PipelineStep:
+    """
+    A single step within an inference pipeline, representing a single model version to be run.
+
+    Parameters
+    ----------
+    name : str
+        Name of the step, for use within the scope of the pipeline only.
+    model_version : :class:`~verta.registry.entities.RegisteredModelVersion`
+        Registered model version to run for the step.
+    predecessors : list, optional
+        List of PipelineSteps whose outputs will be treated as inputs to this step.
+        If not included, the step is assumed to be an initial step. Values must be unique.
+
+    Attributes
+    ----------
+    name : str
+        Name of the step within the scope of the pipeline.
+    model_version
+        :class:`~verta.registry.entities.RegisteredModelVersion` run by this step.
+    predecessors : list
+        List of PipelineSteps whose outputs will be treated as inputs to this step.
+    """
+
+    def __init__(
+        self,
+        name: str,
+        model_version: RegisteredModelVersion,
+        predecessors: Optional[
+            List["PipelineStep"]
+        ] = None,  # Optional because it could be the first step with no predecessors
+    ):
+        self._name = name
+        self._model_version = model_version
+        self._predecessors = predecessors or list()
+
+        # avoid the need to pass in connection params when building as local object
+        self._registered_model: Optional[RegisteredModel] = None
+
+        # avoid additional http calls to refresh RMV cache.
+        self._registered_model_id = self._model_version.registered_model_id
+
+    def __repr__(self) -> str:
+        sequence = (
+            "\nPipelineStep:",
+            f"step name: {self.name}",
+        )
+        if self._registered_model:  # don't display on local-only objects
+            sequence += (f"registered_model: {self._registered_model.name}",)
+        sequence += (
+            f"registered_model_version: {self.model_version.name}",
+            f"registered_model_version_id: {self.model_version.id}",
+            f"predecessors: {[s.name for s in self.predecessors]}",
+        )
+        return "\n".join(sequence)
+
+    @property
+    def model_version(self) -> RegisteredModelVersion:
+        return self._model_version
+
+    @model_version.setter
+    def model_version(self, value) -> None:
+        """Raise a more informative error than the default."""
+        raise AttributeError(
+            "can't set attribute 'model_version'; please use set_model_version()"
+        )
+
+    def set_model_version(self, new_model_version: RegisteredModelVersion) -> None:
+        """
+        Change the registered model version associated with this step.
+
+        Parameters
+        ----------
+        new_model_version : :class:`~verta.registry.entities.RegisteredModelVersion`
+            Registered model version to use for the step.
+        """
+        if not isinstance(new_model_version, RegisteredModelVersion):
+            raise TypeError(
+                f"model_version must be a RegisteredModelVersion object, not {type(new_model_version)}"
+            )
+        self._model_version = new_model_version
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @name.setter
+    def name(self, value) -> None:
+        """Raise a more informative error than the default."""
+        raise AttributeError("can't set attribute 'name'; please use set_name()")
+
+    def set_name(self, name: str) -> None:
+        """
+        Change the name of this step.
+
+        Parameters
+        ----------
+        name : str
+            New name to use for the step.
+        """
+        if not isinstance(name, str):
+            raise TypeError(f"name must be a string, not {type(name)}")
+        self._name = name
+
+    @property
+    def predecessors(self) -> List["PipelineStep"]:
+        return list(dict.fromkeys(self._predecessors))  # deduplicated, order kept
+
+    @predecessors.setter
+    def predecessors(self, value) -> None:
+        """Raise a more informative error than the default."""
+        raise AttributeError(
+            "can't set attribute 'predecessors'; please use set_predecessors()"
+        )
+
+    def set_predecessors(self, steps: List["PipelineStep"]) -> None:
+        """
+        Set the predecessors associated with this step.
+
+        Parameters
+        ----------
+        steps : list
+            List of PipelineStep objects whose outputs will be treated as inputs to this step.
+        """
+        if not isinstance(steps, list):
+            raise TypeError(f"predecessors must be type list, not {type(steps)}")
+        for step in steps:
+            if not isinstance(step, PipelineStep):
+                raise TypeError(
+                    f"individual predecessors must be type PipelineStep, not {type(step)}"
+                )
+        self._predecessors = steps
+
+    def _get_registered_model(self, conn: Connection, conf: Configuration) -> None:
+        """
+        Fetch the registered model associated with this step's model version.
+        This is to provide important context to the user via the __repr__ method
+        when a registered pipeline is fetched from the backend.
+        """
+        rm = RegisteredModel._get_by_id(
+            id=self._registered_model_id, conn=conn, conf=conf
+        )
+        self._registered_model = rm
+
+    @classmethod
+    def _steps_from_pipeline_definition(
+        cls, pipeline_definition: Dict[str, Any], conn: Connection, conf: Configuration
+    ) -> List["PipelineStep"]:
+        """
+        Return a list of PipelineStep objects from a pipeline specification
+
+        Parameters
+        ----------
+        pipeline_definition : dict
+            Specification dictionary for the whole pipeline
+        conn, conf
+            Connection and Configuration objects for fetching each step's model and model version
+
+        Returns
+        -------
+        list of :class:`~verta.pipeline.PipelineStep`
+            List of steps in the pipeline spec as PipelineStep objects
+        """
+        steps: List["PipelineStep"] = list()
+        for step in pipeline_definition["steps"]:
+            steps.append(
+                cls(
+                    name=step["name"],
+                    model_version=RegisteredModelVersion._get_by_id(
+                        id=step["model_version_id"], conn=conn, conf=conf
+                    ),
+                    predecessors=[],
+                )
+            )
+        for step_object in steps:
+            step_object._get_registered_model(conn=conn, conf=conf)
+            predecessor_names = [
+                s["predecessors"]
+                for s in pipeline_definition["graph"]
+                if s["name"] == step_object.name
+            ][0]
+            step_object.set_predecessors(
+                [s for s in steps if s.name in predecessor_names]
+            )
+        return steps
+
+    def _to_step_spec(self) -> Dict[str, Any]:
+        """
+        Return a dictionary representation of this step, formatted for a
+        pipeline definition.
+        """
+        return {
+            "name": self.name,
+            "model_version_id": self.model_version.id,
+        }
+
+    def _to_graph_spec(self) -> Dict[str, Any]:
+        """
+        Return a dictionary representation of predecessors for this step,
+        formatted for a pipeline definition.
+        """
+        return {
+            "name": self.name,
+            "predecessors": [s.name for s in self.predecessors],
+        }
diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py
new file mode 100644
index 0000000000..5b178f4609
--- /dev/null
+++ b/client/verta/verta/pipeline/_registered_pipeline.py
@@ -0,0 +1,183 @@
+# -*- coding: utf-8 -*-
+
+import json
+import copy
+from typing import Any, Dict, Optional
+
+from verta._internal_utils._utils import Configuration, Connection
+from verta.endpoint.resources import Resources
+from verta.pipeline import PipelineGraph
+from verta.registry.entities import RegisteredModelVersion
+
+
+class RegisteredPipeline:
+    """
+    Object representing a version of a registered inference pipeline.
+    There should not be a need to instantiate this class directly; please use
+    :meth:`Client.create_registered_pipeline() <verta.Client.create_registered_pipeline>`
+    for creating a new pipeline, or
+    :meth:`Client.get_registered_pipeline() <verta.Client.get_registered_pipeline>`
+    for fetching existing pipelines.
+
+    Attributes
+    ----------
+    name: str
+        Name of this pipeline.
+    id: int
+        Auto-assigned ID of this Pipeline.
+    pipeline_graph: :class:`~verta.pipeline.PipelineGraph`
+        PipelineGraph object containing all possible steps in the Pipeline.
+    """
+
+    def __init__(
+        self,
+        registered_model_version: RegisteredModelVersion,
+        pipeline_graph: PipelineGraph,
+    ) -> None:
+        """
+        Create a Pipeline instance from an existing RegisteredModelVersion object
+        and the provided pipeline graph. Name and ID are captured once upon creation
+        to avoid additional HTTP calls to refresh the cache of the RMV, because
+        pipelines are immutable.
+        """
+        self._registered_model_version = registered_model_version
+        self._name = self._registered_model_version.name
+        self._id = self._registered_model_version.id
+        self._pipeline_graph = pipeline_graph
+
+    def __repr__(self) -> str:
+        return "\n".join(
+            (
+                "RegisteredPipeline:",
+                f"pipeline name: {self.name}",
+                f"pipeline id: {self.id}",
+                f"\n{self._pipeline_graph}",
+            )
+        )
+
+    @property
+    def name(self) -> str:
+        return self._name
+
+    @property
+    def id(self) -> int:
+        return self._id
+
+    @property
+    def pipeline_graph(self) -> PipelineGraph:
+        return self._pipeline_graph
+
+    def copy_graph(self) -> PipelineGraph:
+        """
+        Return a shallow copy of the PipelineGraph of this pipeline. RegisteredPipeline
+        objects are immutable once registered with Verta. This function returns
+        a PipelineGraph object that can be modified and used to create and register
+        a new RegisteredPipeline.
+        """
+        return copy.copy(self.pipeline_graph)
+
+    def _log_pipeline_definition_artifact(self) -> None:
+        """
+        Log the pipeline definition as an artifact of the registered model version.
+        """
+        # NOTE(review): the definition is handed over as a dict, while
+        # _from_pipeline_definition() parses the stored artifact with json.loads();
+        # confirm that log_artifact() stores it as JSON text.
+        self._registered_model_version.log_artifact(
+            "pipeline.json", self._to_pipeline_definition()
+        )
+
+    def _get_pipeline_definition_artifact(self) -> Any:
+        """
+        Get the pipeline definition artifact from the registered model version,
+        exactly as returned by ``get_artifact()`` (it is not parsed here).
+        """
+        return self._registered_model_version.get_artifact("pipeline.json")
+
+    def _to_pipeline_definition(self) -> Dict[str, Any]:
+        """
+        Create a complete pipeline definition dict from this pipeline's ID and PipelineGraph.
+        Used in conjunction with the client function for creating a registered
+        pipeline from a pipeline graph.
+        """
+        return {
+            "pipeline_version_id": self.id,
+            "graph": self.pipeline_graph._to_graph_definition(),
+            "steps": self.pipeline_graph._to_steps_definition(),
+        }
+
+    def _to_pipeline_configuration(
+        self, pipeline_resources: Optional[Dict[str, Resources]] = None
+    ) -> Dict[str, Any]:
+        """
+        Build a pipeline configuration dict for this pipeline. The
+        `env` and `build` keys are not included in the configuration
+        resulting in default values being used by the backend.
+
+        Parameters
+        ----------
+        pipeline_resources : dict of str to :class:`~verta.endpoint.resources.Resources`, optional
+            Resources to apply to individual steps, keyed by step name.
+            The mapping passed in is not modified.
+
+        Returns
+        -------
+        dict
+            Dictionary representation of a pipeline configuration.
+
+        Raises
+        ------
+        ValueError
+            If `pipeline_resources` has an entry that was not applied to any step.
+        """
+        # consume a private copy so the caller's mapping is left untouched
+        unused_resources = dict(pipeline_resources) if pipeline_resources else {}
+        steps = list()
+        for step in self.pipeline_graph.steps:
+            step_config = {
+                "name": step.name,
+            }
+            step_res = unused_resources.get(step.name, None)
+            if step_res:
+                step_config["resources"] = unused_resources.pop(step.name)._as_dict()
+            steps.append(step_config)
+        if unused_resources:
+            raise ValueError(
+                f"pipeline_resources contains resources for steps not in "
+                f"the pipeline {unused_resources.keys()}"
+            )
+        return {
+            "pipeline_version_id": self.id,
+            "steps": steps,
+        }
+
+    @classmethod
+    def _from_pipeline_definition(
+        cls,
+        registered_model_version: RegisteredModelVersion,
+        conn: Connection,
+        conf: Configuration,
+    ) -> "RegisteredPipeline":
+        """
+        Create a Pipeline instance from the ``pipeline.json`` artifact of a model version.
+        Used when fetching a registered pipeline from the Verta backend.
+
+        Parameters
+        ----------
+        registered_model_version : :class:`~verta.registry.entities.RegisteredModelVersion`
+            RegisteredModelVersion object associated with this pipeline.
+        conn : :class:`~verta._internal_utils._utils.Connection`
+            Connection object for fetching the models and model versions associated with steps.
+        conf : :class:`~verta._internal_utils._utils.Configuration`
+            Configuration object for fetching the models and model versions associated with steps.
+        """
+        pipeline_definition_str = registered_model_version.get_artifact(
+            "pipeline.json"
+        ).read()
+        pipeline_definition = json.loads(pipeline_definition_str)
+        return cls(
+            registered_model_version=registered_model_version,
+            pipeline_graph=PipelineGraph._from_definition(
+                pipeline_definition=pipeline_definition, conn=conn, conf=conf
+            ),
+        )
From ce4e3f3b3f7de0153229d0b385c0feaa6cd418be Mon Sep 17 00:00:00 2001
From: ewagner-verta
Date: Tue, 22 Aug 2023 10:46:52 +0200
Subject: [PATCH 002/117] fix: drop change to pytest fixture

---
 client/verta/tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/client/verta/tests/conftest.py b/client/verta/tests/conftest.py
index 0ef7ba5c7e..bcb9ad8286 100644
--- a/client/verta/tests/conftest.py
+++ b/client/verta/tests/conftest.py
@@ -503,7 +503,7 @@ def class_created_entities():
         entity.delete()
 
 
-@pytest.fixture(scope="session")
+@pytest.fixture
 def model_version(registered_model):
     yield registered_model.get_or_create_version()
 
From b1bd32db8ab467c2bc5ef6b7dab556d9758d2518 Mon Sep 17 00:00:00 2001
From: ewagner-verta
Date: Tue, 22 Aug 2023 11:05:49 +0200
Subject: [PATCH 003/117] test: fix up unit tests for changes to mock rmv fixture

---
 .../tests/unit_tests/deployment/test_build.py | 9 +++++----
 .../tests/unit_tests/deployment/test_endpoint.py | 16 ++++++++--------
 .../registry/test_model_dependencies.py | 1 +
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/client/verta/tests/unit_tests/deployment/test_build.py b/client/verta/tests/unit_tests/deployment/test_build.py
index f6d3eea30b..5fadbaaad1 100644
--- a/client/verta/tests/unit_tests/deployment/test_build.py
+++ b/client/verta/tests/unit_tests/deployment/test_build.py
@@ -60,14 +60,15 @@ def test_endpoint_get_current_build(
 @settings(suppress_health_check=[HealthCheck.function_scoped_fixture])
 @given(build_dicts=st.lists(build_dict(), unique_by=lambda d: d["id"]))
 def test_model_version_list_builds(
-
mock_registered_model_version, + make_mock_registered_model_version, mock_conn, mocked_responses, build_dicts, ): """Verify we can construct Build objects from list_builds().""" + rmv = make_mock_registered_model_version() registry_url = f"{mock_conn.scheme}://{mock_conn.socket}/api/v1/registry" - model_version_url = f"{registry_url}/registered_models/{mock_registered_model_version.registered_model_id}" + model_version_url = f"{registry_url}/registered_models/{rmv.registered_model_id}" deployment_url = f"{mock_conn.scheme}://{mock_conn.socket}/api/v1/deployment" list_builds_url = f"{deployment_url}/builds" @@ -77,7 +78,7 @@ def test_model_version_list_builds( status=200, match=[ query_param_matcher( - {"model_version_id": mock_registered_model_version.id}, + {"model_version_id": rmv.id}, ), ], json={"builds": build_dicts}, @@ -88,7 +89,7 @@ def test_model_version_list_builds( json={"workspace_id": "123"}, ) - builds = mock_registered_model_version.list_builds() + builds = rmv.list_builds() # verify builds are ordered by creation date assert [b.id for b in builds] == [ diff --git a/client/verta/tests/unit_tests/deployment/test_endpoint.py b/client/verta/tests/unit_tests/deployment/test_endpoint.py index ccca025f15..f2a5ea950f 100644 --- a/client/verta/tests/unit_tests/deployment/test_endpoint.py +++ b/client/verta/tests/unit_tests/deployment/test_endpoint.py @@ -145,7 +145,7 @@ def test_kafka_cluster_config_id_default( mock_endpoint, mock_conn, mocked_responses, - mock_registered_model_version, + make_mock_registered_model_version, ) -> None: """Verify that, while updating an endpoint, not including a `cluster_config_id` in the KafkaSettings results in the correct sequence of HTTP requests, including @@ -181,7 +181,7 @@ def test_kafka_cluster_config_id_default( ) mock_endpoint.update( - mock_registered_model_version, kafka_settings=kafka_settings + make_mock_registered_model_version(), kafka_settings=kafka_settings ) _responses.assert_call_count(get_configs_url, 1) 
@@ -193,7 +193,7 @@ def test_kafka_cluster_config_id_value( mock_endpoint, mock_conn, mocked_responses, - mock_registered_model_version, + make_mock_registered_model_version, ) -> None: """Verify that, while updating an endpoint, the provided value for `cluster_config_id` is used, resulting in the correct sequence of HTTP @@ -226,7 +226,7 @@ def test_kafka_cluster_config_id_value( url=stages_url + f"/{STAGE_ID}", status=200, json={"id": STAGE_ID} ) mock_endpoint.update( - mock_registered_model_version, kafka_settings=kafka_settings + make_mock_registered_model_version(), kafka_settings=kafka_settings ) @settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) @@ -241,7 +241,7 @@ def test_kafka_config_missing_config_id_exception( mock_endpoint, mock_conn, mocked_responses, - mock_registered_model_version, + make_mock_registered_model_version, ) -> None: """In the unlikely evert the ID of a found Kafka config is missing from the backend response, the expected exception is raised. 
@@ -259,7 +259,7 @@ def test_kafka_config_missing_config_id_exception( _responses.get(url=get_configs_url, status=200, json=kafka_configs_response) with pytest.raises(RuntimeError) as err: mock_endpoint.update( - mock_registered_model_version, kafka_settings=kafka_settings + make_mock_registered_model_version(), kafka_settings=kafka_settings ) assert ( str(err.value) @@ -275,7 +275,7 @@ def test_no_kafka_configs_found_exception( mock_endpoint, mock_conn, mocked_responses, - mock_registered_model_version, + make_mock_registered_model_version, ) -> None: """If no valid Kafka configurations are found, the expected exception is raised.""" deployment_url = f"{mock_conn.scheme}://{mock_conn.socket}/api/v1/deployment" @@ -290,7 +290,7 @@ def test_no_kafka_configs_found_exception( _responses.get(url=get_configs_url, status=200, json={"configurations": []}) with pytest.raises(RuntimeError) as err: mock_endpoint.update( - mock_registered_model_version, kafka_settings=kafka_settings + make_mock_registered_model_version(), kafka_settings=kafka_settings ) assert ( str(err.value) diff --git a/client/verta/tests/unit_tests/registry/test_model_dependencies.py b/client/verta/tests/unit_tests/registry/test_model_dependencies.py index d8579f4f25..8af9c2938e 100644 --- a/client/verta/tests/unit_tests/registry/test_model_dependencies.py +++ b/client/verta/tests/unit_tests/registry/test_model_dependencies.py @@ -145,6 +145,7 @@ def test_class_module_names(dependency_testing_model) -> None: 'requests', 'sklearn', 'torch', + 'typing', 'urllib3', 'verta', 'yaml', From d54fc0af490ac6c5d161f970ea4b9eecde35e556 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 11:13:42 +0200 Subject: [PATCH 004/117] test: update simple pipeline fixture and doc string to reflect that it is actually a factory fixture --- client/verta/tests/unit_tests/conftest.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/client/verta/tests/unit_tests/conftest.py 
b/client/verta/tests/unit_tests/conftest.py index 5b4ae79c83..a3a0abbe2a 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -71,8 +71,13 @@ def __repr__(self): # avoid network calls when displaying test results @pytest.fixture(scope="session") -def mock_simple_pipeline_definition() -> Dict[str, Any]: - """Return a mocked pipeline definition for use in tests""" +def make_mock_simple_pipeline_definition() -> Callable: + """ + Return a callable function for creating a simple mocked pipeline + definition for use in tests, including a parameter for the pipeline + id to ensure consistency in tests that mock creation of a pipeline + object from a pipeline definition. + """ def simple_pipeline_definition(id: int) -> Dict[str, Any]: return { @@ -98,7 +103,7 @@ def simple_pipeline_definition(id: int) -> Dict[str, Any]: @pytest.fixture(scope="session") def make_mock_registered_model_version( - mock_conn, mock_config, mock_simple_pipeline_definition + mock_conn, mock_config, make_mock_simple_pipeline_definition ) -> Callable: """Return a callable function for creating mocked objects of the RegisteredModelVersion class for use in tests that require multiple @@ -110,7 +115,7 @@ def __repr__(self): # avoid network calls when displaying test results return object.__repr__(self) def _get_artifact(self, key=None, artifact_type=None): - return json.dumps(mock_simple_pipeline_definition(id=self.id)).encode( + return json.dumps(make_mock_simple_pipeline_definition(id=self.id)).encode( "utf-8" ) From 2d01a7034c7dcb3cc1c9d1cbc1c6e964b96e9414 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 11:36:49 +0200 Subject: [PATCH 005/117] test: guarantee uniqueness in mock rmv fixture ids --- client/verta/tests/unit_tests/conftest.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index a3a0abbe2a..dd5d47c07c 100644 --- 
a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -126,8 +126,16 @@ def _make_mock_registered_model_version(): test session. """ + ids = set() model_ver_id = random.randint(1, 1000000) + while model_ver_id in ids: + model_ver_id = random.randint(1, 1000000) + ids.add(model_ver_id) + reg_model_id = random.randint(1, 1000000) + while reg_model_id in ids: + reg_model_id = random.randint(1, 1000000) + ids.add(reg_model_id) return MockRegisteredModelVersion( mock_conn, From 4a237cc5f69e8ad57bf482590248eb25c4466d9c Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 22 Aug 2023 11:37:57 +0200 Subject: [PATCH 006/117] Update client/verta/tests/unit_tests/conftest.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index dd5d47c07c..f1944a0720 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -158,7 +158,7 @@ class for use in tests that require multiple unique instances. 
""" class MockPipelineStep(PipelineStep): - def __repr__(self): + def __repr__(self): # avoid network calls when displaying test results return object.__repr__(self) def _make_mock_pipeline_step(): From b7a07c64a95523acde6f1510e003c6092eb5377d Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 22 Aug 2023 11:39:16 +0200 Subject: [PATCH 007/117] Update client/verta/tests/unit_tests/conftest.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index f1944a0720..85a8e1e6dd 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -179,7 +179,7 @@ class for use in tests that require multiple unique instances. """ class MockPipelineGraph(PipelineGraph): - def __repr__(self): + def __repr__(self): # avoid network calls when displaying test results return object.__repr__(self) def _make_mock_pipeline_graph(): From 2e2438760484df70b2520f64d1d077188fac7a03 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 11:42:14 +0200 Subject: [PATCH 008/117] test: use static string for name in mock pipe step --- client/verta/tests/unit_tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index 85a8e1e6dd..5baa7dbcdd 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -164,7 +164,7 @@ def __repr__(self): # avoid network calls when displaying test results def _make_mock_pipeline_step(): return MockPipelineStep( model_version=make_mock_registered_model_version(), - name=st.text(min_size=1), + name="test_pipeline_step_name", predecessors=[], ) From b8e70ff5e3749fc1715e191831fa2ea2b4a803bc Mon 
Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 22 Aug 2023 11:43:56 +0200 Subject: [PATCH 009/117] Update client/verta/tests/unit_tests/conftest.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index 5baa7dbcdd..c70264c493 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -201,7 +201,7 @@ def make_mock_step_resources() -> Callable: a given list of step names. """ - def _make_mock_step_resources(step_names: List[str]) -> Dict[str, Any]: + def _make_mock_step_resources(step_names: List[str]) -> Dict[str, Resources]: res = dict() for name in step_names: res.update( From c6bb7e1be4141fdca2c9e8cfaf9c574e6bfb654e Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 22 Aug 2023 11:46:13 +0200 Subject: [PATCH 010/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index a84d8abd6d..6875459d54 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -82,7 +82,7 @@ def test_to_graph_definition(make_mock_pipeline_step) -> None: ] -def test_to_step_definition(make_mock_pipeline_step) -> None: +def test_to_steps_definition(make_mock_pipeline_step) -> None: """ Test that a pipeline steps specification can be constructed from a PipelineGraph object. 
From ed63443a473348dee91899cc65bd6365b144ff56 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 11:48:39 +0200 Subject: [PATCH 011/117] test: change naming of function scoped variable --- .../verta/tests/unit_tests/pipeline/test_pipeline_step.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index 1a01b334fd..dcaa55f95d 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -62,15 +62,15 @@ def test_steps_from_pipeline_definition( def test_to_step_spec(make_mock_registered_model_version) -> None: """Test that a PipelineStep object can be converted to a step specification""" - mod_version = make_mock_registered_model_version() + model_version = make_mock_registered_model_version() step = PipelineStep( - model_version=mod_version, + model_version=model_version, name="test_name", predecessors=[], # predecessors not included in step spec ) assert step._to_step_spec() == { "name": "test_name", - "model_version_id": mod_version.id, + "model_version_id": model_version.id, } From 6b6443ffdef4a74c3fc8dba9119045668f011983 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 22 Aug 2023 11:49:39 +0200 Subject: [PATCH 012/117] Update client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- .../verta/tests/unit_tests/pipeline/test_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 5d65617f92..6128e59913 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ 
b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -21,7 +21,7 @@ def test_copy_graph( ) copied_graph = pipeline.copy_graph() assert copied_graph.steps == graph.steps # same steps - assert copied_graph != graph # different objects + assert copied_graph is not graph # different objects @given(pipeline_definition=mock_pipeline_definition()) From 4828aabb0553a890bb631d52edeac053a60cc73b Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 11:54:10 +0200 Subject: [PATCH 013/117] test: update mocked responses to pull connection scheme and socket dynamically in case of changes to fixture --- .../unit_tests/pipeline/test_registered_pipeline.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 6128e59913..cbb3583830 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -39,25 +39,26 @@ def test_log_pipeline_definition_artifact( Verify the expected sequence of calls when a pipeline definition is logged as an artifact to the pipeline's model version. 
""" + rmv = make_mock_registered_model_version() pipeline = RegisteredPipeline( pipeline_graph=make_mock_pipeline_graph(), - registered_model_version=make_mock_registered_model_version(), + registered_model_version=rmv, ) # Fetch the model mocked_responses.get( - f"https://test_socket/api/v1/registry/model_versions/{pipeline.id}", + f"{rmv._conn.scheme}://{rmv._conn.socket}/api/v1/registry/model_versions/{pipeline.id}", json={}, status=200, ) # Fetch the model version mocked_responses.put( - f"https://test_socket/api/v1/registry/registered_models/0/model_versions/{pipeline.id}", + f"{rmv._conn.scheme}://{rmv._conn.socket}/api/v1/registry/registered_models/0/model_versions/{pipeline.id}", json={}, status=200, ) # Fetch the artifact upload URL mocked_responses.post( - f"https://test_socket/api/v1/registry/model_versions/{pipeline.id}/getUrlForArtifact", + f"{rmv._conn.scheme}://{rmv._conn.socket}/api/v1/registry/model_versions/{pipeline.id}/getUrlForArtifact", json={ "url": f"https://account.s3.amazonaws.com/development/ModelVersionEntity/" f"{pipeline.id}/pipeline.json" From b16ec5fd204e4a77713d1e9fa246f8c4fd5b7af7 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 11:57:16 +0200 Subject: [PATCH 014/117] test: fix incorrect comment line --- .../verta/tests/unit_tests/pipeline/test_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index cbb3583830..a546530640 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -165,7 +165,7 @@ def test_to_pipeline_configuration_no_resources( for graph_step, config_step in zip(graph.steps, pipeline_configuration["steps"]): # All steps are included in the configuration assert graph_step.name == config_step["name"] - # All steps in the config 
have resources + # No resources are found in the configuration assert "resources" not in config_step.keys() From 1a1f826009e0580b78768daad47cfc62d37f2310 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 22 Aug 2023 11:58:26 +0200 Subject: [PATCH 015/117] Update client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- .../verta/tests/unit_tests/pipeline/test_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index a546530640..6d6f8037b9 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -178,7 +178,7 @@ def test_from_pipeline_definition( """ Test that a RegisteredPipeline object can be constructed from a pipeline definition. The model version's `_get_artifact` function is mocked to - return a simple, consistent pipeline definition. Calls relates to the + return a simple, consistent pipeline definition. Calls related to the fetching of the artifact are mocked. 
""" mocked_responses.get( From 692c36930113fe765c271d9f39570c910de4f791 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 12:27:57 +0200 Subject: [PATCH 016/117] test: drop unused strategies and remove 'mock' from naming convention to avoid confusion with pytest fixtures --- client/verta/tests/unit_tests/conftest.py | 2 - .../pipeline/test_pipeline_graph.py | 4 +- .../unit_tests/pipeline/test_pipeline_step.py | 4 +- .../pipeline/test_registered_pipeline.py | 4 +- client/verta/tests/unit_tests/strategies.py | 46 +------------------ 5 files changed, 7 insertions(+), 53 deletions(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index c70264c493..609b81c9da 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -8,11 +8,9 @@ from typing import Any, Callable, Dict, List from unittest.mock import patch -import hypothesis.strategies as st import pytest import responses -from tests.unit_tests.strategies import mock_pipeline_definition from verta._internal_utils._utils import Configuration, Connection from verta._protos.public.registry import RegistryService_pb2 as _RegistryService from verta.client import Client diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index 6875459d54..3342ceb6ac 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -5,7 +5,7 @@ from hypothesis import given, HealthCheck, settings -from tests.unit_tests.strategies import mock_pipeline_definition +from tests.unit_tests.strategies import pipeline_definition from verta.pipeline import PipelineGraph @@ -22,7 +22,7 @@ def test_set_steps(make_mock_pipeline_step) -> None: assert not graph.steps -@given(pipeline_definition=mock_pipeline_definition()) +@given(pipeline_definition=pipeline_definition()) 
@settings( suppress_health_check=[HealthCheck.function_scoped_fixture], deadline=None, diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index dcaa55f95d..15afc533f3 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -7,11 +7,11 @@ from hypothesis import given, HealthCheck, settings -from tests.unit_tests.strategies import mock_pipeline_definition +from tests.unit_tests.strategies import pipeline_definition from verta.pipeline import PipelineStep -@given(pipeline_definition=mock_pipeline_definition()) +@given(pipeline_definition=pipeline_definition()) @settings( suppress_health_check=[HealthCheck.function_scoped_fixture], deadline=None, diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 6d6f8037b9..e427b4e354 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -6,7 +6,7 @@ import pytest from hypothesis import given, HealthCheck, settings -from tests.unit_tests.strategies import mock_pipeline_definition +from tests.unit_tests.strategies import pipeline_definition from verta.pipeline import RegisteredPipeline @@ -24,7 +24,7 @@ def test_copy_graph( assert copied_graph is not graph # different objects -@given(pipeline_definition=mock_pipeline_definition()) +@given(pipeline_definition=pipeline_definition()) @settings( suppress_health_check=[HealthCheck.function_scoped_fixture], deadline=None, diff --git a/client/verta/tests/unit_tests/strategies.py b/client/verta/tests/unit_tests/strategies.py index 015928582d..31c166a871 100644 --- a/client/verta/tests/unit_tests/strategies.py +++ b/client/verta/tests/unit_tests/strategies.py @@ -263,7 +263,7 @@ def mock_workspace(draw): @st.composite 
-def mock_pipeline_definition(draw): +def pipeline_definition(draw): """Generate a mocked pipeline specification dictionary""" # step names in a pipeline must be unique @@ -311,47 +311,3 @@ def mock_pipeline_definition(draw): }, ], } - - -@st.composite -def mock_pipeline_resources_dict(draw): - """Generate a mocked pipeline resources dictionary""" - return { - "resources": { - "cpu_millis": draw(st.integers(min_value=1)), - "memory": draw(st.text(min_size=1)), - "nvidia_gpu": { - "model": draw(st.enums("T4", "V100")), - "number": draw(st.integers(min_value=1, max_value=1000)), - }, - } - } - - -@st.composite -def mock_pipeline_step_configuration(draw): - """Generate a mocked pipeline step configuration dictionary""" - return { - "build_id": draw(st.integers(min_value=1)), - "env": draw( - st.dictionaries( - keys=st.text(min_size=1), - values=st.text(min_size=1), - min_size=1, - ) - ), - "resources": mock_pipeline_resources_dict(), - "name": draw(st.text(min_size=1)), - } - - -@st.composite -def mock_pipeline_configuration(draw): - """Generate a mocked pipeline step configuration dictionary with Kafka settings""" - return { - "pipeline_version_id": draw(st.integers(min_value=1)), - "steps": [ - mock_pipeline_step_configuration(), - mock_pipeline_step_configuration(), - ], - } From 63d0f6cab5172e1fa2a358d582c4177439010baf Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 12:32:26 +0200 Subject: [PATCH 017/117] test: refactor pipeline definition strategy to be linear pipeline --- client/verta/tests/unit_tests/strategies.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/verta/tests/unit_tests/strategies.py b/client/verta/tests/unit_tests/strategies.py index 31c166a871..809a3abcd5 100644 --- a/client/verta/tests/unit_tests/strategies.py +++ b/client/verta/tests/unit_tests/strategies.py @@ -284,8 +284,8 @@ def pipeline_definition(draw): {"predecessors": [], "name": step_names[0]}, {"predecessors": [step_names[0]], "name": 
step_names[1]}, {"predecessors": [step_names[1]], "name": step_names[2]}, - {"predecessors": [step_names[1]], "name": step_names[3]}, - {"predecessors": [step_names[2], step_names[1]], "name": step_names[4]}, + {"predecessors": [step_names[2]], "name": step_names[3]}, + {"predecessors": [step_names[3]], "name": step_names[4]}, ], "pipeline_version_id": draw(st.integers(min_value=1, max_value=1000)), "steps": [ From 362c6b0c557bb0a261bec8dd1393c8eb33d8f9f1 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 22 Aug 2023 12:59:49 +0200 Subject: [PATCH 018/117] Update client/verta/verta/pipeline/_pipeline_graph.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_pipeline_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 5789fb6809..776250e134 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -24,7 +24,7 @@ class PipelineGraph: """ def __init__(self, steps: List[PipelineStep]): - self._steps = steps + self._steps = self.set_steps(steps) def __repr__(self): return "\n".join((f"\nPipelineGraph steps:\n{self._format_steps()}",)) From a4389dec169b4dd1d6610cad99b93f73ec380141 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 13:23:07 +0200 Subject: [PATCH 019/117] fix: make pipeline_step set_steps function return the steps --- client/verta/verta/pipeline/_pipeline_graph.py | 1 + 1 file changed, 1 insertion(+) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 776250e134..6c4864fee3 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -57,6 +57,7 @@ def set_steps(self, steps: List[str]) -> None: raise TypeError("steps must be a list of 
PipelineStep objects") steps = list(set(steps)) self._steps = steps + return self.steps @classmethod def _from_definition( From 37f9bbc5230b701d399f11f5751e93ff5ac27ab8 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 13:31:37 +0200 Subject: [PATCH 020/117] fix: update repr formatting for PipelineGraph --- client/verta/verta/pipeline/_pipeline_graph.py | 2 +- client/verta/verta/pipeline/_pipeline_step.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 6c4864fee3..ab2476d77e 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -27,7 +27,7 @@ def __init__(self, steps: List[PipelineStep]): self._steps = self.set_steps(steps) def __repr__(self): - return "\n".join((f"\nPipelineGraph steps:\n{self._format_steps()}",)) + return f"\nPipelineGraph steps:\n{self._format_steps()}" def _format_steps(self): """Format steps for improved readability in __repr__() function.""" diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index c403011d23..43c5de8111 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -50,7 +50,7 @@ def __init__( def __repr__(self) -> str: sequence = ( - "\nPipelineStep:", + "\n PipelineStep:", f"step name: {self.name}", ) if self._registered_model: # don't display on local-only objects` @@ -60,7 +60,7 @@ def __repr__(self) -> str: f"registered_model_version_id: {self.model_version.id}", f"predecessors: {[s.name for s in self.predecessors]}", ) - return "\n".join(sequence) + return "\n ".join(sequence) @property def model_version(self) -> RegisteredModelVersion: From 1c18e237a94bc4d16a39ec9143cf295aab4d02e4 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 22 Aug 2023 13:33:08 +0200 
Subject: [PATCH 021/117] Update client/verta/verta/pipeline/_pipeline_graph.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_pipeline_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index ab2476d77e..2131a96811 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -31,7 +31,7 @@ def __repr__(self): def _format_steps(self): """Format steps for improved readability in __repr__() function.""" - return "\n".join([str(s) for s in self._steps]) + return "\n".join([repr(s) for s in self._steps]) @property def steps(self): From 947b99479e31784bc5bcd1b4168fec6ca4f43bea Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 22 Aug 2023 13:35:34 +0200 Subject: [PATCH 022/117] Update client/verta/verta/pipeline/_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_pipeline_step.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 43c5de8111..e6078bfc6e 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -24,8 +24,8 @@ class PipelineStep: ---------- name : str Name of the step within the scope of the pipeline. - model_version - :class:`~verta.registry.entities.RegisteredModelVersion` run by this step. + model_version : :class:`~verta.registry.entities.RegisteredModelVersion` + Model version being run by this step. predecessors : list List of PipelineSteps whose outputs will be treated as inputs to this step. 
""" From 62ea6bb332dea161b128803438b70b4607954cd6 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 13:49:00 +0200 Subject: [PATCH 023/117] fix: remove 'optional' tag in doc string for required variable --- client/verta/verta/pipeline/_pipeline_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 2131a96811..3315f48a46 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -47,7 +47,7 @@ def set_steps(self, steps: List[str]) -> None: Parameters ---------- - steps : list of :class:`~verta.deployment.PipelineStep`, optional + steps : list of :class:`~verta.deployment.PipelineStep` List of all possible steps of the pipline graph. Order does not matter. """ if not isinstance(steps, list): From 97e940e872e60bc27066d8ef900be1cc00034e09 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 13:53:24 +0200 Subject: [PATCH 024/117] fix: use more specific error messaging for type violations when setting steps of a graph --- client/verta/verta/pipeline/_pipeline_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 3315f48a46..e2de574aa8 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -51,10 +51,10 @@ def set_steps(self, steps: List[str]) -> None: List of all possible steps of the pipline graph. Order does not matter. 
""" if not isinstance(steps, list): - raise TypeError("steps must be a list of PipelineStep objects") + raise TypeError(f"steps must be type list, not {type(steps)}") for step in steps: if not isinstance(step, PipelineStep): - raise TypeError("steps must be a list of PipelineStep objects") + raise TypeError(f"individual steps must be type PipelineStep, not {type(step)}") steps = list(set(steps)) self._steps = steps return self.steps From eb44a2115fd7f8278dd9dba84e761cd4489b2814 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 13:55:18 +0200 Subject: [PATCH 025/117] docs: update doc string with missing params --- client/verta/verta/pipeline/_pipeline_graph.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index e2de574aa8..2d117b8359 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -70,6 +70,10 @@ def _from_definition( ---------- pipeline_spec : dict Specification dict from which to create the Pipeline. 
+ conn : :class:`~verta._internal_utils._utils.Connection` + Connection object for fetching the model version associated with the step + conf: :class:`~verta._internal_utils._utils.Configuration` + Configuration object for fetching the model version associated with the step """ return cls( steps=PipelineStep._steps_from_pipeline_definition( From 94d48195fa1cdd6fcf92eaff51d4d18685232600 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 14:00:49 +0200 Subject: [PATCH 026/117] docs: update doc strings for increased clarity on their purpose --- client/verta/verta/pipeline/_pipeline_graph.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 2d117b8359..b2239f0dc8 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -83,12 +83,16 @@ def _from_definition( def _to_graph_definition(self) -> List[Dict[str, Any]]: """ - Convert this PipelineGraph to a graph dict formatted for a pipeline definition. + The back-end expects a list of steps and their predecessors as part of the + `graph` object within a PipelineDefinition. This method converts this PipelineGraph + to a formatted list of steps with predecessors for that purpose. """ return [step._to_graph_spec() for step in self.steps] def _to_steps_definition(self) -> List[Dict[str, Any]]: """ - Convert this PipelineGraph to a dict formatted for a pipeline definition. + The back-end expects a list of steps and their model versions as part of the + `steps` object within a PipelineDefinition. This method converts this PipelineGraph + to a formatted list of steps with model versions for that purpose. 
""" return [step._to_step_spec() for step in self.steps] From e935e0e2be4e2f2cbb0d90e54be8d6958698409b Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 22 Aug 2023 14:02:53 +0200 Subject: [PATCH 027/117] Update client/verta/verta/pipeline/_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_pipeline_step.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index e6078bfc6e..f4f4d4a39d 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -38,9 +38,9 @@ def __init__( List["PipelineStep"] ] = None, # Optional because it could be the first step with no predecessors ): - self._name = name - self._model_version = model_version - self._predecessors = predecessors or list() + self._name = self.set_name(name) + self._model_version = self.set_model_version(model_version) + self._predecessors = self.set_predecessors(predecessors) if predecessors else list() # avoid the need to pass in connection params when building as local object self._registered_model: Optional[RegisteredModel] = None From 725e1c6ccdf652c50cf809f21f14ed42cb4ed8fe Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 14:27:48 +0200 Subject: [PATCH 028/117] docs: correct param name in doc string for set_steps --- client/verta/verta/pipeline/_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index f4f4d4a39d..fdb27d37a0 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -103,7 +103,7 @@ def set_name(self, name: str) -> None: Parameters ---------- - new_name : str + name : str New name to use for the step. 
""" if not isinstance(name, str): From d652c8d87c1339303dbaf400b33aeae920ed4e79 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 22 Aug 2023 14:39:06 +0200 Subject: [PATCH 029/117] Update client/verta/verta/pipeline/_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index fdb27d37a0..dac654f7c4 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -155,7 +155,7 @@ def _steps_from_pipeline_definition( cls, pipeline_definition: Dict[str, Any], conn: Connection, conf: Configuration ) -> List["PipelineStep"]: """ - Return a list of PipelineStep objects by from a pipeline specification + Return a list of PipelineStep objects from a pipeline specification Parameters ---------- From 83ba0d6cf7e02d3f3f9cf4c5d1e92906065c3d2b Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 14:43:35 +0200 Subject: [PATCH 030/117] docs: Fix doc string formatting --- .../pipeline/test_pipeline_graph.py | 13 +++--- .../unit_tests/pipeline/test_pipeline_step.py | 7 +-- .../pipeline/test_registered_pipeline.py | 27 +++++------- .../verta/verta/pipeline/_pipeline_graph.py | 15 +++---- client/verta/verta/pipeline/_pipeline_step.py | 31 ++++++------- .../verta/pipeline/_registered_pipeline.py | 44 +++++++++---------- 6 files changed, 66 insertions(+), 71 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index 3342ceb6ac..8d3c06491b 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -30,9 +30,10 @@ def 
test_set_steps(make_mock_pipeline_step) -> None: def test_from_definition( mocked_responses, pipeline_definition, mock_conn, mock_config ) -> None: - """ - Test that a PipelineGraph object can be constructed from a pipeline - specification. The model version is fetched for each step, so a response + """Test that a PipelineGraph object can be constructed from a pipeline + specification. + + The model version is fetched for each step, so a response is mocked for each. In depth testing of each step is handled in test_pipeline_step.test_steps_from_pipeline_spec. """ @@ -55,8 +56,7 @@ def test_from_definition( def test_to_graph_definition(make_mock_pipeline_step) -> None: - """ - Test that a pipeline graph specification can be constructed from a + """Test that a pipeline graph specification can be constructed from a PipelineGraph object """ step_1 = make_mock_pipeline_step() @@ -83,8 +83,7 @@ def test_to_graph_definition(make_mock_pipeline_step) -> None: def test_to_steps_definition(make_mock_pipeline_step) -> None: - """ - Test that a pipeline steps specification can be constructed from a + """Test that a pipeline steps specification can be constructed from a PipelineGraph object. """ step_1 = make_mock_pipeline_step() diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index 15afc533f3..db054f986a 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -22,9 +22,10 @@ def test_steps_from_pipeline_definition( mock_config, mocked_responses, ) -> None: - """ - Test that a list of PipelineStep objects can be constructed and returned from - a pipeline definition. The registered model, model version, and environment + """Test that a list of PipelineStep objects can be constructed and returned from + a pipeline definition. 
+ + The registered model, model version, and environment is fetched for each step, so a response is mocked for each call. """ graph = pipeline_definition["graph"] diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index e427b4e354..918621c119 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -""" -Unit tests for the RegisteredPipeline class -""" +"""Unit tests for the RegisteredPipeline class""" import pytest from hypothesis import given, HealthCheck, settings @@ -77,9 +75,10 @@ def test_log_pipeline_definition_artifact( def test_to_pipeline_definition( make_mock_pipeline_graph, make_mock_registered_model_version ) -> None: - """ - Test that a pipeline definition can be constructed from a - RegisteredPipeline object. In depth testing of the `_to_graph_definition` + """Test that a pipeline definition can be constructed from a + RegisteredPipeline object. + + In depth testing of the `_to_graph_definition` and `to_steps_definition` functions are handled in unit tests for PipelineGraph. """ @@ -101,8 +100,7 @@ def test_to_pipeline_configuration_valid( make_mock_registered_model_version, make_mock_step_resources, ) -> None: - """ - Test that a valid pipeline configuration can be constructed from a + """Test that a valid pipeline configuration can be constructed from a RegisteredPipeline object and a valid list of pipeline resources. """ graph = make_mock_pipeline_graph() @@ -129,8 +127,7 @@ def test_to_pipeline_configuration_invalid_resources( make_mock_registered_model_version, make_mock_step_resources, ) -> None: - """ - Test that a ValueError is raised when an invalid step name is included + """Test that a ValueError is raised when an invalid step name is included in the provided pipeline resources. 
(Does not match a step name in the pipeline's graph) """ @@ -150,8 +147,7 @@ def test_to_pipeline_configuration_invalid_resources( def test_to_pipeline_configuration_no_resources( make_mock_pipeline_graph, make_mock_registered_model_version ) -> None: - """ - Test that a pipeline configuration can be constructed from a + """Test that a pipeline configuration can be constructed from a RegisteredPipeline object without providing pipeline resources. """ graph = make_mock_pipeline_graph() @@ -175,9 +171,10 @@ def test_from_pipeline_definition( mock_config, mocked_responses, ) -> None: - """ - Test that a RegisteredPipeline object can be constructed from a pipeline - definition. The model version's `_get_artifact` function is mocked to + """Test that a RegisteredPipeline object can be constructed from a pipeline + definition. + + The model version's `_get_artifact` function is mocked to return a simple, consistent pipeline definition. Calls related to the fetching of the artifact are mocked. """ diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index b2239f0dc8..12a7c6cf4e 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -7,8 +7,7 @@ class PipelineGraph: - """ - A collection of PipelineSteps to be run as a single inference pipeline. + """A collection of PipelineSteps to be run as a single inference pipeline. Parameters ---------- @@ -42,8 +41,7 @@ def steps(self, value): raise AttributeError("cannot set attribute 'steps'; please use set_steps()") def set_steps(self, steps: List[str]) -> None: - """ - Set the list of steps for this PipelineGraph. + """Set the list of steps for this PipelineGraph. 
Parameters ---------- @@ -63,8 +61,7 @@ def set_steps(self, steps: List[str]) -> None: def _from_definition( cls, pipeline_definition: Dict[str, Any], conn: Connection, conf: Configuration ) -> "PipelineGraph": - """ - Create a PipelineGraph instance from a specification dict. + """Create a PipelineGraph instance from a specification dict. Parameters ---------- @@ -82,7 +79,8 @@ def _from_definition( ) def _to_graph_definition(self) -> List[Dict[str, Any]]: - """ + """Create a pipeline graph specification from this PipelineGraph. + The back-end expects a list of steps and their predecessors as part of the `graph` object within a PipelineDefinition. This method converts this PipelineGraph to a formatted list of steps with predecessors for that purpose. @@ -90,7 +88,8 @@ def _to_graph_definition(self) -> List[Dict[str, Any]]: return [step._to_graph_spec() for step in self.steps] def _to_steps_definition(self) -> List[Dict[str, Any]]: - """ + """Create a pipeline steps specification from this PipelineGraph. + The back-end expects a list of steps and their model versions as part of the `steps` object within a PipelineDefinition. This method converts this PipelineGraph to a formatted list of steps with model versions for that purpose. diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index dac654f7c4..a775ad5d78 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -7,8 +7,8 @@ class PipelineStep: - """ - A single step within an inference pipeline, representing a single model version to be run. + """A single step within an inference pipeline, representing a single model + version to be run. 
Parameters ---------- @@ -40,7 +40,9 @@ def __init__( ): self._name = self.set_name(name) self._model_version = self.set_model_version(model_version) - self._predecessors = self.set_predecessors(predecessors) if predecessors else list() + self._predecessors = ( + self.set_predecessors(predecessors) if predecessors else list() + ) # avoid the need to pass in connection params when building as local object self._registered_model: Optional[RegisteredModel] = None @@ -74,8 +76,7 @@ def model_version(self, value) -> None: ) def set_model_version(self, new_model_version: RegisteredModelVersion) -> None: - """ - Change the registered model version associated with this step. + """Change the registered model version associated with this step. Parameters ---------- @@ -98,8 +99,7 @@ def name(self, value) -> None: raise AttributeError("can't set attribute 'name'; please use set_name()") def set_name(self, name: str) -> None: - """ - Change the name of this step. + """Change the name of this step. Parameters ---------- @@ -122,8 +122,7 @@ def predecessors(self, value) -> None: ) def set_predecessors(self, steps: List["PipelineStep"]) -> None: - """ - Set the predecessors associated with this step. + """Set the predecessors associated with this step. Parameters ---------- @@ -140,8 +139,8 @@ def set_predecessors(self, steps: List["PipelineStep"]) -> None: self._predecessors = steps def _get_registered_model(self, conn: Connection, conf: Configuration) -> None: - """ - Fetch the registered model associated with this step's model version. + """Fetch the registered model associated with this step's model version. + This is to provide important context to the user via the _repr_ method when a registered pipeline is fetched from the backend. 
""" @@ -154,8 +153,12 @@ def _get_registered_model(self, conn: Connection, conf: Configuration) -> None: def _steps_from_pipeline_definition( cls, pipeline_definition: Dict[str, Any], conn: Connection, conf: Configuration ) -> List["PipelineStep"]: +<<<<<<< Updated upstream """ Return a list of PipelineStep objects from a pipeline specification +======= + """Return a list of PipelineStep objects by from a pipeline specification +>>>>>>> Stashed changes Parameters ---------- @@ -193,8 +196,7 @@ def _steps_from_pipeline_definition( return steps def _to_step_spec(self) -> Dict[str, Any]: - """ - Return a dictionary representation of this step, formatted for a + """Return a dictionary representation of this step, formatted for a pipeline definition. """ return { @@ -203,8 +205,7 @@ def _to_step_spec(self) -> Dict[str, Any]: } def _to_graph_spec(self) -> Dict[str, Any]: - """ - Return a dictionary representation of predecessors for this step, + """Return a dictionary representation of predecessors for this step, formatted for a pipeline definition. """ return { diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 5b178f4609..1db03de26d 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -11,8 +11,8 @@ class RegisteredPipeline: - """ - Object representing a version of a registered inference pipeline. + """Object representing a version of a registered inference pipeline. + There should not be a need to instantiate this class directly; please use :meth:`Client.create_registered_pipeline() ` for creating a new pipeline, or @@ -34,11 +34,11 @@ def __init__( registered_model_version: RegisteredModelVersion, pipeline_graph: PipelineGraph, ): - """ - Create a Pipeline instance from an existing RegisteredModelVersion object - and the provided pipeline graph. 
Name and ID are captured once upon creation - to avoid additional HTTP calls to refresh the cache of the RMV, because - pipelines are immutable. + """Create a Pipeline instance from an existing RegisteredModelVersion object + and the provided pipeline graph. + + Name and ID are captured once upon creation to avoid additional HTTP calls + to refresh the cache of the RMV, because pipelines are immutable. """ self._registered_model_version = registered_model_version self._name = self._registered_model_version.name @@ -68,11 +68,11 @@ def pipeline_graph(self): return self._pipeline_graph def copy_graph(self) -> PipelineGraph: - """ - Return a shallow copy of the PipelineGraph of this pipeline. RegisteredPipeline - objects are immutable once registered with Verta. This function returns - a PipelineGraph object that can be modified and used to create and register - a new RegisteredPipeline. + """Return a shallow copy of the PipelineGraph of this pipeline. + + RegisteredPipeline objects are immutable once registered with Verta. This + function returns a PipelineGraph object that can be modified and used to + create and register a new RegisteredPipeline. """ return copy.copy(self.pipeline_graph) @@ -85,14 +85,12 @@ def _log_pipeline_definition_artifact(self) -> None: ) def _get_pipeline_definition_artifact(self) -> Dict[str, Any]: - """ - Get the pipeline definition artifact from the registered model version. - """ + """Get the pipeline definition artifact from the registered model version.""" return self._registered_model_version.get_artifact("pipeline.json") def _to_pipeline_definition(self) -> Dict[str, Any]: - """ - Create a complete pipeline definition dict from a name and PipelineGraph. + """Create a complete pipeline definition dict from a name and PipelineGraph. + Used in conjunction with the client function for creating a registered pipeline from a pipeline graph. 
""" @@ -105,9 +103,9 @@ def _to_pipeline_definition(self) -> Dict[str, Any]: def _to_pipeline_configuration( self, pipeline_resources: Optional[Dict[str, Resources]] = None ) -> Dict[str, Any]: - """ - Build a pipeline configuration dict for this pipeline. The - `env` and `build` keys are not included in the configuration + """Build a pipeline configuration dict for this pipeline. + + The `env` and `build` keys are not included in the configuration resulting in default values being used by the backend. Parameters @@ -147,9 +145,9 @@ def _from_pipeline_definition( conn: Connection, conf: Configuration, ) -> "RegisteredPipeline": - """ - Create a Pipeline instance from a specification dict. Used when - fetching a registered pipeline from the Verta backend. + """Create a Pipeline instance from a specification dict. + + Used when fetching a registered pipeline from the Verta backend. Parameters ---------- From 937905df2205601f9a3309c18f54b24318beea5b Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 14:44:49 +0200 Subject: [PATCH 031/117] docs: add missing param in doc string --- client/verta/verta/pipeline/_pipeline_step.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index a775ad5d78..2a83632b79 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -153,12 +153,7 @@ def _get_registered_model(self, conn: Connection, conf: Configuration) -> None: def _steps_from_pipeline_definition( cls, pipeline_definition: Dict[str, Any], conn: Connection, conf: Configuration ) -> List["PipelineStep"]: -<<<<<<< Updated upstream - """ - Return a list of PipelineStep objects from a pipeline specification -======= """Return a list of PipelineStep objects by from a pipeline specification ->>>>>>> Stashed changes Parameters ---------- @@ -166,6 +161,8 @@ def _steps_from_pipeline_definition( 
Specification dictionary for the whole pipeline conn : :class:`~verta._internal_utils._utils.Connection` Connection object for fetching the model version associated with the step + conf: :class:`~verta._internal_utils._utils.Configuration` + Configuration object for fetching the model version associated with the step Returns ------- From e932080e4d47694d650efce1345e326a4b010fa8 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 14:50:02 +0200 Subject: [PATCH 032/117] docs: remove indentation of attributes section in class docs --- client/verta/verta/pipeline/_pipeline_graph.py | 4 ++-- client/verta/verta/pipeline/_pipeline_step.py | 12 ++++++------ client/verta/verta/pipeline/_registered_pipeline.py | 12 ++++++------ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 12a7c6cf4e..4b74209978 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -18,8 +18,8 @@ class PipelineGraph: Attributes ---------- - steps: list of :class:`~verta.deployment.PipelineStep` - List of PipelineSteps comprising all possible steps in the PiplineGraph. + steps: list of :class:`~verta.deployment.PipelineStep` + List of PipelineSteps comprising all possible steps in the PiplineGraph. """ def __init__(self, steps: List[PipelineStep]): diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 2a83632b79..4dc29149ab 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -22,12 +22,12 @@ class PipelineStep: Attributes ---------- - name : str - Name of the step within the scope of the pipeline. - model_version : :class:`~verta.registry.entities.RegisteredModelVersion` - Model version being run by this step. 
- predecessors : list - List of PipelineSteps whose outputs will be treated as inputs to this step. + name : str + Name of the step within the scope of the pipeline. + model_version : :class:`~verta.registry.entities.RegisteredModelVersion` + Model version being run by this step. + predecessors : list + List of PipelineSteps whose outputs will be treated as inputs to this step. """ def __init__( diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 1db03de26d..ae20424466 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -21,12 +21,12 @@ class RegisteredPipeline: Attributes ---------- - name: str - Name of this pipeline. - id: int - Auto-assigned ID of this Pipeline. - pipeline_graph: :class:`~verta.pipeline.PipelineGraph` - PipelineGraph object containing all possible steps in the Pipline. + name: str + Name of this pipeline. + id: int + Auto-assigned ID of this Pipeline. + pipeline_graph: :class:`~verta.pipeline.PipelineGraph` + PipelineGraph object containing all possible steps in the Pipline. """ def __init__( From c7a2210e8138ecd63b46769b6e032c6aa09fb8cf Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 15:19:12 +0200 Subject: [PATCH 033/117] refactor: drop redundant 'pipeline' from pipeline_graph param --- client/verta/tests/unit_tests/conftest.py | 14 ++++++------ .../pipeline/test_registered_pipeline.py | 12 +++++----- .../verta/pipeline/_registered_pipeline.py | 22 +++++++++---------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index 609b81c9da..1176fda9ba 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -124,16 +124,16 @@ def _make_mock_registered_model_version(): test session. 
""" - ids = set() + # ids = set() model_ver_id = random.randint(1, 1000000) - while model_ver_id in ids: - model_ver_id = random.randint(1, 1000000) - ids.add(model_ver_id) + # while model_ver_id in ids: + # model_ver_id = random.randint(1, 1000000) + # ids.add(model_ver_id) reg_model_id = random.randint(1, 1000000) - while reg_model_id in ids: - reg_model_id = random.randint(1, 1000000) - ids.add(reg_model_id) + # while reg_model_id in ids: + # reg_model_id = random.randint(1, 1000000) + # ids.add(reg_model_id) return MockRegisteredModelVersion( mock_conn, diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 918621c119..bb99640a5e 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -14,7 +14,7 @@ def test_copy_graph( """Test that the graph of a RegisteredPipeline can be copied""" graph = make_mock_pipeline_graph() pipeline = RegisteredPipeline( - pipeline_graph=graph, + graph=graph, registered_model_version=make_mock_registered_model_version(), ) copied_graph = pipeline.copy_graph() @@ -39,7 +39,7 @@ def test_log_pipeline_definition_artifact( """ rmv = make_mock_registered_model_version() pipeline = RegisteredPipeline( - pipeline_graph=make_mock_pipeline_graph(), + graph=make_mock_pipeline_graph(), registered_model_version=rmv, ) # Fetch the model @@ -84,7 +84,7 @@ def test_to_pipeline_definition( """ graph = make_mock_pipeline_graph() pipeline = RegisteredPipeline( - pipeline_graph=graph, + graph=graph, registered_model_version=make_mock_registered_model_version(), ) pipeline_definition = pipeline._to_pipeline_definition() @@ -107,7 +107,7 @@ def test_to_pipeline_configuration_valid( step_names = [step.name for step in graph.steps] mock_res = make_mock_step_resources(step_names) pipeline = RegisteredPipeline( - pipeline_graph=graph, + graph=graph, 
registered_model_version=make_mock_registered_model_version(), ) @@ -136,7 +136,7 @@ def test_to_pipeline_configuration_invalid_resources( mock_res = make_mock_step_resources(step_names) mock_res["invalid_step_name"] = make_mock_step_resources(["invalid_step_name"]) pipeline = RegisteredPipeline( - pipeline_graph=graph, + graph=graph, registered_model_version=make_mock_registered_model_version(), ) @@ -152,7 +152,7 @@ def test_to_pipeline_configuration_no_resources( """ graph = make_mock_pipeline_graph() pipeline = RegisteredPipeline( - pipeline_graph=graph, + graph=graph, registered_model_version=make_mock_registered_model_version(), ) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index ae20424466..833e29b65f 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -25,14 +25,14 @@ class RegisteredPipeline: Name of this pipeline. id: int Auto-assigned ID of this Pipeline. - pipeline_graph: :class:`~verta.pipeline.PipelineGraph` + graph: :class:`~verta.pipeline.PipelineGraph` PipelineGraph object containing all possible steps in the Pipline. """ def __init__( self, registered_model_version: RegisteredModelVersion, - pipeline_graph: PipelineGraph, + graph: PipelineGraph, ): """Create a Pipeline instance from an existing RegisteredModelVersion object and the provided pipeline graph. 
@@ -43,7 +43,7 @@ def __init__( self._registered_model_version = registered_model_version self._name = self._registered_model_version.name self._id = self._registered_model_version.id - self._pipeline_graph = pipeline_graph + self._graph = graph def __repr__(self): return "\n".join( @@ -51,7 +51,7 @@ def __repr__(self): "RegisteredPipeline:", f"pipeline name: {self.name}", f"pipeline id: {self.id}", - f"\n{self._pipeline_graph}", + f"\n{self._graph}", ) ) @@ -64,8 +64,8 @@ def id(self): return self._id @property - def pipeline_graph(self): - return self._pipeline_graph + def graph(self): + return self._graph def copy_graph(self) -> PipelineGraph: """Return a shallow copy of the PipelineGraph of this pipeline. @@ -74,7 +74,7 @@ def copy_graph(self) -> PipelineGraph: function returns a PipelineGraph object that can be modified and used to create and register a new RegisteredPipeline. """ - return copy.copy(self.pipeline_graph) + return copy.copy(self._graph) def _log_pipeline_definition_artifact(self) -> None: """ @@ -96,8 +96,8 @@ def _to_pipeline_definition(self) -> Dict[str, Any]: """ return { "pipeline_version_id": self.id, - "graph": self.pipeline_graph._to_graph_definition(), - "predecessors": self.pipeline_graph._to_steps_definition(), + "graph": self._graph._to_graph_definition(), + "predecessors": self._graph._to_steps_definition(), } def _to_pipeline_configuration( @@ -117,7 +117,7 @@ def _to_pipeline_configuration( Dictionary representation of a pipeline configuration. 
""" steps = list() - for step in self.pipeline_graph.steps: + for step in self._graph.steps: step_config = { "name": step.name, } @@ -166,7 +166,7 @@ def _from_pipeline_definition( pipeline_definition = json.loads(pipeline_definition_str) return cls( registered_model_version=registered_model_version, - pipeline_graph=PipelineGraph._from_definition( + graph=PipelineGraph._from_definition( pipeline_definition=pipeline_definition, conn=conn, conf=conf ), ) From f52fd6795455c12be2f1a0d4744bc54b42ee077d Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 16:37:45 +0200 Subject: [PATCH 034/117] refactor: make attribute setter methods return their objects now that we're using their validation logic in the __init__ function --- client/verta/verta/pipeline/_pipeline_step.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 4dc29149ab..decc553ca1 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -88,6 +88,7 @@ def set_model_version(self, new_model_version: RegisteredModelVersion) -> None: f"model_version must be a RegisteredModelVersion object, not {type(new_model_version)}" ) self._model_version = new_model_version + return self.model_version @property def name(self) -> str: @@ -109,6 +110,7 @@ def set_name(self, name: str) -> None: if not isinstance(name, str): raise TypeError(f"name must be a string, not {type(name)}") self._name = name + return self.name @property def predecessors(self) -> List["PipelineStep"]: @@ -137,6 +139,7 @@ def set_predecessors(self, steps: List["PipelineStep"]) -> None: f"individual predecessors must be type PipelineStep, not {type(step)}" ) self._predecessors = steps + return self.predecessors def _get_registered_model(self, conn: Connection, conf: Configuration) -> None: """Fetch the registered model associated with this step's model version. 
From e44573569e00411f18dd9460bce9e2de59fdf777 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 16:39:14 +0200 Subject: [PATCH 035/117] refactor: use deepcopy when copying graphs from RPs --- .../pipeline/test_registered_pipeline.py | 14 +++++++++++--- .../verta/verta/pipeline/_registered_pipeline.py | 4 ++-- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index bb99640a5e..bcf80ae6a5 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -11,15 +11,23 @@ def test_copy_graph( make_mock_pipeline_graph, make_mock_registered_model_version ) -> None: - """Test that the graph of a RegisteredPipeline can be copied""" + """Test that the graph of a RegisteredPipeline can be copied. + + Each step in the copied graph should be a new object, but have the same + name, predecessors, and model version as the original. 
+ """ graph = make_mock_pipeline_graph() pipeline = RegisteredPipeline( graph=graph, registered_model_version=make_mock_registered_model_version(), ) copied_graph = pipeline.copy_graph() - assert copied_graph.steps == graph.steps # same steps - assert copied_graph is not graph # different objects + for orig_step, copied_step in zip(graph.steps, copied_graph.steps): + assert orig_step is not copied_step + assert orig_step.name == copied_step.name + assert orig_step.predecessors == copied_step.predecessors + assert orig_step.model_version.id == copied_step.model_version.id + assert copied_graph is not graph @given(pipeline_definition=pipeline_definition()) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 833e29b65f..1f46ca2ddb 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -68,13 +68,13 @@ def graph(self): return self._graph def copy_graph(self) -> PipelineGraph: - """Return a shallow copy of the PipelineGraph of this pipeline. + """Return a deep copy of the PipelineGraph of this pipeline. RegisteredPipeline objects are immutable once registered with Verta. This function returns a PipelineGraph object that can be modified and used to create and register a new RegisteredPipeline. 
""" - return copy.copy(self._graph) + return copy.deepcopy(self._graph) def _log_pipeline_definition_artifact(self) -> None: """ From 82695fbb858262c07421f782e32441f33214f201 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 16:42:48 +0200 Subject: [PATCH 036/117] docs: fix doc string type for pipeline_resources --- client/verta/verta/pipeline/_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 1f46ca2ddb..7c1034273d 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -110,7 +110,7 @@ def _to_pipeline_configuration( Parameters ---------- - pipeline_resources : Resources + pipeline_resources : dict of str to :class:`~verta.endpoint.resources.Resources`, optional Returns ------- From 029e087a37c1581e814d57a6ca68a0b12f3da6cf Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 22 Aug 2023 16:44:08 +0200 Subject: [PATCH 037/117] Update client/verta/verta/pipeline/_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_registered_pipeline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 7c1034273d..6990580d5c 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -114,7 +114,8 @@ def _to_pipeline_configuration( Returns ------- - Dictionary representation of a pipeline configuration. + dict + Representation of a pipeline configuration. 
""" steps = list() for step in self._graph.steps: From 99ae54f1197b50774cd3a116d7b6f5b99db00167 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 17:17:31 +0200 Subject: [PATCH 038/117] refactor: remove usage of .pop from RP _to_pipeline_configuration function --- .../verta/pipeline/_registered_pipeline.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 6990580d5c..4f4b411d59 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -111,12 +111,20 @@ def _to_pipeline_configuration( Parameters ---------- pipeline_resources : dict of str to :class:`~verta.endpoint.resources.Resources`, optional + Resources to be allocated to each step of the pipeline. Keys are step names. Returns ------- dict Representation of a pipeline configuration. """ + if pipeline_resources: + for step_name in pipeline_resources.keys(): + if step_name not in [step.name for step in self._graph.steps]: + raise ValueError( + f"pipeline_resources contains resources for a step not in " + f"the pipeline: {step_name}" + ) steps = list() for step in self._graph.steps: step_config = { @@ -125,15 +133,8 @@ def _to_pipeline_configuration( if pipeline_resources: step_res = pipeline_resources.get(step.name, None) if step_res: - step_config["resources"] = pipeline_resources.pop( - step.name - )._as_dict() + step_config["resources"] = step_res._as_dict() steps.append(step_config) - if pipeline_resources: - raise ValueError( - f"pipeline_resources contains resources for steps not in " - f"the pipeline {pipeline_resources.keys()}" - ) return { "pipeline_version_id": self.id, "steps": steps, From d2924f2eebe0deddb5f466d460b07a757a27c504 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 21:05:27 +0200 Subject: [PATCH 039/117] refactor: drop resources fixture in 
favor of hypothesis strategy and add optional name param to moock_pipeline_step fixture --- client/verta/tests/unit_tests/conftest.py | 38 ++++--------------- .../pipeline/test_pipeline_graph.py | 8 ++-- .../pipeline/test_registered_pipeline.py | 21 +++++----- client/verta/tests/unit_tests/strategies.py | 14 +++++++ 4 files changed, 35 insertions(+), 46 deletions(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index 1176fda9ba..6ecb565784 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -5,7 +5,7 @@ import json import os import random -from typing import Any, Callable, Dict, List +from typing import Any, Callable, Dict, Optional from unittest.mock import patch import pytest @@ -150,19 +150,21 @@ def _make_mock_registered_model_version(): @pytest.fixture(scope="session") def make_mock_pipeline_step(make_mock_registered_model_version) -> Callable: - """ - Return a callable function for creating mocked objects of the PipelineStep + """Return a callable function for creating mocked objects of the PipelineStep class for use in tests that require multiple unique instances. + + The optional `name` parameter is for use in tests where more than one unique + step is required for a singe test. 
""" class MockPipelineStep(PipelineStep): def __repr__(self): # avoid network calls when displaying test results return object.__repr__(self) - def _make_mock_pipeline_step(): + def _make_mock_pipeline_step(name: Optional[str] = None): return MockPipelineStep( model_version=make_mock_registered_model_version(), - name="test_pipeline_step_name", + name=name if name else "test_pipeline_step_name", predecessors=[], ) @@ -190,29 +192,3 @@ def _make_mock_pipeline_graph(): return MockPipelineGraph(steps=[step1, step2, step3]) return _make_mock_pipeline_graph - - -@pytest.fixture(scope="session") -def make_mock_step_resources() -> Callable: - """ - Return a callable function for generating a list of mocked resources for - a given list of step names. - """ - - def _make_mock_step_resources(step_names: List[str]) -> Dict[str, Resources]: - res = dict() - for name in step_names: - res.update( - { - name: Resources( - cpu=random.randint(1, 10), - memory="5Gi", - nvidia_gpu=NvidiaGPU( - model=NvidiaGPUModel.T4, number=random.randint(1, 10) - ), - ), - } - ) - return res - - return _make_mock_step_resources diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index 8d3c06491b..fa07ae4653 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -59,14 +59,14 @@ def test_to_graph_definition(make_mock_pipeline_step) -> None: """Test that a pipeline graph specification can be constructed from a PipelineGraph object """ - step_1 = make_mock_pipeline_step() - step_2 = make_mock_pipeline_step() - step_3 = make_mock_pipeline_step() + step_1 = make_mock_pipeline_step("step_1") + step_2 = make_mock_pipeline_step("step_2") + step_3 = make_mock_pipeline_step("step_3") step_2.set_predecessors([step_1]) step_3.set_predecessors([step_2]) graph = PipelineGraph(steps=[step_1, step_2, step_3]) graph_spec = 
graph._to_graph_definition() - assert graph_spec == [ + assert sorted(graph_spec, key=lambda x: x["name"]) == [ { "name": step_1.name, "predecessors": [], diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index bcf80ae6a5..b22a7592b5 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -4,7 +4,7 @@ import pytest from hypothesis import given, HealthCheck, settings -from tests.unit_tests.strategies import pipeline_definition +from tests.unit_tests.strategies import pipeline_definition, resources from verta.pipeline import RegisteredPipeline @@ -102,25 +102,24 @@ def test_to_pipeline_definition( "predecessors": graph._to_steps_definition(), } - +@given(resources=resources()) def test_to_pipeline_configuration_valid( make_mock_pipeline_graph, make_mock_registered_model_version, - make_mock_step_resources, + resources, ) -> None: """Test that a valid pipeline configuration can be constructed from a RegisteredPipeline object and a valid list of pipeline resources. 
""" graph = make_mock_pipeline_graph() - step_names = [step.name for step in graph.steps] - mock_res = make_mock_step_resources(step_names) + step_resources = {step.name: resources for step in graph.steps} pipeline = RegisteredPipeline( graph=graph, registered_model_version=make_mock_registered_model_version(), ) pipeline_configuration = pipeline._to_pipeline_configuration( - pipeline_resources=mock_res + pipeline_resources=step_resources ) assert pipeline_configuration["pipeline_version_id"] == pipeline.id for graph_step, config_step in zip(graph.steps, pipeline_configuration["steps"]): @@ -130,26 +129,26 @@ def test_to_pipeline_configuration_valid( assert "resources" in config_step.keys() +@given(resources=resources()) def test_to_pipeline_configuration_invalid_resources( make_mock_pipeline_graph, make_mock_registered_model_version, - make_mock_step_resources, + resources, ) -> None: """Test that a ValueError is raised when an invalid step name is included in the provided pipeline resources. 
(Does not match a step name in the pipeline's graph) """ graph = make_mock_pipeline_graph() - step_names = [step.name for step in graph.steps] - mock_res = make_mock_step_resources(step_names) - mock_res["invalid_step_name"] = make_mock_step_resources(["invalid_step_name"]) + step_resources = {step.name: resources for step in graph.steps} + step_resources["invalid_step_name"] = resources pipeline = RegisteredPipeline( graph=graph, registered_model_version=make_mock_registered_model_version(), ) with pytest.raises(ValueError): - pipeline._to_pipeline_configuration(pipeline_resources=mock_res) + pipeline._to_pipeline_configuration(pipeline_resources=step_resources) def test_to_pipeline_configuration_no_resources( diff --git a/client/verta/tests/unit_tests/strategies.py b/client/verta/tests/unit_tests/strategies.py index 809a3abcd5..1baa123fe2 100644 --- a/client/verta/tests/unit_tests/strategies.py +++ b/client/verta/tests/unit_tests/strategies.py @@ -11,6 +11,7 @@ from verta._protos.public.common import CommonService_pb2 from verta._protos.public.modeldb.versioning import Code_pb2, Dataset_pb2 from verta.endpoint import build, KafkaSettings +from verta.endpoint.resources import NvidiaGPU, NvidiaGPUModel, Resources @st.composite @@ -311,3 +312,16 @@ def pipeline_definition(draw): }, ], } + + +@st.composite +def resources(draw): + """Return a strategy emulating the Resources class.""" + return Resources( + cpu=draw(st.integers(min_value=1)), + memory=draw(st.from_regex(r"^[0-9]+[e]?[0-9]*[E|P|T|G|M|K]?[i]?$", fullmatch=True)), + nvidia_gpu=NvidiaGPU( + model=draw(st.sampled_from([NvidiaGPUModel.T4, NvidiaGPUModel.V100])), + number=draw(st.integers(min_value=1)), + ), + ) From af9bf744d7ff3e2286cfa814aa6329bf87641945 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 22 Aug 2023 22:45:11 +0200 Subject: [PATCH 040/117] refactor: use sets instead of lists for predecessors and graph steps --- client/verta/tests/unit_tests/conftest.py | 4 +-- 
.../pipeline/test_pipeline_graph.py | 23 ++++++++----- .../unit_tests/pipeline/test_pipeline_step.py | 33 +++++++++++-------- .../pipeline/test_registered_pipeline.py | 5 ++- .../verta/verta/pipeline/_pipeline_graph.py | 29 ++++++++-------- client/verta/verta/pipeline/_pipeline_step.py | 32 +++++++++--------- 6 files changed, 71 insertions(+), 55 deletions(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index 6ecb565784..09a2f0d4e7 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -165,7 +165,7 @@ def _make_mock_pipeline_step(name: Optional[str] = None): return MockPipelineStep( model_version=make_mock_registered_model_version(), name=name if name else "test_pipeline_step_name", - predecessors=[], + predecessors=set(), ) return _make_mock_pipeline_step @@ -189,6 +189,6 @@ def _make_mock_pipeline_graph(): step2.set_name("step2") step3 = make_mock_pipeline_step() step3.set_name("step3") - return MockPipelineGraph(steps=[step1, step2, step3]) + return MockPipelineGraph(steps={step1, step2, step3}) return _make_mock_pipeline_graph diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index fa07ae4653..074f048542 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -15,10 +15,10 @@ def test_set_steps(make_mock_pipeline_step) -> None: """ step_1 = make_mock_pipeline_step() step_2 = make_mock_pipeline_step() - graph = PipelineGraph(steps=[]) - graph.set_steps([step_1, step_2]) - assert set(graph.steps) == set([step_1, step_2]) - graph.set_steps([]) + graph = PipelineGraph(steps=set()) + graph.set_steps({step_1, step_2}) + assert set(graph.steps) == {step_1, step_2} + graph.set_steps(set()) assert not graph.steps @@ -62,9 +62,9 @@ def test_to_graph_definition(make_mock_pipeline_step) -> 
None: step_1 = make_mock_pipeline_step("step_1") step_2 = make_mock_pipeline_step("step_2") step_3 = make_mock_pipeline_step("step_3") - step_2.set_predecessors([step_1]) - step_3.set_predecessors([step_2]) - graph = PipelineGraph(steps=[step_1, step_2, step_3]) + step_2.set_predecessors({step_1}) + step_3.set_predecessors({step_2}) + graph = PipelineGraph(steps={step_1, step_2, step_3}) graph_spec = graph._to_graph_definition() assert sorted(graph_spec, key=lambda x: x["name"]) == [ { @@ -85,12 +85,14 @@ def test_to_graph_definition(make_mock_pipeline_step) -> None: def test_to_steps_definition(make_mock_pipeline_step) -> None: """Test that a pipeline steps specification can be constructed from a PipelineGraph object. + + Definitions are type list to remain json serializable. """ step_1 = make_mock_pipeline_step() step_2 = make_mock_pipeline_step() - graph = PipelineGraph(steps=[step_1, step_2]) + graph = PipelineGraph(steps={step_1, step_2}) step_specs = graph._to_steps_definition() - assert step_specs == [ + expected_definition = [ { "name": step_1.name, "model_version_id": step_1.model_version.id, @@ -100,3 +102,6 @@ def test_to_steps_definition(make_mock_pipeline_step) -> None: "model_version_id": step_2.model_version.id, }, ] + assert sorted(step_specs, key=lambda x: x["name"]) == sorted( + expected_definition, key=lambda x: x["name"] + ) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index db054f986a..24810508e9 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -22,11 +22,11 @@ def test_steps_from_pipeline_definition( mock_config, mocked_responses, ) -> None: - """Test that a list of PipelineStep objects can be constructed and returned from - a pipeline definition. + """Test that a list of PipelineStep objects can be constructed and + returned from a pipeline definition. 
- The registered model, model version, and environment - is fetched for each step, so a response is mocked for each call. + The registered model, model version, and environment is fetched for + each step, so a response is mocked for each call. """ graph = pipeline_definition["graph"] for step in pipeline_definition["steps"]: @@ -47,7 +47,11 @@ def test_steps_from_pipeline_definition( ) # we have the same number of steps as in the pipeline definition assert len(generated_steps) == len(pipeline_definition["steps"]) - for spec_step, gen_step in zip(pipeline_definition["steps"], generated_steps): + # convert from set to list and sort for comparison + generated_steps_sorted = sorted(list(generated_steps), key=lambda x: x.name) + # sort for comparison + spec_steps_sorted = sorted(pipeline_definition["steps"], key=lambda x: x["name"]) + for spec_step, gen_step in zip(spec_steps_sorted, generated_steps_sorted): # each step is converted to a PipelineStep object assert isinstance(gen_step, PipelineStep) # the names are the same for the steps and their definitions @@ -79,7 +83,7 @@ def test_to_graph_spec( make_mock_registered_model_version, make_mock_pipeline_step ) -> None: """Test that a PipelineStep object can be converted to a step specification""" - predecessors = [make_mock_pipeline_step() for _ in range(random.randint(1, 5))] + predecessors = {make_mock_pipeline_step() for _ in range(random.randint(1, 5))} step = PipelineStep( model_version=make_mock_registered_model_version(), name="test_name", @@ -100,9 +104,11 @@ def test_set_predecessors_add( step = PipelineStep( model_version=make_mock_registered_model_version(), name="test_name", - predecessors=[predecessor_1], + predecessors={predecessor_1}, ) - step.set_predecessors(step.predecessors + [predecessor_2]) + new_steps = step.predecessors.copy() + new_steps.add(predecessor_2) + step.set_predecessors(new_steps) assert set(step.predecessors) == {predecessor_1, predecessor_2} @@ -110,15 +116,16 @@ def 
test_set_predecessors_remove( make_mock_registered_model_version, make_mock_pipeline_step ) -> None: """Test that predecessors can be removed from a PipelineStep object""" - predecessors = [make_mock_pipeline_step() for _ in range(random.randint(2, 10))] - steps_to_remain = predecessors[: len(predecessors) // 2] + predecessors = {make_mock_pipeline_step() for _ in range(random.randint(2, 10))} + predecessors_as_list = list(predecessors) # convert to list for slicing + steps_to_remain = predecessors_as_list[: len(predecessors_as_list) // 2] step = PipelineStep( model_version=make_mock_registered_model_version(), name="test_name", predecessors=predecessors, ) - step.set_predecessors(steps_to_remain) - assert set(step.predecessors) == set(steps_to_remain) + step.set_predecessors(set(steps_to_remain)) + assert step.predecessors == set(steps_to_remain) def test_change_model_version(make_mock_registered_model_version) -> None: @@ -128,7 +135,7 @@ def test_change_model_version(make_mock_registered_model_version) -> None: step = PipelineStep( model_version=model_ver_1, name="test_name", - predecessors=[], + predecessors=set(), ) assert step.model_version == model_ver_1 step.set_model_version(model_ver_2) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index b22a7592b5..abfc1db298 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -22,7 +22,10 @@ def test_copy_graph( registered_model_version=make_mock_registered_model_version(), ) copied_graph = pipeline.copy_graph() - for orig_step, copied_step in zip(graph.steps, copied_graph.steps): + # convert from sets to lists and sort for comparison + graph_steps_sorted = sorted(list(graph.steps), key=lambda x: x.name) + copied_graph_steps_sorted = sorted(list(copied_graph.steps), key=lambda x: x.name) + for orig_step, copied_step 
in zip(graph_steps_sorted, copied_graph_steps_sorted): assert orig_step is not copied_step assert orig_step.name == copied_step.name assert orig_step.predecessors == copied_step.predecessors diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 4b74209978..c0c068771f 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from typing import Any, Dict, List +from typing import Any, Dict, List, Set from verta._internal_utils._utils import Configuration, Connection from ._pipeline_step import PipelineStep @@ -11,18 +11,18 @@ class PipelineGraph: Parameters ---------- - steps : list of :class:`~verta.pipeline.PipelineStep` + steps : set of :class:`~verta.pipeline.PipelineStep` List of all possible steps of the pipeline. Ordering of steps in the pipeline itself is determined by the predecessors provided to each step, thus ordering of this list is irrelevant. Attributes ---------- - steps: list of :class:`~verta.deployment.PipelineStep` - List of PipelineSteps comprising all possible steps in the PiplineGraph. + steps: set of :class:`~verta.deployment.PipelineStep` + Set of PipelineSteps comprising all possible steps in the PiplineGraph. """ - def __init__(self, steps: List[PipelineStep]): + def __init__(self, steps: Set[PipelineStep]): self._steps = self.set_steps(steps) def __repr__(self): @@ -40,20 +40,19 @@ def steps(self): def steps(self, value): raise AttributeError("cannot set attribute 'steps'; please use set_steps()") - def set_steps(self, steps: List[str]) -> None: - """Set the list of steps for this PipelineGraph. + def set_steps(self, steps: Set[PipelineStep]) -> Set[PipelineStep]: + """Update the set of steps for this PipelineGraph to the provided value. Parameters ---------- - steps : list of :class:`~verta.deployment.PipelineStep` - List of all possible steps of the pipline graph. 
Order does not matter. + steps : set of :class:`~verta.deployment.PipelineStep` + Set of all possible steps of the pipline graph. Order does not matter. """ - if not isinstance(steps, list): - raise TypeError(f"steps must be type list, not {type(steps)}") + if not isinstance(steps, set): + raise TypeError(f"steps must be type set, not {type(steps)}") for step in steps: if not isinstance(step, PipelineStep): raise TypeError(f"individual steps must be type PipelineStep, not {type(step)}") - steps = list(set(steps)) self._steps = steps return self.steps @@ -83,7 +82,8 @@ def _to_graph_definition(self) -> List[Dict[str, Any]]: The back-end expects a list of steps and their predecessors as part of the `graph` object within a PipelineDefinition. This method converts this PipelineGraph - to a formatted list of steps with predecessors for that purpose. + to a formatted list of steps with predecessors for that purpose. A list is used + to remain json serializable. """ return [step._to_graph_spec() for step in self.steps] @@ -92,6 +92,7 @@ def _to_steps_definition(self) -> List[Dict[str, Any]]: The back-end expects a list of steps and their model versions as part of the `steps` object within a PipelineDefinition. This method converts this PipelineGraph - to a formatted list of steps with model versions for that purpose. + to a formatted list of steps with model versions for that purpose. A list is used + to remain json serializable. 
""" return [step._to_step_spec() for step in self.steps] diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index decc553ca1..2394890f48 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Set from verta._internal_utils._utils import Configuration, Connection from verta.registry.entities import RegisteredModel, RegisteredModelVersion @@ -16,8 +16,8 @@ class PipelineStep: Name of the step, for use within the scope of the pipeline only. model_version : :class:`~verta.registry.entities.RegisteredModelVersion` Registered model version to run for the step. - predecessors : list, optional - List of PipelineSteps whose outputs will be treated as inputs to this step. + predecessors : set, optional + Set of PipelineSteps whose outputs will be treated as inputs to this step. If not included, the step is assumed to be an initial step. Values must be unique. Attributes @@ -35,13 +35,13 @@ def __init__( name: str, model_version: RegisteredModelVersion, predecessors: Optional[ - List["PipelineStep"] + Set["PipelineStep"] ] = None, # Optional because it could be the first step with no predecessors ): self._name = self.set_name(name) self._model_version = self.set_model_version(model_version) self._predecessors = ( - self.set_predecessors(predecessors) if predecessors else list() + self.set_predecessors(predecessors) if predecessors else set() ) # avoid the need to pass in connection params when building as local object @@ -99,7 +99,7 @@ def name(self, value) -> None: """Raise a more informative error than the default.""" raise AttributeError("can't set attribute 'name'; please use set_name()") - def set_name(self, name: str) -> None: + def set_name(self, name: str) -> str: """Change the name of this step. 
Parameters @@ -113,8 +113,8 @@ def set_name(self, name: str) -> None: return self.name @property - def predecessors(self) -> List["PipelineStep"]: - return list(set(self._predecessors)) # deduplicated + def predecessors(self) -> Set["PipelineStep"]: + return self._predecessors @predecessors.setter def predecessors(self, value) -> None: @@ -123,7 +123,7 @@ def predecessors(self, value) -> None: "can't set attribute 'predecessors'; please use set_predecessors()" ) - def set_predecessors(self, steps: List["PipelineStep"]) -> None: + def set_predecessors(self, steps: Set["PipelineStep"]) -> set: """Set the predecessors associated with this step. Parameters @@ -131,8 +131,8 @@ def set_predecessors(self, steps: List["PipelineStep"]) -> None: steps : list List of PipelineStep objects whose outputs will be treated as inputs to this step. """ - if not isinstance(steps, list): - raise TypeError(f"predecessors must be type list, not {type(steps)}") + if not isinstance(steps, set): + raise TypeError(f"predecessors must be type set, not {type(steps)}") for step in steps: if not isinstance(step, PipelineStep): raise TypeError( @@ -155,7 +155,7 @@ def _get_registered_model(self, conn: Connection, conf: Configuration) -> None: @classmethod def _steps_from_pipeline_definition( cls, pipeline_definition: Dict[str, Any], conn: Connection, conf: Configuration - ) -> List["PipelineStep"]: + ) -> Set["PipelineStep"]: """Return a list of PipelineStep objects by from a pipeline specification Parameters @@ -172,15 +172,15 @@ def _steps_from_pipeline_definition( list of :class:`~verta._pipelines.PipelineStep` List of steps in the pipeline spec as PipelineStep objects """ - steps: List["PipelineStep"] = list() + steps: Set["PipelineStep"] = set() for step in pipeline_definition["steps"]: - steps.append( + steps.add( cls( name=step["name"], model_version=RegisteredModelVersion._get_by_id( id=step["model_version_id"], conn=conn, conf=conf ), - predecessors=[], + predecessors=set(), ) ) for 
step_object in steps: @@ -191,7 +191,7 @@ def _steps_from_pipeline_definition( if s["name"] == step_object.name ][0] step_object.set_predecessors( - [s for s in steps if s.name in predecessor_names] + {s for s in steps if s.name in predecessor_names} ) return steps From 593c59b077d687ed22831284f759db80bf402fa2 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Wed, 23 Aug 2023 01:16:06 +0200 Subject: [PATCH 041/117] tests: fix pipeline_definition fixture to use integer ids for model version, and beef up mocked http responses with more realistic json responses for completeness --- .../pipeline/test_pipeline_graph.py | 52 +++++++++++++--- .../unit_tests/pipeline/test_pipeline_step.py | 61 ++++++++++++++----- client/verta/tests/unit_tests/strategies.py | 3 +- 3 files changed, 92 insertions(+), 24 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index 074f048542..0c185c77e5 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -4,6 +4,7 @@ """ from hypothesis import given, HealthCheck, settings +from hypothesis import strategies as st from tests.unit_tests.strategies import pipeline_definition from verta.pipeline import PipelineGraph @@ -22,13 +23,25 @@ def test_set_steps(make_mock_pipeline_step) -> None: assert not graph.steps -@given(pipeline_definition=pipeline_definition()) +@given( + pipeline_definition=pipeline_definition(), + registered_model_id=st.integers(min_value=1, max_value=1000000000), + # max value limit avoids protobuf "Value out of range" error + model_version_name=st.text(min_size=1), + model_name=st.text(min_size=1), +) @settings( suppress_health_check=[HealthCheck.function_scoped_fixture], deadline=None, ) def test_from_definition( - mocked_responses, pipeline_definition, mock_conn, mock_config + mocked_responses, + pipeline_definition, + mock_conn, + 
mock_config, + registered_model_id, + model_version_name, + model_name, ) -> None: """Test that a PipelineGraph object can be constructed from a pipeline specification. @@ -40,20 +53,45 @@ def test_from_definition( for step in pipeline_definition["steps"]: mocked_responses.get( f"https://test_socket/api/v1/registry/model_versions/{step['model_version_id']}", - json={"name": "test"}, + json={ + "model_version": { + "id": step['model_version_id'], + "registered_model_id": registered_model_id, + "version": model_version_name, + } + }, status=200, ) mocked_responses.get( - f"https://test_socket/api/v1/registry/registered_models/0", - json={}, + f"https://test_socket/api/v1/registry/registered_models/{registered_model_id}", + json={ + "registered_model": { + "id": registered_model_id, + "name": model_name, + } + }, status=200, ) graph = PipelineGraph._from_definition( pipeline_definition=pipeline_definition, conn=mock_conn, conf=mock_config ) + # the object produced is a PipelineGraph assert isinstance(graph, PipelineGraph) + # we have the same number of steps as in the pipeline definition assert len(graph.steps) == len(pipeline_definition["steps"]) + # sort each group of steps for comparison + pipeline_steps_sorted = sorted( + pipeline_definition["steps"], key=lambda x: x["name"] + ) + graph_steps_sorted = sorted(list(graph.steps), key=lambda x: x.name) + + for graph_step, pipeline_step in zip(graph_steps_sorted, pipeline_steps_sorted): + assert graph_step.name == pipeline_step["name"] + assert graph_step.model_version.id == pipeline_step["model_version_id"] + assert graph_step._registered_model.name == model_name + assert graph_step._registered_model.id == registered_model_id + def test_to_graph_definition(make_mock_pipeline_step) -> None: """Test that a pipeline graph specification can be constructed from a @@ -88,8 +126,8 @@ def test_to_steps_definition(make_mock_pipeline_step) -> None: Definitions are type list to remain json serializable. 
""" - step_1 = make_mock_pipeline_step() - step_2 = make_mock_pipeline_step() + step_1 = make_mock_pipeline_step(name="step_1") + step_2 = make_mock_pipeline_step(name="step_2") graph = PipelineGraph(steps={step_1, step_2}) step_specs = graph._to_steps_definition() expected_definition = [ diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index 24810508e9..8f9a09697c 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -5,13 +5,19 @@ import random -from hypothesis import given, HealthCheck, settings +from hypothesis import given, HealthCheck, settings, strategies as st from tests.unit_tests.strategies import pipeline_definition from verta.pipeline import PipelineStep -@given(pipeline_definition=pipeline_definition()) +@given( + pipeline_definition=pipeline_definition(), + registered_model_id=st.integers(min_value=1, max_value=1000000000), + # max value limit avoids protobuf "Value out of range" error + model_version_name=st.text(min_size=1), + model_name=st.text(min_size=1), +) @settings( suppress_health_check=[HealthCheck.function_scoped_fixture], deadline=None, @@ -21,6 +27,9 @@ def test_steps_from_pipeline_definition( mock_conn, mock_config, mocked_responses, + registered_model_id, + model_version_name, + model_name, ) -> None: """Test that a list of PipelineStep objects can be constructed and returned from a pipeline definition. 
@@ -32,12 +41,23 @@ def test_steps_from_pipeline_definition( for step in pipeline_definition["steps"]: mocked_responses.get( f"https://test_socket/api/v1/registry/model_versions/{step['model_version_id']}", - json={"model_version": step["model_version_id"]}, + json={ + "model_version": { + "id": step["model_version_id"], + "registered_model_id": registered_model_id, + "version": model_version_name, + } + }, status=200, ) mocked_responses.get( - f"https://test_socket/api/v1/registry/registered_models/0", - json={}, + f"https://test_socket/api/v1/registry/registered_models/{registered_model_id}", + json={ + "registered_model": { + "id": registered_model_id, + "name": model_name, + } + }, status=200, ) generated_steps = PipelineStep._steps_from_pipeline_definition( @@ -47,22 +67,31 @@ def test_steps_from_pipeline_definition( ) # we have the same number of steps as in the pipeline definition assert len(generated_steps) == len(pipeline_definition["steps"]) - # convert from set to list and sort for comparison + + # sort each group of steps for comparison generated_steps_sorted = sorted(list(generated_steps), key=lambda x: x.name) - # sort for comparison - spec_steps_sorted = sorted(pipeline_definition["steps"], key=lambda x: x["name"]) - for spec_step, gen_step in zip(spec_steps_sorted, generated_steps_sorted): + definition_steps_sorted = sorted( + pipeline_definition["steps"], key=lambda x: x["name"] + ) + + for def_step, gen_step in zip(definition_steps_sorted, generated_steps_sorted): + # the names are the same for the steps and their definitions + assert gen_step.name == def_step["name"] + # model version ids are the same for the steps and their definitions + assert gen_step.model_version.id == def_step["model_version_id"] + # registered model ids are the same for the steps and their definitions + assert gen_step._registered_model.id == registered_model_id + # registered model names are fetched and added + assert gen_step._registered_model.name == model_name # each 
step is converted to a PipelineStep object assert isinstance(gen_step, PipelineStep) - # the names are the same for the steps and their definitions - assert gen_step.name == spec_step["name"] # predecessors for each step are also converted to PipelineStep objects for i in gen_step.predecessors: assert isinstance(i, PipelineStep) - # the predecessors for each step are the same as in the definition - assert set([i.name for i in gen_step.predecessors]) == set( - [s["predecessors"] for s in graph if gen_step.name == s["name"]][0] - ) + # the predecessors for each step are all included and have the same name as in the definition + assert [s.name for s in gen_step.predecessors] == [ + s["predecessors"] for s in graph if gen_step.name == s["name"] + ][0] def test_to_step_spec(make_mock_registered_model_version) -> None: @@ -71,7 +100,7 @@ def test_to_step_spec(make_mock_registered_model_version) -> None: step = PipelineStep( model_version=model_version, name="test_name", - predecessors=[], # predecessors not included in step spec + predecessors=set(), # predecessors not included in step spec ) assert step._to_step_spec() == { "name": "test_name", diff --git a/client/verta/tests/unit_tests/strategies.py b/client/verta/tests/unit_tests/strategies.py index 1baa123fe2..67807f09f3 100644 --- a/client/verta/tests/unit_tests/strategies.py +++ b/client/verta/tests/unit_tests/strategies.py @@ -273,7 +273,8 @@ def pipeline_definition(draw): ) model_versions = draw( st.lists( - st.text(alphabet=["1", "2", "3", "4", "5"], min_size=1), + # limit max value to prevent protobuf "Value out of range" error + st.integers(min_value=1, max_value=1000000000), min_size=5, max_size=5, unique=True, From b22c9e8cccab48a6426b861fa14b5f2f76180ea7 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Wed, 23 Aug 2023 01:26:09 +0200 Subject: [PATCH 042/117] test: use max_value on integer strategy only where required to prevent protobuf out of range error --- 
client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py | 2 +- client/verta/tests/unit_tests/strategies.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index 0c185c77e5..eedb0de36a 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -25,7 +25,7 @@ def test_set_steps(make_mock_pipeline_step) -> None: @given( pipeline_definition=pipeline_definition(), - registered_model_id=st.integers(min_value=1, max_value=1000000000), + registered_model_id=st.integers(min_value=1, max_value=2**63), # max value limit avoids protobuf "Value out of range" error model_version_name=st.text(min_size=1), model_name=st.text(min_size=1), diff --git a/client/verta/tests/unit_tests/strategies.py b/client/verta/tests/unit_tests/strategies.py index 67807f09f3..ebff50bc71 100644 --- a/client/verta/tests/unit_tests/strategies.py +++ b/client/verta/tests/unit_tests/strategies.py @@ -274,7 +274,7 @@ def pipeline_definition(draw): model_versions = draw( st.lists( # limit max value to prevent protobuf "Value out of range" error - st.integers(min_value=1, max_value=1000000000), + st.integers(min_value=1, max_value=2**63), min_size=5, max_size=5, unique=True, @@ -289,7 +289,7 @@ def pipeline_definition(draw): {"predecessors": [step_names[2]], "name": step_names[3]}, {"predecessors": [step_names[3]], "name": step_names[4]}, ], - "pipeline_version_id": draw(st.integers(min_value=1, max_value=1000)), + "pipeline_version_id": draw(st.integers(min_value=1)), "steps": [ { "model_version_id": model_versions[0], From 4bf821430fc36ae3c9083cbfa866b1ad17b91132 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Wed, 23 Aug 2023 17:06:29 +0200 Subject: [PATCH 043/117] docs: fix typo in doc string --- client/verta/verta/pipeline/_pipeline_step.py | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 2394890f48..300d238110 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -156,7 +156,7 @@ def _get_registered_model(self, conn: Connection, conf: Configuration) -> None: def _steps_from_pipeline_definition( cls, pipeline_definition: Dict[str, Any], conn: Connection, conf: Configuration ) -> Set["PipelineStep"]: - """Return a list of PipelineStep objects by from a pipeline specification + """Return a list of PipelineStep objects from a pipeline specification Parameters ---------- From 0433567efcd1ab026f794a0d49d896a240bd2793 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Wed, 23 Aug 2023 19:54:05 +0200 Subject: [PATCH 044/117] refactor: use conn params from RMV to get RM and adjust unit tests to patch the call --- client/verta/tests/unit_tests/conftest.py | 44 +++++-- .../pipeline/test_pipeline_graph.py | 54 +++++--- .../unit_tests/pipeline/test_pipeline_step.py | 122 ++++++++++++------ .../pipeline/test_registered_pipeline.py | 100 +++++++++----- client/verta/verta/pipeline/_pipeline_step.py | 37 +++--- 5 files changed, 237 insertions(+), 120 deletions(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index 09a2f0d4e7..f7a0b6a4ad 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -16,9 +16,8 @@ from verta.client import Client from verta.credentials import EmailCredentials from verta.endpoint import Endpoint -from verta.endpoint.resources import NvidiaGPU, NvidiaGPUModel, Resources from verta.pipeline import PipelineGraph, PipelineStep -from verta.registry.entities import RegisteredModelVersion +from verta.registry.entities import RegisteredModel, RegisteredModelVersion @pytest.fixture(scope="session") @@ -99,6 +98,31 @@ def 
simple_pipeline_definition(id: int) -> Dict[str, Any]: return simple_pipeline_definition +@pytest.fixture(scope="session") +def make_mock_registered_model(mock_conn, mock_config) -> Callable: + """Return a callable function for creating mocked objects of the + RegisteredModel class. + """ + + class MockRegisteredModel(RegisteredModel): + def __repr__(self): # avoid network calls when displaying test results + return object.__repr__(self) + + def _make_mock_registered_model(id: int, name: str): + """Return a mocked RegisteredModel object.""" + + return MockRegisteredModel( + mock_conn, + mock_config, + _RegistryService.RegisteredModel( + id=id, + name=name, + ), + ) + + return _make_mock_registered_model + + @pytest.fixture(scope="session") def make_mock_registered_model_version( mock_conn, mock_config, make_mock_simple_pipeline_definition @@ -124,16 +148,16 @@ def _make_mock_registered_model_version(): test session. """ - # ids = set() + ids = set() model_ver_id = random.randint(1, 1000000) - # while model_ver_id in ids: - # model_ver_id = random.randint(1, 1000000) - # ids.add(model_ver_id) + while model_ver_id in ids: + model_ver_id = random.randint(1, 1000000) + ids.add(model_ver_id) reg_model_id = random.randint(1, 1000000) - # while reg_model_id in ids: - # reg_model_id = random.randint(1, 1000000) - # ids.add(reg_model_id) + while reg_model_id in ids: + reg_model_id = random.randint(1, 1000000) + ids.add(reg_model_id) return MockRegisteredModelVersion( mock_conn, @@ -141,7 +165,7 @@ def _make_mock_registered_model_version(): _RegistryService.ModelVersion( id=model_ver_id, registered_model_id=reg_model_id, - version="test_version_name", + version="test_model_version_name", ), ) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index eedb0de36a..1601228700 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ 
b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -3,20 +3,26 @@ Unit tests for the PipelineGraph class """ -from hypothesis import given, HealthCheck, settings -from hypothesis import strategies as st +from unittest.mock import patch +from hypothesis import given, HealthCheck, settings, strategies as st + +import verta from tests.unit_tests.strategies import pipeline_definition from verta.pipeline import PipelineGraph -def test_set_steps(make_mock_pipeline_step) -> None: +def test_set_steps(make_mock_pipeline_step, make_mock_registered_model) -> None: """ Test that the steps of a PipelineGraph can be set """ - step_1 = make_mock_pipeline_step() - step_2 = make_mock_pipeline_step() - graph = PipelineGraph(steps=set()) + mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + step_1 = make_mock_pipeline_step() + step_2 = make_mock_pipeline_step() + graph = PipelineGraph(steps=set()) graph.set_steps({step_1, step_2}) assert set(graph.steps) == {step_1, step_2} graph.set_steps(set()) @@ -35,13 +41,13 @@ def test_set_steps(make_mock_pipeline_step) -> None: deadline=None, ) def test_from_definition( - mocked_responses, pipeline_definition, - mock_conn, - mock_config, registered_model_id, model_version_name, model_name, + mock_conn, + mock_config, + mocked_responses, ) -> None: """Test that a PipelineGraph object can be constructed from a pipeline specification. 
@@ -55,7 +61,7 @@ def test_from_definition( f"https://test_socket/api/v1/registry/model_versions/{step['model_version_id']}", json={ "model_version": { - "id": step['model_version_id'], + "id": step["model_version_id"], "registered_model_id": registered_model_id, "version": model_version_name, } @@ -93,13 +99,19 @@ def test_from_definition( assert graph_step._registered_model.id == registered_model_id -def test_to_graph_definition(make_mock_pipeline_step) -> None: +def test_to_graph_definition( + make_mock_pipeline_step, make_mock_registered_model +) -> None: """Test that a pipeline graph specification can be constructed from a PipelineGraph object """ - step_1 = make_mock_pipeline_step("step_1") - step_2 = make_mock_pipeline_step("step_2") - step_3 = make_mock_pipeline_step("step_3") + mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + step_1 = make_mock_pipeline_step("step_1") + step_2 = make_mock_pipeline_step("step_2") + step_3 = make_mock_pipeline_step("step_3") step_2.set_predecessors({step_1}) step_3.set_predecessors({step_2}) graph = PipelineGraph(steps={step_1, step_2, step_3}) @@ -120,15 +132,21 @@ def test_to_graph_definition(make_mock_pipeline_step) -> None: ] -def test_to_steps_definition(make_mock_pipeline_step) -> None: +def test_to_steps_definition( + make_mock_pipeline_step, make_mock_registered_model +) -> None: """Test that a pipeline steps specification can be constructed from a PipelineGraph object. Definitions are type list to remain json serializable. 
""" - step_1 = make_mock_pipeline_step(name="step_1") - step_2 = make_mock_pipeline_step(name="step_2") - graph = PipelineGraph(steps={step_1, step_2}) + mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + step_1 = make_mock_pipeline_step(name="step_1") + step_2 = make_mock_pipeline_step(name="step_2") + graph = PipelineGraph(steps={step_1, step_2}) step_specs = graph._to_steps_definition() expected_definition = [ { diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index 8f9a09697c..e231914783 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -6,9 +6,11 @@ import random from hypothesis import given, HealthCheck, settings, strategies as st +from unittest.mock import patch from tests.unit_tests.strategies import pipeline_definition from verta.pipeline import PipelineStep +import verta @given( @@ -24,18 +26,19 @@ ) def test_steps_from_pipeline_definition( pipeline_definition, - mock_conn, - mock_config, - mocked_responses, registered_model_id, model_version_name, model_name, + mock_conn, + mock_config, + mocked_responses, ) -> None: """Test that a list of PipelineStep objects can be constructed and returned from a pipeline definition. The registered model, model version, and environment is fetched for - each step, so a response is mocked for each call. + each step. However, only the call to to fetch the RMV is mocked, as + the _get_registered_model function is patched to return a mock RM. 
""" graph = pipeline_definition["graph"] for step in pipeline_definition["steps"]: @@ -68,7 +71,7 @@ def test_steps_from_pipeline_definition( # we have the same number of steps as in the pipeline definition assert len(generated_steps) == len(pipeline_definition["steps"]) - # sort each group of steps for comparison + # sort both group of steps for side-by-side comparison generated_steps_sorted = sorted(list(generated_steps), key=lambda x: x.name) definition_steps_sorted = sorted( pipeline_definition["steps"], key=lambda x: x["name"] @@ -94,30 +97,47 @@ def test_steps_from_pipeline_definition( ][0] -def test_to_step_spec(make_mock_registered_model_version) -> None: +def test_to_step_spec( + make_mock_registered_model_version, make_mock_registered_model +) -> None: """Test that a PipelineStep object can be converted to a step specification""" - model_version = make_mock_registered_model_version() - step = PipelineStep( - model_version=model_version, - name="test_name", - predecessors=set(), # predecessors not included in step spec + mocked_rmv = make_mock_registered_model_version() + mocked_rm = make_mock_registered_model( + id=mocked_rmv.registered_model_id, name="test_rmv" ) + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + step = PipelineStep( + model_version=mocked_rmv, + name="test_name", + predecessors=set(), # predecessors not included in step spec + ) assert step._to_step_spec() == { "name": "test_name", - "model_version_id": model_version.id, + "model_version_id": mocked_rmv.id, } def test_to_graph_spec( - make_mock_registered_model_version, make_mock_pipeline_step + make_mock_registered_model_version, + make_mock_pipeline_step, + make_mock_registered_model, ) -> None: """Test that a PipelineStep object can be converted to a step specification""" - predecessors = {make_mock_pipeline_step() for _ in range(random.randint(1, 5))} - step = PipelineStep( - model_version=make_mock_registered_model_version(), 
- name="test_name", - predecessors=predecessors, + mocked_rmv = make_mock_registered_model_version() + mocked_rm = make_mock_registered_model( + id=mocked_rmv.registered_model_id, name="test_rmv" ) + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + predecessors = {make_mock_pipeline_step() for _ in range(random.randint(1, 5))} + step = PipelineStep( + model_version=mocked_rmv, + name="test_name", + predecessors=predecessors, + ) assert step._to_graph_spec() == { "name": "test_name", "predecessors": [s.name for s in predecessors], @@ -125,16 +145,25 @@ def test_to_graph_spec( def test_set_predecessors_add( - make_mock_registered_model_version, make_mock_pipeline_step + make_mock_registered_model_version, + make_mock_pipeline_step, + make_mock_registered_model, ) -> None: """Test that predecessors can be added to a PipelineStep object""" - predecessor_1 = make_mock_pipeline_step() - predecessor_2 = make_mock_pipeline_step() - step = PipelineStep( - model_version=make_mock_registered_model_version(), - name="test_name", - predecessors={predecessor_1}, + mocked_rmv = make_mock_registered_model_version() + mocked_rm = make_mock_registered_model( + id=mocked_rmv.registered_model_id, name="test_rmv" ) + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + predecessor_1 = make_mock_pipeline_step() + predecessor_2 = make_mock_pipeline_step() + step = PipelineStep( + model_version=mocked_rmv, + name="test_name", + predecessors={predecessor_1}, + ) new_steps = step.predecessors.copy() new_steps.add(predecessor_2) step.set_predecessors(new_steps) @@ -142,30 +171,45 @@ def test_set_predecessors_add( def test_set_predecessors_remove( - make_mock_registered_model_version, make_mock_pipeline_step + make_mock_registered_model_version, + make_mock_pipeline_step, + make_mock_registered_model, ) -> None: """Test that predecessors can be removed from a PipelineStep object""" - 
predecessors = {make_mock_pipeline_step() for _ in range(random.randint(2, 10))} - predecessors_as_list = list(predecessors) # convert to list for slicing - steps_to_remain = predecessors_as_list[: len(predecessors_as_list) // 2] - step = PipelineStep( - model_version=make_mock_registered_model_version(), - name="test_name", - predecessors=predecessors, + mocked_rmv = make_mock_registered_model_version() + mocked_rm = make_mock_registered_model( + id=mocked_rmv.registered_model_id, name="test_rmv" ) + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + predecessors = {make_mock_pipeline_step() for _ in range(random.randint(2, 10))} + predecessors_as_list = list(predecessors) # convert to list for slicing + steps_to_remain = predecessors_as_list[: len(predecessors_as_list) // 2] + step = PipelineStep( + model_version=mocked_rmv, + name="test_name", + predecessors=predecessors, + ) step.set_predecessors(set(steps_to_remain)) assert step.predecessors == set(steps_to_remain) -def test_change_model_version(make_mock_registered_model_version) -> None: +def test_change_model_version( + make_mock_registered_model_version, make_mock_registered_model +) -> None: """Test that a PipelineStep object can have its model version changed""" model_ver_1 = make_mock_registered_model_version() model_ver_2 = make_mock_registered_model_version() - step = PipelineStep( - model_version=model_ver_1, - name="test_name", - predecessors=set(), - ) + mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + step = PipelineStep( + model_version=model_ver_1, + name="test_name", + predecessors=set(), + ) assert step.model_version == model_ver_1 step.set_model_version(model_ver_2) assert step.model_version == model_ver_2 diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py 
b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index abfc1db298..b040bc26e5 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -1,26 +1,35 @@ # -*- coding: utf-8 -*- """Unit tests for the RegisteredPipeline class""" +from unittest.mock import patch + import pytest from hypothesis import given, HealthCheck, settings +import verta from tests.unit_tests.strategies import pipeline_definition, resources from verta.pipeline import RegisteredPipeline def test_copy_graph( - make_mock_pipeline_graph, make_mock_registered_model_version + make_mock_pipeline_graph, + make_mock_registered_model_version, + make_mock_registered_model, ) -> None: """Test that the graph of a RegisteredPipeline can be copied. Each step in the copied graph should be a new object, but have the same name, predecessors, and model version as the original. """ - graph = make_mock_pipeline_graph() - pipeline = RegisteredPipeline( - graph=graph, - registered_model_version=make_mock_registered_model_version(), - ) + mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + graph = make_mock_pipeline_graph() + pipeline = RegisteredPipeline( + graph=graph, + registered_model_version=make_mock_registered_model_version(), + ) copied_graph = pipeline.copy_graph() # convert from sets to lists and sort for comparison graph_steps_sorted = sorted(list(graph.steps), key=lambda x: x.name) @@ -84,7 +93,9 @@ def test_log_pipeline_definition_artifact( def test_to_pipeline_definition( - make_mock_pipeline_graph, make_mock_registered_model_version + make_mock_pipeline_graph, + make_mock_registered_model_version, + make_mock_registered_model, ) -> None: """Test that a pipeline definition can be constructed from a RegisteredPipeline object. 
@@ -93,11 +104,15 @@ def test_to_pipeline_definition( and `to_steps_definition` functions are handled in unit tests for PipelineGraph. """ - graph = make_mock_pipeline_graph() - pipeline = RegisteredPipeline( - graph=graph, - registered_model_version=make_mock_registered_model_version(), - ) + mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + graph = make_mock_pipeline_graph() + pipeline = RegisteredPipeline( + graph=graph, + registered_model_version=make_mock_registered_model_version(), + ) pipeline_definition = pipeline._to_pipeline_definition() assert pipeline_definition == { "pipeline_version_id": pipeline.id, @@ -105,21 +120,27 @@ def test_to_pipeline_definition( "predecessors": graph._to_steps_definition(), } + @given(resources=resources()) def test_to_pipeline_configuration_valid( + resources, make_mock_pipeline_graph, make_mock_registered_model_version, - resources, + make_mock_registered_model, ) -> None: """Test that a valid pipeline configuration can be constructed from a RegisteredPipeline object and a valid list of pipeline resources. 
""" - graph = make_mock_pipeline_graph() - step_resources = {step.name: resources for step in graph.steps} - pipeline = RegisteredPipeline( - graph=graph, - registered_model_version=make_mock_registered_model_version(), - ) + mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + graph = make_mock_pipeline_graph() + step_resources = {step.name: resources for step in graph.steps} + pipeline = RegisteredPipeline( + graph=graph, + registered_model_version=make_mock_registered_model_version(), + ) pipeline_configuration = pipeline._to_pipeline_configuration( pipeline_resources=step_resources @@ -134,38 +155,47 @@ def test_to_pipeline_configuration_valid( @given(resources=resources()) def test_to_pipeline_configuration_invalid_resources( + resources, make_mock_pipeline_graph, make_mock_registered_model_version, - resources, + make_mock_registered_model, ) -> None: """Test that a ValueError is raised when an invalid step name is included in the provided pipeline resources. 
(Does not match a step name in the pipeline's graph) """ - graph = make_mock_pipeline_graph() - step_resources = {step.name: resources for step in graph.steps} - step_resources["invalid_step_name"] = resources - pipeline = RegisteredPipeline( - graph=graph, - registered_model_version=make_mock_registered_model_version(), - ) - + mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + graph = make_mock_pipeline_graph() + step_resources = {step.name: resources for step in graph.steps} + step_resources["invalid_step_name"] = resources + pipeline = RegisteredPipeline( + graph=graph, + registered_model_version=make_mock_registered_model_version(), + ) with pytest.raises(ValueError): pipeline._to_pipeline_configuration(pipeline_resources=step_resources) def test_to_pipeline_configuration_no_resources( - make_mock_pipeline_graph, make_mock_registered_model_version + make_mock_pipeline_graph, + make_mock_registered_model_version, + make_mock_registered_model, ) -> None: """Test that a pipeline configuration can be constructed from a RegisteredPipeline object without providing pipeline resources. 
""" - graph = make_mock_pipeline_graph() - pipeline = RegisteredPipeline( - graph=graph, - registered_model_version=make_mock_registered_model_version(), - ) - + mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + graph = make_mock_pipeline_graph() + pipeline = RegisteredPipeline( + graph=graph, + registered_model_version=make_mock_registered_model_version(), + ) pipeline_configuration = pipeline._to_pipeline_configuration() assert pipeline_configuration["pipeline_version_id"] == pipeline.id for graph_step, config_step in zip(graph.steps, pipeline_configuration["steps"]): diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 300d238110..c6bdca26e9 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -43,26 +43,23 @@ def __init__( self._predecessors = ( self.set_predecessors(predecessors) if predecessors else set() ) - - # avoid the need to pass in connection params when building as local object - self._registered_model: Optional[RegisteredModel] = None - - # avoid additional http calls to refresh RMV cache. 
self._registered_model_id = self._model_version.registered_model_id + self._registered_model: RegisteredModel = self._get_registered_model( + conn=model_version._conn, conf=model_version._conf + ) def __repr__(self) -> str: - sequence = ( - "\n PipelineStep:", - f"step name: {self.name}", - ) - if self._registered_model: # don't display on local-only objects` - sequence += (f"registered_model: {self._registered_model.name}",) - sequence += ( - f"registered_model_version: {self.model_version.name}", - f"registered_model_version_id: {self.model_version.id}", - f"predecessors: {[s.name for s in self.predecessors]}", + return "\n ".join( + ( + "\n PipelineStep:", + f"step name: {self.name}", + f"registered_model: {self._registered_model.name}", + f"registered_model_id: {self._registered_model_id}", + f"registered_model_version: {self.model_version.name}", + f"registered_model_version_id: {self.model_version.id}", + f"predecessors: {[s.name for s in self.predecessors]}", + ) ) - return "\n ".join(sequence) @property def model_version(self) -> RegisteredModelVersion: @@ -151,12 +148,17 @@ def _get_registered_model(self, conn: Connection, conf: Configuration) -> None: id=self._registered_model_id, conn=conn, conf=conf ) self._registered_model = rm + return rm @classmethod def _steps_from_pipeline_definition( cls, pipeline_definition: Dict[str, Any], conn: Connection, conf: Configuration ) -> Set["PipelineStep"]: - """Return a list of PipelineStep objects from a pipeline specification + """Return a list of PipelineStep objects from a pipeline definition. + + This method is used when fetching a pre-existing pipeline from the backend + and converting it to a local RegisteredPipeline object, which includes the + PipelineGraph and all component steps as PipelineStep objects. 
Parameters ---------- @@ -184,7 +186,6 @@ def _steps_from_pipeline_definition( ) ) for step_object in steps: - step_object._get_registered_model(conn=conn, conf=conf) predecessor_names = [ s["predecessors"] for s in pipeline_definition["graph"] From 8390e73a87c0fb4fa05754d66972d7fc2b3e29a6 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Wed, 23 Aug 2023 20:13:46 +0200 Subject: [PATCH 045/117] test: be more specific when checking for error conditions on provided step resources --- .../pipeline/test_registered_pipeline.py | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index b040bc26e5..deb017193b 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -160,9 +160,13 @@ def test_to_pipeline_configuration_invalid_resources( make_mock_registered_model_version, make_mock_registered_model, ) -> None: - """Test that a ValueError is raised when an invalid step name is included - in the provided pipeline resources. (Does not match a step name in the - pipeline's graph) + """Test that the expected errors are raised when an invalid pipeline resources + are provided. 
+ + Invalid resources include: + - a step name not in the pipeline -> ValueError + - a step name that is not a string -> TypeError + - a step resource that is not a Resources object -> TypeError """ mocked_rm = make_mock_registered_model(id=123, name="test_rmv") with patch.object( @@ -175,8 +179,32 @@ def test_to_pipeline_configuration_invalid_resources( graph=graph, registered_model_version=make_mock_registered_model_version(), ) - with pytest.raises(ValueError): + # step name not in pipeline + with pytest.raises(ValueError) as err: + pipeline._to_pipeline_configuration(pipeline_resources=step_resources) + assert ( + str(err.value) + == "pipeline_resources contains resources for a step not in the " + "pipeline: 'invalid_step_name'" + ) + step_resources.pop("invalid_step_name") + # step name not a string + step_resources.update({123: resources}) + with pytest.raises(TypeError) as err2: pipeline._to_pipeline_configuration(pipeline_resources=step_resources) + assert ( + str(err2.value) + == "pipeline_resources keys must be type str, not " + ) + step_resources.pop(123) + # step resource not a Resources object + step_resources.update({"step_1": "not_resources"}) + with pytest.raises(TypeError) as err3: + pipeline._to_pipeline_configuration(pipeline_resources=step_resources) + assert ( + str(err3.value) + == "pipeline_resources values must be type Resources, not " + ) def test_to_pipeline_configuration_no_resources( From c19a56c0b5a0a39f2e64193309f852a1599d754d Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Wed, 23 Aug 2023 20:15:19 +0200 Subject: [PATCH 046/117] test: be more specific when checking for error conditions on provided step resources --- client/verta/verta/pipeline/_registered_pipeline.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 4f4b411d59..c61844d737 100644 --- 
a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -119,11 +119,20 @@ def _to_pipeline_configuration( Representation of a pipeline configuration. """ if pipeline_resources: + for res in pipeline_resources.values(): + if not isinstance(res, Resources): + raise TypeError( + f"pipeline_resources values must be type Resources, not {type(res)}" + ) for step_name in pipeline_resources.keys(): + if not isinstance(step_name, str): + raise TypeError( + f"pipeline_resources keys must be type str, not {type(step_name)}" + ) if step_name not in [step.name for step in self._graph.steps]: raise ValueError( f"pipeline_resources contains resources for a step not in " - f"the pipeline: {step_name}" + f"the pipeline: '{step_name}'" ) steps = list() for step in self._graph.steps: From 2a6f69a9f17fb926ec2d1c5d2ec4575f4dda053e Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 00:12:47 +0200 Subject: [PATCH 047/117] refactor: fix artifact handling and use conn params from RMV object --- client/verta/tests/unit_tests/conftest.py | 7 ++- .../pipeline/test_registered_pipeline.py | 62 +++++++++++++------ .../verta/pipeline/_registered_pipeline.py | 50 +++++++++------ 3 files changed, 79 insertions(+), 40 deletions(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index f7a0b6a4ad..9c30974e57 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -137,9 +137,10 @@ def __repr__(self): # avoid network calls when displaying test results return object.__repr__(self) def _get_artifact(self, key=None, artifact_type=None): - return json.dumps(make_mock_simple_pipeline_definition(id=self.id)).encode( - "utf-8" - ) + if key == "pipeline.json": + return json.dumps( + make_mock_simple_pipeline_definition(id=self.id) + ).encode("utf-8") def _make_mock_registered_model_version(): """Return a mocked 
``RegisteredModelVersion``. diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index deb017193b..3ea3c7fbde 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -4,7 +4,7 @@ from unittest.mock import patch import pytest -from hypothesis import given, HealthCheck, settings +from hypothesis import given, HealthCheck, settings, strategies as st import verta from tests.unit_tests.strategies import pipeline_definition, resources @@ -42,13 +42,13 @@ def test_copy_graph( assert copied_graph is not graph -@given(pipeline_definition=pipeline_definition()) +@given(model_version_name=st.text(min_size=1)) @settings( suppress_health_check=[HealthCheck.function_scoped_fixture], deadline=None, ) def test_log_pipeline_definition_artifact( - pipeline_definition, + model_version_name, mocked_responses, make_mock_pipeline_graph, make_mock_registered_model_version, @@ -56,42 +56,68 @@ def test_log_pipeline_definition_artifact( """ Verify the expected sequence of calls when a pipeline definition is logged as an artifact to the pipeline's model version. + + Fetching the registered model version is patched instead of mocking a + response to avoid having to pass the RM's id down through multiple + pytest fixtures. 
""" rmv = make_mock_registered_model_version() - pipeline = RegisteredPipeline( - graph=make_mock_pipeline_graph(), - registered_model_version=rmv, - ) - # Fetch the model + # Fetch the registered model version mocked_responses.get( - f"{rmv._conn.scheme}://{rmv._conn.socket}/api/v1/registry/model_versions/{pipeline.id}", - json={}, + f"{rmv._conn.scheme}://{rmv._conn.socket}/api/v1/registry/model_versions/{rmv.id}", + json={ + "model_version": { + "id": rmv.id, + "registered_model_id": rmv.registered_model_id, + "version": model_version_name, + } + }, status=200, ) - # Fetch the model version mocked_responses.put( - f"{rmv._conn.scheme}://{rmv._conn.socket}/api/v1/registry/registered_models/0/model_versions/{pipeline.id}", + f"{rmv._conn.scheme}://{rmv._conn.socket}/api/v1/registry/registered_models/{rmv.registered_model_id}/model_versions/{rmv.id}", json={}, status=200, ) # Fetch the artifact upload URL mocked_responses.post( - f"{rmv._conn.scheme}://{rmv._conn.socket}/api/v1/registry/model_versions/{pipeline.id}/getUrlForArtifact", + f"{rmv._conn.scheme}://{rmv._conn.socket}/api/v1/registry/model_versions/{rmv.id}/getUrlForArtifact", json={ "url": f"https://account.s3.amazonaws.com/development/ModelVersionEntity/" - f"{pipeline.id}/pipeline.json" + f"{rmv.id}/pipeline.json" }, status=200, ) # Upload the artifact mocked_responses.put( - f"https://account.s3.amazonaws.com/development/ModelVersionEntity/{pipeline.id}/pipeline.json", + f"https://account.s3.amazonaws.com/development/ModelVersionEntity/{rmv.id}/pipeline.json", json={}, status=200, ) + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=rmv + ): + pipeline = RegisteredPipeline( + graph=make_mock_pipeline_graph(), + registered_model_version=rmv, + ) pipeline._log_pipeline_definition_artifact() +def test_get_pipeline_definition_artifact( + make_mock_registered_model_version, + make_mock_simple_pipeline_definition, +) -> None: + """Test that a pipeline definition 
artifact can be fetched from the + registered model version associated with a RegisteredPipeline object. + """ + rmv = make_mock_registered_model_version() + pipeline_definition = RegisteredPipeline._get_pipeline_definition_artifact( + registered_model_version=rmv, + ) + assert pipeline_definition == make_mock_simple_pipeline_definition(id=rmv.id) + + def test_to_pipeline_definition( make_mock_pipeline_graph, make_mock_registered_model_version, @@ -184,8 +210,8 @@ def test_to_pipeline_configuration_invalid_resources( pipeline._to_pipeline_configuration(pipeline_resources=step_resources) assert ( str(err.value) - == "pipeline_resources contains resources for a step not in the " - "pipeline: 'invalid_step_name'" + == "pipeline_resources contains resources for a step not in the " + "pipeline: 'invalid_step_name'" ) step_resources.pop("invalid_step_name") # step name not a string @@ -265,8 +291,6 @@ def test_from_pipeline_definition( rmv = make_mock_registered_model_version() pipeline = RegisteredPipeline._from_pipeline_definition( registered_model_version=rmv, - conn=mock_conn, - conf=mock_config, ) assert isinstance(pipeline, RegisteredPipeline) assert pipeline.id == rmv.id diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index c61844d737..56958ba32b 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -4,6 +4,8 @@ import copy from typing import Any, Dict, Optional +import tempfile + from verta._internal_utils._utils import Configuration, Connection from verta.endpoint.resources import Resources from verta.pipeline import PipelineGraph @@ -80,13 +82,10 @@ def _log_pipeline_definition_artifact(self) -> None: """ Log the pipeline definition as an artifact of the registered model version. 
""" - self._registered_model_version.log_artifact( - "pipeline.json", self._to_pipeline_definition() - ) - - def _get_pipeline_definition_artifact(self) -> Dict[str, Any]: - """Get the pipeline definition artifact from the registered model version.""" - return self._registered_model_version.get_artifact("pipeline.json") + with tempfile.NamedTemporaryFile() as temp_file: + bytes = json.dumps(self._to_pipeline_definition()).encode("utf-8") + temp_file.write(bytes) + self._registered_model_version.log_artifact("pipeline.json", temp_file) def _to_pipeline_definition(self) -> Dict[str, Any]: """Create a complete pipeline definition dict from a name and PipelineGraph. @@ -149,12 +148,30 @@ def _to_pipeline_configuration( "steps": steps, } + @classmethod + def _get_pipeline_definition_artifact( + cls, registered_model_version: RegisteredModelVersion + ) -> Dict[str, Any]: + """Get the pipeline definition artifact from the registered model version. + + Parameters + ---------- + registered_model_version : :class:`~verta.registry.entities.RegisteredModelVersion` + RegisteredModelVersion object associated with this pipeline, from which + the pipeline definition artifact will be fetched. + + Returns + ------- + dict + Pipeline definition dictionary. + """ + definition = registered_model_version.get_artifact("pipeline.json").read() + return json.loads(definition.decode("utf-8")) + @classmethod def _from_pipeline_definition( cls, registered_model_version: RegisteredModelVersion, - conn: Connection, - conf: Configuration, ) -> "RegisteredPipeline": """Create a Pipeline instance from a specification dict. @@ -166,18 +183,15 @@ def _from_pipeline_definition( RegisteredModelVersion object associated with this pipeline. pipeline_definition : dict Specification dict from which to create the Pipeline. - conn : :class:`~verta._internal_utils._utils.Connection` - Connection object for fetching the models and model versions associated with steps. 
- conf : :class:`~verta._internal_utils._utils.Configuration` - Configuration object for fetching the models and model versions associated with steps. """ - pipeline_definition_str = registered_model_version.get_artifact( - "pipeline.json" - ).read() - pipeline_definition = json.loads(pipeline_definition_str) + pipeline_definition = cls._get_pipeline_definition_artifact( + registered_model_version + ) return cls( registered_model_version=registered_model_version, graph=PipelineGraph._from_definition( - pipeline_definition=pipeline_definition, conn=conn, conf=conf + pipeline_definition=pipeline_definition, + conn=registered_model_version._conn, + conf=registered_model_version._conf, ), ) From 251819920aa991234d8be81e80a4c4fd07d80fca Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 00:26:55 +0200 Subject: [PATCH 048/117] test: make pipeline_definition strategy handle arbitrary numbers of steps dynamically --- client/verta/tests/unit_tests/strategies.py | 55 +++++++++------------ 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/client/verta/tests/unit_tests/strategies.py b/client/verta/tests/unit_tests/strategies.py index ebff50bc71..c4007bccda 100644 --- a/client/verta/tests/unit_tests/strategies.py +++ b/client/verta/tests/unit_tests/strategies.py @@ -269,49 +269,40 @@ def pipeline_definition(draw): # step names in a pipeline must be unique step_names = draw( - st.lists(st.text(min_size=5, max_size=25), min_size=5, max_size=5, unique=True) + st.lists(st.text(min_size=1), min_size=2, unique=True) ) model_versions = draw( st.lists( # limit max value to prevent protobuf "Value out of range" error st.integers(min_value=1, max_value=2**63), - min_size=5, - max_size=5, + min_size=len(step_names), + max_size=len(step_names), unique=True, ) ) + graph = list() + for i in range(len(step_names)): + if i == 0: + graph.append({"predecessors": [], "name": step_names[i]}) + else: + graph.append( + {"predecessors": [step_names[i - 1]], "name": 
step_names[i]} + ) + + steps = list() + for i in range(len(step_names)): + steps.append( + { + "model_version_id": model_versions[i], + "name": step_names[i], + } + ) + return { - "graph": [ - {"predecessors": [], "name": step_names[0]}, - {"predecessors": [step_names[0]], "name": step_names[1]}, - {"predecessors": [step_names[1]], "name": step_names[2]}, - {"predecessors": [step_names[2]], "name": step_names[3]}, - {"predecessors": [step_names[3]], "name": step_names[4]}, - ], + "graph": graph, "pipeline_version_id": draw(st.integers(min_value=1)), - "steps": [ - { - "model_version_id": model_versions[0], - "name": step_names[0], - }, - { - "model_version_id": model_versions[1], - "name": step_names[1], - }, - { - "model_version_id": model_versions[2], - "name": step_names[2], - }, - { - "model_version_id": model_versions[3], - "name": step_names[3], - }, - { - "model_version_id": model_versions[4], - "name": step_names[4], - }, - ], + "steps": steps, } From f87bfafeff496f300fc32d8d7e887f1293de61c2 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 01:27:05 +0200 Subject: [PATCH 049/117] refactor: separate validation logic from setter methods to allow validation at more stages of the user workflow --- .../verta/verta/pipeline/_pipeline_graph.py | 20 ++++++++++++---- client/verta/verta/pipeline/_pipeline_step.py | 23 ++++++++++++------- .../verta/pipeline/_registered_pipeline.py | 2 ++ 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index c0c068771f..ad6c5f69bd 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -23,7 +23,9 @@ class PipelineGraph: """ def __init__(self, steps: Set[PipelineStep]): - self._steps = self.set_steps(steps) + self._steps = self._validate_steps(steps) + self._predecessors = [s.predecessors for s in self._steps] + # throws an error if any step's 
predecessors attr has been inappropriately mutated def __repr__(self): return f"\nPipelineGraph steps:\n{self._format_steps()}" @@ -34,7 +36,8 @@ def _format_steps(self): @property def steps(self): - return self._steps + return self._validate_steps(self._steps) + @steps.setter def steps(self, value): @@ -48,13 +51,20 @@ def set_steps(self, steps: Set[PipelineStep]) -> Set[PipelineStep]: steps : set of :class:`~verta.deployment.PipelineStep` Set of all possible steps of the pipline graph. Order does not matter. """ + self._steps = self._validate_steps(steps) + return self.steps + + def _validate_steps(self, steps: Set[PipelineStep]) -> Set[PipelineStep]: + """Validate that the provided steps are a set of PipelineStep objects.""" if not isinstance(steps, set): raise TypeError(f"steps must be type set, not {type(steps)}") for step in steps: if not isinstance(step, PipelineStep): - raise TypeError(f"individual steps must be type PipelineStep, not {type(step)}") - self._steps = steps - return self.steps + raise TypeError( + f"individual steps of a PipelineGraph must be type" + f" PipelineStep, not {type(step)} for step '{step}'" + ) + return steps @classmethod def _from_definition( diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index c6bdca26e9..82450d3127 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -41,7 +41,7 @@ def __init__( self._name = self.set_name(name) self._model_version = self.set_model_version(model_version) self._predecessors = ( - self.set_predecessors(predecessors) if predecessors else set() + self._validate_predecessors(predecessors) if predecessors else set() ) self._registered_model_id = self._model_version.registered_model_id self._registered_model: RegisteredModel = self._get_registered_model( @@ -111,7 +111,7 @@ def set_name(self, name: str) -> str: @property def predecessors(self) -> Set["PipelineStep"]: - return 
self._predecessors + return self._validate_predecessors(self._predecessors) @predecessors.setter def predecessors(self, value) -> None: @@ -128,15 +128,22 @@ def set_predecessors(self, steps: Set["PipelineStep"]) -> set: steps : list List of PipelineStep objects whose outputs will be treated as inputs to this step. """ - if not isinstance(steps, set): - raise TypeError(f"predecessors must be type set, not {type(steps)}") - for step in steps: + self._predecessors = self._validate_predecessors(steps) + return self.predecessors + + def _validate_predecessors( + self, predecessors: Set["PipelineStep"] + ) -> set["PipelineStep"]: + """Validate that the provided predecessors are a set of PipelineStep objects.""" + if not isinstance(predecessors, set): + raise TypeError(f"steps must be type set, not {type(predecessors)}") + for step in predecessors: if not isinstance(step, PipelineStep): raise TypeError( - f"individual predecessors must be type PipelineStep, not {type(step)}" + f"individual predecessors of a PipelineStep must be type" + f" PipelineStep, not {type(step)} for predecessor '{step}'" ) - self._predecessors = steps - return self.predecessors + return predecessors def _get_registered_model(self, conn: Connection, conf: Configuration) -> None: """Fetch the registered model associated with this step's model version. 
diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 56958ba32b..91cc230860 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -46,6 +46,8 @@ def __init__( self._name = self._registered_model_version.name self._id = self._registered_model_version.id self._graph = graph + self._graph_steps = self._graph.steps + # throws an error if the graph's steps attr has been inappropriately mutated def __repr__(self): return "\n".join( From 8f4ad67209961f2481a536411bbb40d5e727cd99 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 18:54:32 +0200 Subject: [PATCH 050/117] refactor: use registered_model_version as naming convention everywhere for consistency and clarity --- client/verta/tests/unit_tests/conftest.py | 2 +- .../pipeline/test_pipeline_graph.py | 8 +- .../unit_tests/pipeline/test_pipeline_step.py | 18 ++-- .../pipeline/test_registered_pipeline.py | 5 +- client/verta/verta/pipeline/_pipeline_step.py | 87 ++++++++++++------- 5 files changed, 76 insertions(+), 44 deletions(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index 9c30974e57..d63b68077d 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -188,7 +188,7 @@ def __repr__(self): # avoid network calls when displaying test results def _make_mock_pipeline_step(name: Optional[str] = None): return MockPipelineStep( - model_version=make_mock_registered_model_version(), + registered_model_version=make_mock_registered_model_version(), name=name if name else "test_pipeline_step_name", predecessors=set(), ) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index 1601228700..6179b35ce6 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ 
b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -94,7 +94,9 @@ def test_from_definition( for graph_step, pipeline_step in zip(graph_steps_sorted, pipeline_steps_sorted): assert graph_step.name == pipeline_step["name"] - assert graph_step.model_version.id == pipeline_step["model_version_id"] + assert ( + graph_step.registered_model_version.id == pipeline_step["model_version_id"] + ) assert graph_step._registered_model.name == model_name assert graph_step._registered_model.id == registered_model_id @@ -151,11 +153,11 @@ def test_to_steps_definition( expected_definition = [ { "name": step_1.name, - "model_version_id": step_1.model_version.id, + "model_version_id": step_1.registered_model_version.id, }, { "name": step_2.name, - "model_version_id": step_2.model_version.id, + "model_version_id": step_2.registered_model_version.id, }, ] assert sorted(step_specs, key=lambda x: x["name"]) == sorted( diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index e231914783..c78d95e002 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -81,7 +81,7 @@ def test_steps_from_pipeline_definition( # the names are the same for the steps and their definitions assert gen_step.name == def_step["name"] # model version ids are the same for the steps and their definitions - assert gen_step.model_version.id == def_step["model_version_id"] + assert gen_step.registered_model_version.id == def_step["model_version_id"] # registered model ids are the same for the steps and their definitions assert gen_step._registered_model.id == registered_model_id # registered model names are fetched and added @@ -109,7 +109,7 @@ def test_to_step_spec( verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm ): step = PipelineStep( - model_version=mocked_rmv, + registered_model_version=mocked_rmv, 
name="test_name", predecessors=set(), # predecessors not included in step spec ) @@ -134,7 +134,7 @@ def test_to_graph_spec( ): predecessors = {make_mock_pipeline_step() for _ in range(random.randint(1, 5))} step = PipelineStep( - model_version=mocked_rmv, + registered_model_version=mocked_rmv, name="test_name", predecessors=predecessors, ) @@ -160,7 +160,7 @@ def test_set_predecessors_add( predecessor_1 = make_mock_pipeline_step() predecessor_2 = make_mock_pipeline_step() step = PipelineStep( - model_version=mocked_rmv, + registered_model_version=mocked_rmv, name="test_name", predecessors={predecessor_1}, ) @@ -187,7 +187,7 @@ def test_set_predecessors_remove( predecessors_as_list = list(predecessors) # convert to list for slicing steps_to_remain = predecessors_as_list[: len(predecessors_as_list) // 2] step = PipelineStep( - model_version=mocked_rmv, + registered_model_version=mocked_rmv, name="test_name", predecessors=predecessors, ) @@ -206,10 +206,10 @@ def test_change_model_version( verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm ): step = PipelineStep( - model_version=model_ver_1, + registered_model_version=model_ver_1, name="test_name", predecessors=set(), ) - assert step.model_version == model_ver_1 - step.set_model_version(model_ver_2) - assert step.model_version == model_ver_2 + assert step.registered_model_version == model_ver_1 + step.set_registered_model_version(model_ver_2) + assert step.registered_model_version == model_ver_2 diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 3ea3c7fbde..ac1eb5281a 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -38,7 +38,10 @@ def test_copy_graph( assert orig_step is not copied_step assert orig_step.name == copied_step.name assert orig_step.predecessors == 
copied_step.predecessors - assert orig_step.model_version.id == copied_step.model_version.id + assert ( + orig_step.registered_model_version.id + == copied_step.registered_model_version.id + ) assert copied_graph is not graph diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 82450d3127..a46f176c8b 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, Optional, Set from verta._internal_utils._utils import Configuration, Connection from verta.registry.entities import RegisteredModel, RegisteredModelVersion @@ -14,38 +14,40 @@ class PipelineStep: ---------- name : str Name of the step, for use within the scope of the pipeline only. - model_version : :class:`~verta.registry.entities.RegisteredModelVersion` - Registered model version to run for the step. + registered_model_version : :class:`~verta.registry.entities.RegisteredModelVersion` + Registered model version to run for this step. predecessors : set, optional - Set of PipelineSteps whose outputs will be treated as inputs to this step. - If not included, the step is assumed to be an initial step. Values must be unique. + Set of unique PipelineSteps whose outputs will be treated as inputs to this + step. If not included, the step is assumed to be an initial step. Attributes ---------- name : str Name of the step within the scope of the pipeline. - model_version : :class:`~verta.registry.entities.RegisteredModelVersion` - Model version being run by this step. - predecessors : list - List of PipelineSteps whose outputs will be treated as inputs to this step. + registered_model_version : :class:`~verta.registry.entities.RegisteredModelVersion` + Registered model version run by this step. 
+ predecessors : set + Set of PipelineSteps whose outputs will be treated as inputs to this step. """ def __init__( self, name: str, - model_version: RegisteredModelVersion, + registered_model_version: RegisteredModelVersion, predecessors: Optional[ Set["PipelineStep"] ] = None, # Optional because it could be the first step with no predecessors ): self._name = self.set_name(name) - self._model_version = self.set_model_version(model_version) + self._registered_model_version = self.set_registered_model_version( + registered_model_version + ) self._predecessors = ( self._validate_predecessors(predecessors) if predecessors else set() ) - self._registered_model_id = self._model_version.registered_model_id + self._registered_model_id = self._registered_model_version.registered_model_id self._registered_model: RegisteredModel = self._get_registered_model( - conn=model_version._conn, conf=model_version._conf + conn=registered_model_version._conn, conf=registered_model_version._conf ) def __repr__(self) -> str: @@ -55,37 +57,51 @@ def __repr__(self) -> str: f"step name: {self.name}", f"registered_model: {self._registered_model.name}", f"registered_model_id: {self._registered_model_id}", - f"registered_model_version: {self.model_version.name}", - f"registered_model_version_id: {self.model_version.id}", + f"registered_model_version: {self.registered_model_version.name}", + f"registered_model_version_id: {self.registered_model_version.id}", f"predecessors: {[s.name for s in self.predecessors]}", ) ) @property - def model_version(self) -> RegisteredModelVersion: - return self._model_version + def registered_model_version(self) -> RegisteredModelVersion: + return self._registered_model_version - @model_version.setter - def model_version(self, value) -> None: + @registered_model_version.setter + def registered_model_version(self, value) -> None: """Raise a more informative error than the default.""" raise AttributeError( - "can't set attribute 'model_version'; please use 
set_model_version()" + "can't set attribute 'registered_model_version'; please use set_registered_model_version()" ) - def set_model_version(self, new_model_version: RegisteredModelVersion) -> None: - """Change the registered model version associated with this step. + def set_registered_model_version( + self, registered_model_version: RegisteredModelVersion + ) -> RegisteredModelVersion: + """Set the registered model version associated with this step. Parameters ---------- - model_version : :class:`~verta.registry.entities.RegisteredModelVersion` + registered_model_version : :class:`~verta.registry.entities.RegisteredModelVersion` Registered model version to use for the step. + + Returns + ------- + RegisteredModelVersion + The new registered model version now set for this step. + + Raises + ------ + TypeError + If the provided value is not type RegisteredModelVersion + """ - if not isinstance(new_model_version, RegisteredModelVersion): + if not isinstance(registered_model_version, RegisteredModelVersion): raise TypeError( - f"model_version must be a RegisteredModelVersion object, not {type(new_model_version)}" + f"registered_model_version must be a RegisteredModelVersion object, not {type(registered_model_version)}" ) - self._model_version = new_model_version - return self.model_version + # TODO: Figure out if self._model_version_id gets updated here by default + self._registered_model_version = registered_model_version + return self.registered_model_version @property def name(self) -> str: @@ -102,7 +118,18 @@ def set_name(self, name: str) -> str: Parameters ---------- name : str - New name to use for the step. + New name to use for the step + + Returns + ------- + name: str + The string value of the name now set for this step. + + Raises + ------ + TypeError + If the provided value is not type str. 
+ """ if not isinstance(name, str): raise TypeError(f"name must be a string, not {type(name)}") @@ -186,7 +213,7 @@ def _steps_from_pipeline_definition( steps.add( cls( name=step["name"], - model_version=RegisteredModelVersion._get_by_id( + registered_model_version=RegisteredModelVersion._get_by_id( id=step["model_version_id"], conn=conn, conf=conf ), predecessors=set(), @@ -209,7 +236,7 @@ def _to_step_spec(self) -> Dict[str, Any]: """ return { "name": self.name, - "model_version_id": self.model_version.id, + "model_version_id": self.registered_model_version.id, } def _to_graph_spec(self) -> Dict[str, Any]: From 05eec319c37f97ad915f93fb3f2f53be337dfa27 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 19:51:59 +0200 Subject: [PATCH 051/117] refactor: update all related params when changing RMV for a step --- .../unit_tests/pipeline/test_pipeline_step.py | 66 ++++++++++++++----- client/verta/verta/pipeline/_pipeline_step.py | 7 +- 2 files changed, 55 insertions(+), 18 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index c78d95e002..a144e9330f 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -195,21 +195,55 @@ def test_set_predecessors_remove( assert step.predecessors == set(steps_to_remain) +@given( + rm_1_name=st.text(min_size=1), + rm_2_name=st.text(min_size=1), +) +@settings( + suppress_health_check=[HealthCheck.function_scoped_fixture], + deadline=None, +) def test_change_model_version( - make_mock_registered_model_version, make_mock_registered_model + rm_1_name, + rm_2_name, + make_mock_registered_model_version, + make_mock_registered_model, + mocked_responses, ) -> None: - """Test that a PipelineStep object can have its model version changed""" - model_ver_1 = make_mock_registered_model_version() - model_ver_2 = 
make_mock_registered_model_version() - mocked_rm = make_mock_registered_model(id=123, name="test_rmv") - with patch.object( - verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm - ): - step = PipelineStep( - registered_model_version=model_ver_1, - name="test_name", - predecessors=set(), - ) - assert step.registered_model_version == model_ver_1 - step.set_registered_model_version(model_ver_2) - assert step.registered_model_version == model_ver_2 + """Test that a PipelineStep object can have its model version changed + + Each time a RMV is set for a PipelineStep, the RM for it is fetched, + so a call is mocked for th initial step creation and the change. + """ + rmv_1 = make_mock_registered_model_version() + rmv_2 = make_mock_registered_model_version() + mocked_responses.get( + f"https://test_socket/api/v1/registry/registered_models/{rmv_1.registered_model_id}", + json={ + "registered_model": { + "id": rmv_1.registered_model_id, + "name": rm_1_name, + } + }, + status=200, + ) + mocked_responses.get( + f"https://test_socket/api/v1/registry/registered_models/{rmv_2.registered_model_id}", + json={ + "registered_model": { + "id": rmv_2.registered_model_id, + "name": rm_2_name, + } + }, + status=200, + ) + step = PipelineStep( + registered_model_version=rmv_1, + name="test_name", + predecessors=set(), + ) + assert step.registered_model_version == rmv_1 + assert step._registered_model_id == rmv_1.registered_model_id + step.set_registered_model_version(rmv_2) + assert step.registered_model_version == rmv_2 + assert step._registered_model_id == rmv_2.registered_model_id diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index a46f176c8b..da73e27fbf 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -99,8 +99,11 @@ def set_registered_model_version( raise TypeError( f"registered_model_version must be a RegisteredModelVersion object, 
not {type(registered_model_version)}" ) - # TODO: Figure out if self._model_version_id gets updated here by default self._registered_model_version = registered_model_version + self._registered_model_id = registered_model_version.registered_model_id + self._registered_model = self._get_registered_model( + conn=registered_model_version._conn, conf=registered_model_version._conf + ) return self.registered_model_version @property @@ -172,7 +175,7 @@ def _validate_predecessors( ) return predecessors - def _get_registered_model(self, conn: Connection, conf: Configuration) -> None: + def _get_registered_model(self, conn: Connection, conf: Configuration) -> RegisteredModel: """Fetch the registered model associated with this step's model version. This is to provide important context to the user via the _repr_ method From 2321822c598cf74dd77334dae681af21e36cd321 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 20:07:23 +0200 Subject: [PATCH 052/117] refactor: use conn params from RMV instead of passing in --- client/verta/verta/pipeline/_pipeline_step.py | 41 ++++++++++++++----- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index da73e27fbf..77237ea260 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -46,9 +46,7 @@ def __init__( self._validate_predecessors(predecessors) if predecessors else set() ) self._registered_model_id = self._registered_model_version.registered_model_id - self._registered_model: RegisteredModel = self._get_registered_model( - conn=registered_model_version._conn, conf=registered_model_version._conf - ) + self._registered_model: RegisteredModel = self._get_registered_model() def __repr__(self) -> str: return "\n ".join( @@ -101,9 +99,7 @@ def set_registered_model_version( ) self._registered_model_version = registered_model_version self._registered_model_id = 
registered_model_version.registered_model_id - self._registered_model = self._get_registered_model( - conn=registered_model_version._conn, conf=registered_model_version._conf - ) + self._registered_model = self._get_registered_model() return self.registered_model_version @property @@ -131,7 +127,7 @@ def set_name(self, name: str) -> str: Raises ------ TypeError - If the provided value is not type str. + If the provided value for ``name`` is not type str. """ if not isinstance(name, str): @@ -150,13 +146,23 @@ def predecessors(self, value) -> None: "can't set attribute 'predecessors'; please use set_predecessors()" ) - def set_predecessors(self, steps: Set["PipelineStep"]) -> set: + def set_predecessors(self, steps: Set["PipelineStep"]) -> Set["PipelineStep"]: """Set the predecessors associated with this step. Parameters ---------- steps : list List of PipelineStep objects whose outputs will be treated as inputs to this step. + + Returns + ------- + set of PipelineStep + The new set of predecessors now set for this step. + + Raises + ------ + TypeError + If the provided value for ``steps`` is not a set of PipelineStep objects. """ self._predecessors = self._validate_predecessors(steps) return self.predecessors @@ -164,7 +170,14 @@ def set_predecessors(self, steps: Set["PipelineStep"]) -> set: def _validate_predecessors( self, predecessors: Set["PipelineStep"] ) -> set["PipelineStep"]: - """Validate that the provided predecessors are a set of PipelineStep objects.""" + """Validate that the provided predecessors are a set of PipelineStep objects. + + Parameters + ---------- + predecessors : set + Set of PipelineStep objects whose outputs will be treated as inputs to + this step. 
+ """ if not isinstance(predecessors, set): raise TypeError(f"steps must be type set, not {type(predecessors)}") for step in predecessors: @@ -175,14 +188,20 @@ def _validate_predecessors( ) return predecessors - def _get_registered_model(self, conn: Connection, conf: Configuration) -> RegisteredModel: + def _get_registered_model(self) -> RegisteredModel: """Fetch the registered model associated with this step's model version. This is to provide important context to the user via the _repr_ method when a registered pipeline is fetched from the backend. + + Returns + ------- + :class:`~verta.registry.entities.RegisteredModel` """ rm = RegisteredModel._get_by_id( - id=self._registered_model_id, conn=conn, conf=conf + id=self._registered_model_id, + conn=self.registered_model_version._conn, + conf=self.registered_model_version._conf, ) self._registered_model = rm return rm From 0e9b6eb04df878aafb25a4c56f9d8340fef1ba97 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 20:16:31 +0200 Subject: [PATCH 053/117] fix: final formatting and accuracy changes for _pipeline_step.py --- client/verta/verta/pipeline/_pipeline_step.py | 43 +++++++++++-------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 77237ea260..29b3f27b3b 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -90,7 +90,7 @@ def set_registered_model_version( Raises ------ TypeError - If the provided value is not type RegisteredModelVersion + If the provided value is not type RegisteredModelVersion. """ if not isinstance(registered_model_version, RegisteredModelVersion): @@ -117,7 +117,7 @@ def set_name(self, name: str) -> str: Parameters ---------- name : str - New name to use for the step + New name to use for the step. 
Returns ------- @@ -191,8 +191,8 @@ def _validate_predecessors( def _get_registered_model(self) -> RegisteredModel: """Fetch the registered model associated with this step's model version. - This is to provide important context to the user via the _repr_ method - when a registered pipeline is fetched from the backend. + This is to provide important context to the user when a registered + pipeline is fetched from the backend. Returns ------- @@ -219,16 +219,17 @@ def _steps_from_pipeline_definition( Parameters ---------- pipeline_definition : dict - Specification dictionary for the whole pipeline + Specification dictionary for the whole pipeline. conn : :class:`~verta._internal_utils._utils.Connection` - Connection object for fetching the model version associated with the step + Connection object for fetching the model version associated with each step. conf: :class:`~verta._internal_utils._utils.Configuration` - Configuration object for fetching the model version associated with the step + Configuration object for fetching the model version associated with each + step. Returns ------- - list of :class:`~verta._pipelines.PipelineStep` - List of steps in the pipeline spec as PipelineStep objects + set of :class:`~verta._pipelines.PipelineStep` + Set of steps in the pipeline spec as PipelineStep objects. """ steps: Set["PipelineStep"] = set() for step in pipeline_definition["steps"]: @@ -252,20 +253,28 @@ def _steps_from_pipeline_definition( ) return steps - def _to_step_spec(self) -> Dict[str, Any]: - """Return a dictionary representation of this step, formatted for a - pipeline definition. + def _to_graph_spec(self) -> Dict[str, Any]: + """Return a dictionary representation of predecessors for this step, + formatted for a pipeline definition. + + The back-end expects a list of steps and their predecessors as part of the + `graph` object within a PipelineDefinition. This method converts this individual + PipelineStep to a formatted dict for that purpose. 
""" return { "name": self.name, - "model_version_id": self.registered_model_version.id, + "predecessors": [s.name for s in self.predecessors], } - def _to_graph_spec(self) -> Dict[str, Any]: - """Return a dictionary representation of predecessors for this step, - formatted for a pipeline definition. + def _to_step_spec(self) -> Dict[str, Any]: + """Return a dictionary representation of this step, formatted for a + pipeline definition. + + The back-end expects a list of steps and their model version as part of the + `steps` object within a PipelineDefinition. This method converts this individual + PipelineStep to a formatted dict for that purpose. """ return { "name": self.name, - "predecessors": [s.name for s in self.predecessors], + "model_version_id": self.registered_model_version.id, } From c73100110964d910b65410d2c0bcb98610ebf0a1 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 20:49:12 +0200 Subject: [PATCH 054/117] fix: final formatting and accuracy changes for _pipeline_graph.py --- .../verta/verta/pipeline/_pipeline_graph.py | 55 +++++++++++++------ client/verta/verta/pipeline/_pipeline_step.py | 3 +- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index ad6c5f69bd..760be2b28e 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -7,41 +7,40 @@ class PipelineGraph: - """A collection of PipelineSteps to be run as a single inference pipeline. + """Object representing a collection of PipelineSteps to be run as a single + inference pipeline. Parameters ---------- steps : set of :class:`~verta.pipeline.PipelineStep` - List of all possible steps of the pipeline. Ordering of steps in the pipeline - itself is determined by the predecessors provided to each step, thus ordering - of this list is irrelevant. + Set of all possible steps of the pipeline. 
Ordering of steps in the pipeline + itself is determined by the predecessors provided to each step. Attributes ---------- steps: set of :class:`~verta.deployment.PipelineStep` - Set of PipelineSteps comprising all possible steps in the PiplineGraph. + Set of PipelineSteps comprising all possible steps in this PiplineGraph. """ def __init__(self, steps: Set[PipelineStep]): self._steps = self._validate_steps(steps) self._predecessors = [s.predecessors for s in self._steps] - # throws an error if any step's predecessors attr has been inappropriately mutated + # throws an error if any step's predecessors attr has been inappropriately mutated. - def __repr__(self): + def __repr__(self) -> str: return f"\nPipelineGraph steps:\n{self._format_steps()}" - def _format_steps(self): + def _format_steps(self) -> str: """Format steps for improved readability in __repr__() function.""" return "\n".join([repr(s) for s in self._steps]) @property - def steps(self): + def steps(self) -> Set[PipelineStep]: return self._validate_steps(self._steps) - @steps.setter def steps(self, value): - raise AttributeError("cannot set attribute 'steps'; please use set_steps()") + raise AttributeError("can't set attribute 'steps'; please use set_steps()") def set_steps(self, steps: Set[PipelineStep]) -> Set[PipelineStep]: """Update the set of steps for this PipelineGraph to the provided value. @@ -49,20 +48,36 @@ def set_steps(self, steps: Set[PipelineStep]) -> Set[PipelineStep]: Parameters ---------- steps : set of :class:`~verta.deployment.PipelineStep` - Set of all possible steps of the pipline graph. Order does not matter. + Set of all possible steps of the pipline graph. """ self._steps = self._validate_steps(steps) return self.steps def _validate_steps(self, steps: Set[PipelineStep]) -> Set[PipelineStep]: - """Validate that the provided steps are a set of PipelineStep objects.""" + """Validate that the provided steps are a set of PipelineStep objects. 
+ + Parameters + ---------- + steps : set of :class:`~verta.deployment.PipelineStep` + Set of steps provided by a user. + + Returns + ------- + set of :class:`~verta.deployment.PipelineStep` + The same set of steps if validation is successful. + + Raises + ------ + TypeError + If steps is not a set of PipelineStep objects. + """ if not isinstance(steps, set): raise TypeError(f"steps must be type set, not {type(steps)}") for step in steps: if not isinstance(step, PipelineStep): raise TypeError( f"individual steps of a PipelineGraph must be type" - f" PipelineStep, not {type(step)} for step '{step}'" + f" PipelineStep, not {type(step)}" ) return steps @@ -72,10 +87,14 @@ def _from_definition( ) -> "PipelineGraph": """Create a PipelineGraph instance from a specification dict. + This is used to return a PipelineGraph object when fetching an existing registered + pipeline from the backend in the form of a dict extracted from a `pipeline.json` + artifact. + Parameters ---------- - pipeline_spec : dict - Specification dict from which to create the Pipeline. + pipeline_definition : dict + Pipeline definition dict from which to create the Pipeline. conn : :class:`~verta._internal_utils._utils.Connection` Connection object for fetching the model version associated with the step conf: :class:`~verta._internal_utils._utils.Configuration` @@ -93,7 +112,7 @@ def _to_graph_definition(self) -> List[Dict[str, Any]]: The back-end expects a list of steps and their predecessors as part of the `graph` object within a PipelineDefinition. This method converts this PipelineGraph to a formatted list of steps with predecessors for that purpose. A list is used - to remain json serializable. + to remain json serializable, as this will be converted and uploaded as an artifact. 
""" return [step._to_graph_spec() for step in self.steps] @@ -103,6 +122,6 @@ def _to_steps_definition(self) -> List[Dict[str, Any]]: The back-end expects a list of steps and their model versions as part of the `steps` object within a PipelineDefinition. This method converts this PipelineGraph to a formatted list of steps with model versions for that purpose. A list is used - to remain json serializable. + to remain json serializable, as this will be converted and uploaded as an artifact. """ return [step._to_step_spec() for step in self.steps] diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 29b3f27b3b..7ecf00adca 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -7,8 +7,7 @@ class PipelineStep: - """A single step within an inference pipeline, representing a single model - version to be run. + """Object representing a single step to be run within an inference pipeline. Parameters ---------- From 130e18f69fda52be2321d16d3a38551e73037b25 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 21:36:30 +0200 Subject: [PATCH 055/117] fix: final formatting and accuracy changes for _registered_pipeline.py --- .../verta/verta/pipeline/_pipeline_graph.py | 4 +- .../verta/pipeline/_registered_pipeline.py | 57 +++++++++++++------ 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 760be2b28e..1e414ba72c 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -25,10 +25,10 @@ class PipelineGraph: def __init__(self, steps: Set[PipelineStep]): self._steps = self._validate_steps(steps) self._predecessors = [s.predecessors for s in self._steps] - # throws an error if any step's predecessors attr has been inappropriately mutated. 
+ # throws an exception if any step's predecessors attr has been inappropriately mutated. def __repr__(self) -> str: - return f"\nPipelineGraph steps:\n{self._format_steps()}" + return f"PipelineGraph steps:\n{self._format_steps()}" def _format_steps(self) -> str: """Format steps for improved readability in __repr__() function.""" diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 91cc230860..37fa4207e1 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -1,12 +1,10 @@ # -*- coding: utf-8 -*- -import json import copy -from typing import Any, Dict, Optional - +import json import tempfile +from typing import Any, Dict, Optional -from verta._internal_utils._utils import Configuration, Connection from verta.endpoint.resources import Resources from verta.pipeline import PipelineGraph from verta.registry.entities import RegisteredModelVersion @@ -16,17 +14,23 @@ class RegisteredPipeline: """Object representing a version of a registered inference pipeline. There should not be a need to instantiate this class directly; please use - :meth:`Client.create_registered_pipeline() ` + :meth:`Client.create_registered_pipeline() ` for creating a new pipeline, or - :meth:`Client.get-registered_pipeline() ` - for fetching existing pipelines. + :meth:`Client.get_registered_pipeline() ` + for fetching an existing pipeline. + + .. note:: + Registered pipelines are immutable once registered with Verta. A new version + must be created and registered with any desired changes. Use the ``copy_graph()`` + function to create a local copy of this pipeline's graph that can be modified + and used to create the new version. Attributes ---------- name: str Name of this pipeline. id: int - Auto-assigned ID of this Pipeline. + ID of this Pipeline, auto-assigned by the Verta backend. 
graph: :class:`~verta.pipeline.PipelineGraph` PipelineGraph object containing all possible steps in the Pipline. """ @@ -47,10 +51,10 @@ def __init__( self._id = self._registered_model_version.id self._graph = graph self._graph_steps = self._graph.steps - # throws an error if the graph's steps attr has been inappropriately mutated + # throws an exception if the graph's steps attr has been inappropriately mutated. def __repr__(self): - return "\n".join( + return "\n ".join( ( "RegisteredPipeline:", f"pipeline name: {self.name}", @@ -72,7 +76,7 @@ def graph(self): return self._graph def copy_graph(self) -> PipelineGraph: - """Return a deep copy of the PipelineGraph of this pipeline. + """Return a deep copy of the PipelineGraph object for this pipeline. RegisteredPipeline objects are immutable once registered with Verta. This function returns a PipelineGraph object that can be modified and used to @@ -93,7 +97,9 @@ def _to_pipeline_definition(self) -> Dict[str, Any]: """Create a complete pipeline definition dict from a name and PipelineGraph. Used in conjunction with the client function for creating a registered - pipeline from a pipeline graph. + pipeline from a pipeline graph. This gets converted to JSON and uploaded + as an artifact to the registered model version for the pipeline by the + _log_pipeline_definition_artifact function. """ return { "pipeline_version_id": self.id, @@ -106,8 +112,11 @@ def _to_pipeline_configuration( ) -> Dict[str, Any]: """Build a pipeline configuration dict for this pipeline. - The `env` and `build` keys are not included in the configuration - resulting in default values being used by the backend. + Used in conjunction with the client function for creating a registered + pipeline from a pipeline graph. This gets included in the update request + for an endpoint when the pipeline is deployed. The `env` and `build` keys + are not included in the configuration resulting in default values being + used by the backend. 
Parameters ---------- @@ -118,6 +127,14 @@ def _to_pipeline_configuration( ------- dict Representation of a pipeline configuration. + + Raises + ------ + TypeError + If pipeline_resources is not a dict of str to Resources. + ValueError + If pipeline_resources contains resources for a step name that is not + in the pipeline. """ if pipeline_resources: for res in pipeline_resources.values(): @@ -156,6 +173,9 @@ def _get_pipeline_definition_artifact( ) -> Dict[str, Any]: """Get the pipeline definition artifact from the registered model version. + This is used to fetch the pipeline definition from the pipeline RMV when an + existing registered pipeline is fetched from the backend. + Parameters ---------- registered_model_version : :class:`~verta.registry.entities.RegisteredModelVersion` @@ -175,16 +195,17 @@ def _from_pipeline_definition( cls, registered_model_version: RegisteredModelVersion, ) -> "RegisteredPipeline": - """Create a Pipeline instance from a specification dict. + """Create a local RegisteredPipeline object from a pipeline's registered + model version. - Used when fetching a registered pipeline from the Verta backend. + Used when fetching a registered pipeline from the Verta backend. The + `pipeline.json` artifact is fetched from the RMV and used to build a + local RegisteredPipeline object. Parameters ---------- registered_model_version : :class:`~verta.registry.entities.RegisteredModelVersion` RegisteredModelVersion object associated with this pipeline. - pipeline_definition : dict - Specification dict from which to create the Pipeline. 
""" pipeline_definition = cls._get_pipeline_definition_artifact( registered_model_version From b82930edb3789bb57c7b0f8858cb78159cd753c4 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 21:39:35 +0200 Subject: [PATCH 056/117] fix: final formatting and accuracy changes for strategies.py --- client/verta/tests/unit_tests/strategies.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/strategies.py b/client/verta/tests/unit_tests/strategies.py index c4007bccda..f1c9c715cb 100644 --- a/client/verta/tests/unit_tests/strategies.py +++ b/client/verta/tests/unit_tests/strategies.py @@ -265,7 +265,9 @@ def mock_workspace(draw): @st.composite def pipeline_definition(draw): - """Generate a mocked pipeline specification dictionary""" + """Return a strategy for a mocked linear pipeline specification + dictionary with an arbitrary number of steps. + """ # step names in a pipeline must be unique step_names = draw( From a2f5ea166289aa779b33864c2a57f7afac8c66b8 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 21:45:28 +0200 Subject: [PATCH 057/117] fix: final formatting and accuracy changes for conftest.py --- client/verta/tests/unit_tests/conftest.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index d63b68077d..93bc2fdb95 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -69,9 +69,10 @@ def __repr__(self): # avoid network calls when displaying test results @pytest.fixture(scope="session") def make_mock_simple_pipeline_definition() -> Callable: - """ - Return a callable function for creating a simple mocked pipeline - definition for use in tests, including a parameter for the pipeline + """Return a callable function for creating a simple mocked pipeline + definition. 
+ + For use in tests, including a parameter for the pipeline id to ensure consistency in tests that mock creation of a pipeline object from a pipeline definition. """ @@ -128,8 +129,7 @@ def make_mock_registered_model_version( mock_conn, mock_config, make_mock_simple_pipeline_definition ) -> Callable: """Return a callable function for creating mocked objects of the - RegisteredModelVersion class for use in tests that require multiple - unique instances. + RegisteredModelVersion class. """ class MockRegisteredModelVersion(RegisteredModelVersion): @@ -176,10 +176,10 @@ def _make_mock_registered_model_version(): @pytest.fixture(scope="session") def make_mock_pipeline_step(make_mock_registered_model_version) -> Callable: """Return a callable function for creating mocked objects of the PipelineStep - class for use in tests that require multiple unique instances. + class. - The optional `name` parameter is for use in tests where more than one unique - step is required for a singe test. + The optional `name` parameter is for use in tests where names must be + known for assertions. """ class MockPipelineStep(PipelineStep): @@ -198,9 +198,8 @@ def _make_mock_pipeline_step(name: Optional[str] = None): @pytest.fixture(scope="session") def make_mock_pipeline_graph(make_mock_pipeline_step) -> Callable: - """ - Return a callable function for creating mocked objects of the PipelineGraph - class for use in tests that require multiple unique instances. + """Return a callable function for creating mocked objects of the PipelineGraph + class. 
""" class MockPipelineGraph(PipelineGraph): From fb0557a2f25786a81b9f882f9c0ca3fbc6405e49 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 21:47:01 +0200 Subject: [PATCH 058/117] fix: black format --- client/verta/tests/unit_tests/strategies.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/client/verta/tests/unit_tests/strategies.py b/client/verta/tests/unit_tests/strategies.py index f1c9c715cb..6c442b1bb7 100644 --- a/client/verta/tests/unit_tests/strategies.py +++ b/client/verta/tests/unit_tests/strategies.py @@ -270,9 +270,7 @@ def pipeline_definition(draw): """ # step names in a pipeline must be unique - step_names = draw( - st.lists(st.text(min_size=1), min_size=2, unique=True) - ) + step_names = draw(st.lists(st.text(min_size=1), min_size=2, unique=True)) model_versions = draw( st.lists( # limit max value to prevent protobuf "Value out of range" error @@ -288,9 +286,7 @@ def pipeline_definition(draw): if i == 0: graph.append({"predecessors": [], "name": step_names[i]}) else: - graph.append( - {"predecessors": [step_names[i - 1]], "name": step_names[i]} - ) + graph.append({"predecessors": [step_names[i - 1]], "name": step_names[i]}) steps = list() for i in range(len(step_names)): @@ -313,7 +309,9 @@ def resources(draw): """Return a strategy emulating the Resources class.""" return Resources( cpu=draw(st.integers(min_value=1)), - memory=draw(st.from_regex(r"^[0-9]+[e]?[0-9]*[E|P|T|G|M|K]?[i]?$", fullmatch=True)), + memory=draw( + st.from_regex(r"^[0-9]+[e]?[0-9]*[E|P|T|G|M|K]?[i]?$", fullmatch=True) + ), nvidia_gpu=NvidiaGPU( model=draw(st.sampled_from([NvidiaGPUModel.T4, NvidiaGPUModel.V100])), number=draw(st.integers(min_value=1)), From fa420345ba76a2da701185bf8ce20634579d228b Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 22:09:00 +0200 Subject: [PATCH 059/117] fix: final formatting and accuracy changes for unit test files --- .../pipeline/test_pipeline_graph.py | 13 +++--- 
.../unit_tests/pipeline/test_pipeline_step.py | 25 +++++------ .../pipeline/test_registered_pipeline.py | 45 ++++++++++--------- 3 files changed, 39 insertions(+), 44 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index 6179b35ce6..d7e34b7947 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -13,9 +13,7 @@ def test_set_steps(make_mock_pipeline_step, make_mock_registered_model) -> None: - """ - Test that the steps of a PipelineGraph can be set - """ + """Test that the steps of a PipelineGraph can be set.""" mocked_rm = make_mock_registered_model(id=123, name="test_rmv") with patch.object( verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm @@ -50,7 +48,7 @@ def test_from_definition( mocked_responses, ) -> None: """Test that a PipelineGraph object can be constructed from a pipeline - specification. + definition. The model version is fetched for each step, so a response is mocked for each. In depth testing of each step is handled in @@ -85,7 +83,6 @@ def test_from_definition( assert isinstance(graph, PipelineGraph) # we have the same number of steps as in the pipeline definition assert len(graph.steps) == len(pipeline_definition["steps"]) - # sort each group of steps for comparison pipeline_steps_sorted = sorted( pipeline_definition["steps"], key=lambda x: x["name"] @@ -104,8 +101,8 @@ def test_from_definition( def test_to_graph_definition( make_mock_pipeline_step, make_mock_registered_model ) -> None: - """Test that a pipeline graph specification can be constructed from a - PipelineGraph object + """Test that a pipeline `graph` specification can be constructed from a + PipelineGraph object. 
""" mocked_rm = make_mock_registered_model(id=123, name="test_rmv") with patch.object( @@ -137,7 +134,7 @@ def test_to_graph_definition( def test_to_steps_definition( make_mock_pipeline_step, make_mock_registered_model ) -> None: - """Test that a pipeline steps specification can be constructed from a + """Test that a pipeline `steps` specification can be constructed from a PipelineGraph object. Definitions are type list to remain json serializable. diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index a144e9330f..bce1129267 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -4,18 +4,18 @@ """ import random +from unittest.mock import patch from hypothesis import given, HealthCheck, settings, strategies as st -from unittest.mock import patch +import verta from tests.unit_tests.strategies import pipeline_definition from verta.pipeline import PipelineStep -import verta @given( pipeline_definition=pipeline_definition(), - registered_model_id=st.integers(min_value=1, max_value=1000000000), + registered_model_id=st.integers(min_value=1, max_value=2**63), # max value limit avoids protobuf "Value out of range" error model_version_name=st.text(min_size=1), model_name=st.text(min_size=1), @@ -36,9 +36,8 @@ def test_steps_from_pipeline_definition( """Test that a list of PipelineStep objects can be constructed and returned from a pipeline definition. - The registered model, model version, and environment is fetched for - each step. However, only the call to to fetch the RMV is mocked, as - the _get_registered_model function is patched to return a mock RM. + The registered model, and registered model version is fetched for + each step, so a call is mocked for each. 
""" graph = pipeline_definition["graph"] for step in pipeline_definition["steps"]: @@ -70,7 +69,6 @@ def test_steps_from_pipeline_definition( ) # we have the same number of steps as in the pipeline definition assert len(generated_steps) == len(pipeline_definition["steps"]) - # sort both group of steps for side-by-side comparison generated_steps_sorted = sorted(list(generated_steps), key=lambda x: x.name) definition_steps_sorted = sorted( @@ -100,7 +98,7 @@ def test_steps_from_pipeline_definition( def test_to_step_spec( make_mock_registered_model_version, make_mock_registered_model ) -> None: - """Test that a PipelineStep object can be converted to a step specification""" + """Test that a PipelineStep object can be converted to a step specification.""" mocked_rmv = make_mock_registered_model_version() mocked_rm = make_mock_registered_model( id=mocked_rmv.registered_model_id, name="test_rmv" @@ -124,7 +122,7 @@ def test_to_graph_spec( make_mock_pipeline_step, make_mock_registered_model, ) -> None: - """Test that a PipelineStep object can be converted to a step specification""" + """Test that a PipelineStep object can be converted to a graph specification.""" mocked_rmv = make_mock_registered_model_version() mocked_rm = make_mock_registered_model( id=mocked_rmv.registered_model_id, name="test_rmv" @@ -149,7 +147,7 @@ def test_set_predecessors_add( make_mock_pipeline_step, make_mock_registered_model, ) -> None: - """Test that predecessors can be added to a PipelineStep object""" + """Test that predecessors can be added to a PipelineStep object.""" mocked_rmv = make_mock_registered_model_version() mocked_rm = make_mock_registered_model( id=mocked_rmv.registered_model_id, name="test_rmv" @@ -175,7 +173,7 @@ def test_set_predecessors_remove( make_mock_pipeline_step, make_mock_registered_model, ) -> None: - """Test that predecessors can be removed from a PipelineStep object""" + """Test that predecessors can be removed from a PipelineStep object.""" mocked_rmv = 
make_mock_registered_model_version() mocked_rm = make_mock_registered_model( id=mocked_rmv.registered_model_id, name="test_rmv" @@ -207,13 +205,12 @@ def test_change_model_version( rm_1_name, rm_2_name, make_mock_registered_model_version, - make_mock_registered_model, mocked_responses, ) -> None: - """Test that a PipelineStep object can have its model version changed + """Test that a PipelineStep object can have its model version changed. Each time a RMV is set for a PipelineStep, the RM for it is fetched, - so a call is mocked for th initial step creation and the change. + so a call is mocked for the initial step creation and the change. """ rmv_1 = make_mock_registered_model_version() rmv_2 = make_mock_registered_model_version() diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index ac1eb5281a..97bff9ae8d 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- -"""Unit tests for the RegisteredPipeline class""" +""" +Unit tests for the RegisteredPipeline class +""" from unittest.mock import patch @@ -7,7 +9,7 @@ from hypothesis import given, HealthCheck, settings, strategies as st import verta -from tests.unit_tests.strategies import pipeline_definition, resources +from tests.unit_tests.strategies import resources from verta.pipeline import RegisteredPipeline @@ -31,9 +33,10 @@ def test_copy_graph( registered_model_version=make_mock_registered_model_version(), ) copied_graph = pipeline.copy_graph() - # convert from sets to lists and sort for comparison + # convert from sets to lists and sort for side-by-side comparison graph_steps_sorted = sorted(list(graph.steps), key=lambda x: x.name) copied_graph_steps_sorted = sorted(list(copied_graph.steps), key=lambda x: x.name) + for orig_step, copied_step in zip(graph_steps_sorted, 
copied_graph_steps_sorted): assert orig_step is not copied_step assert orig_step.name == copied_step.name @@ -56,8 +59,7 @@ def test_log_pipeline_definition_artifact( make_mock_pipeline_graph, make_mock_registered_model_version, ) -> None: - """ - Verify the expected sequence of calls when a pipeline definition + """Verify the expected sequence of calls when a pipeline definition is logged as an artifact to the pipeline's model version. Fetching the registered model version is patched instead of mocking a @@ -157,7 +159,7 @@ def test_to_pipeline_configuration_valid( make_mock_registered_model_version, make_mock_registered_model, ) -> None: - """Test that a valid pipeline configuration can be constructed from a + """Test that a pipeline configuration can be constructed from a RegisteredPipeline object and a valid list of pipeline resources. """ mocked_rm = make_mock_registered_model(id=123, name="test_rmv") @@ -193,9 +195,9 @@ def test_to_pipeline_configuration_invalid_resources( are provided. 
Invalid resources include: - - a step name not in the pipeline -> ValueError - a step name that is not a string -> TypeError - a step resource that is not a Resources object -> TypeError + - a step name not in the pipeline -> ValueError """ mocked_rm = make_mock_registered_model(id=123, name="test_rmv") with patch.object( @@ -208,14 +210,7 @@ def test_to_pipeline_configuration_invalid_resources( graph=graph, registered_model_version=make_mock_registered_model_version(), ) - # step name not in pipeline - with pytest.raises(ValueError) as err: - pipeline._to_pipeline_configuration(pipeline_resources=step_resources) - assert ( - str(err.value) - == "pipeline_resources contains resources for a step not in the " - "pipeline: 'invalid_step_name'" - ) + step_resources.pop("invalid_step_name") # step name not a string step_resources.update({123: resources}) @@ -234,6 +229,14 @@ def test_to_pipeline_configuration_invalid_resources( str(err3.value) == "pipeline_resources values must be type Resources, not " ) + # step name not in pipeline + with pytest.raises(ValueError) as err: + pipeline._to_pipeline_configuration(pipeline_resources=step_resources) + assert ( + str(err.value) + == "pipeline_resources contains resources for a step not in the " + "pipeline: 'invalid_step_name'" + ) def test_to_pipeline_configuration_no_resources( @@ -242,7 +245,7 @@ def test_to_pipeline_configuration_no_resources( make_mock_registered_model, ) -> None: """Test that a pipeline configuration can be constructed from a - RegisteredPipeline object without providing pipeline resources. + RegisteredPipeline object without providing any pipeline resources. 
""" mocked_rm = make_mock_registered_model(id=123, name="test_rmv") with patch.object( @@ -258,22 +261,20 @@ def test_to_pipeline_configuration_no_resources( for graph_step, config_step in zip(graph.steps, pipeline_configuration["steps"]): # All steps are included in the configuration assert graph_step.name == config_step["name"] - # No resources are found in the configuration + # No resources are found in the resulting configuration assert "resources" not in config_step.keys() def test_from_pipeline_definition( make_mock_registered_model_version, - mock_conn, - mock_config, mocked_responses, ) -> None: """Test that a RegisteredPipeline object can be constructed from a pipeline definition. - The model version's `_get_artifact` function is mocked to - return a simple, consistent pipeline definition. Calls related to the - fetching of the artifact are mocked. + The model version's `_get_artifact` function is overidden in the + mocked RMV fixture to return a simple, consistent pipeline definition. + Calls related to the fetching of the RMV and RM are mocked. 
""" mocked_responses.get( "https://test_socket/api/v1/registry/model_versions/1", From 6329110c55e47aea8c252aa6ecfa0aab299703d2 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Thu, 24 Aug 2023 23:51:28 +0200 Subject: [PATCH 060/117] docs: tweak public docs --- client/verta/verta/pipeline/_pipeline_graph.py | 10 ++++++++++ client/verta/verta/pipeline/_pipeline_step.py | 4 ++-- client/verta/verta/pipeline/_registered_pipeline.py | 10 +++++++--- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 1e414ba72c..9b3fdd3637 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -49,6 +49,16 @@ def set_steps(self, steps: Set[PipelineStep]) -> Set[PipelineStep]: ---------- steps : set of :class:`~verta.deployment.PipelineStep` Set of all possible steps of the pipline graph. + + Returns + ------- + set of :class:`~verta.deployment.PipelineStep` + The steps now set for this graph, if validation is successful. + + Raises + ------ + TypeError + If ``steps`` is not a set of PipelineStep objects. """ self._steps = self._validate_steps(steps) return self.steps diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 7ecf00adca..804b8d6610 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -120,8 +120,8 @@ def set_name(self, name: str) -> str: Returns ------- - name: str - The string value of the name now set for this step. + str + The new name now set for this step. 
Raises ------ diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 37fa4207e1..8c629f02fc 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -76,11 +76,15 @@ def graph(self): return self._graph def copy_graph(self) -> PipelineGraph: - """Return a deep copy of the PipelineGraph object for this pipeline. + """Return a copy of the PipelineGraph object for this pipeline. RegisteredPipeline objects are immutable once registered with Verta. This - function returns a PipelineGraph object that can be modified and used to - create and register a new RegisteredPipeline. + copy can be modified and used to create and register a new RegisteredPipeline. + + Returns + ------- + :class:`~verta.pipeline.PipelineGraph` + A deep copy of the PipelineGraph object for this pipeline. """ return copy.deepcopy(self._graph) From b42c8afce9beb51ddf631c7139be6bc3bf663c14 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Fri, 25 Aug 2023 00:01:25 +0200 Subject: [PATCH 061/117] test: fix order of test operations --- .../pipeline/test_registered_pipeline.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 97bff9ae8d..38912f9f15 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -195,9 +195,9 @@ def test_to_pipeline_configuration_invalid_resources( are provided. 
Invalid resources include: + - a step name not in the pipeline -> ValueError - a step name that is not a string -> TypeError - a step resource that is not a Resources object -> TypeError - - a step name not in the pipeline -> ValueError """ mocked_rm = make_mock_registered_model(id=123, name="test_rmv") with patch.object( @@ -210,7 +210,14 @@ def test_to_pipeline_configuration_invalid_resources( graph=graph, registered_model_version=make_mock_registered_model_version(), ) - + # step name not in pipeline + with pytest.raises(ValueError) as err: + pipeline._to_pipeline_configuration(pipeline_resources=step_resources) + assert ( + str(err.value) + == "pipeline_resources contains resources for a step not in the " + "pipeline: 'invalid_step_name'" + ) step_resources.pop("invalid_step_name") # step name not a string step_resources.update({123: resources}) @@ -229,14 +236,7 @@ def test_to_pipeline_configuration_invalid_resources( str(err3.value) == "pipeline_resources values must be type Resources, not " ) - # step name not in pipeline - with pytest.raises(ValueError) as err: - pipeline._to_pipeline_configuration(pipeline_resources=step_resources) - assert ( - str(err.value) - == "pipeline_resources contains resources for a step not in the " - "pipeline: 'invalid_step_name'" - ) + def test_to_pipeline_configuration_no_resources( From 916755cbd3dda2935e7f3f1b67a838f8d5cc7bfc Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Fri, 25 Aug 2023 20:31:59 +0200 Subject: [PATCH 062/117] fix: annotation error cuaght by pylint --- client/verta/verta/pipeline/_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 804b8d6610..d63aaee8b2 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -168,7 +168,7 @@ def set_predecessors(self, steps: Set["PipelineStep"]) -> Set["PipelineStep"]: def 
_validate_predecessors( self, predecessors: Set["PipelineStep"] - ) -> set["PipelineStep"]: + ) -> Set["PipelineStep"]: """Validate that the provided predecessors are a set of PipelineStep objects. Parameters From 3109eeb38449a34271b4a996477b041ca7416adf Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 20:52:25 +0200 Subject: [PATCH 063/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index d7e34b7947..0deafd0285 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -29,8 +29,8 @@ def test_set_steps(make_mock_pipeline_step, make_mock_registered_model) -> None: @given( pipeline_definition=pipeline_definition(), - registered_model_id=st.integers(min_value=1, max_value=2**63), # max value limit avoids protobuf "Value out of range" error + registered_model_id=st.integers(min_value=1, max_value=2**63), model_version_name=st.text(min_size=1), model_name=st.text(min_size=1), ) From ab7590bb1cbe9862347975377c433ddc6aaf0964 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 20:52:43 +0200 Subject: [PATCH 064/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index 
0deafd0285..8eccfab893 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -52,7 +52,7 @@ def test_from_definition( The model version is fetched for each step, so a response is mocked for each. In depth testing of each step is handled in - test_pipeline_step.test_steps_from_pipeline_spec. + test_pipeline_step.test_steps_from_pipeline_definition. """ for step in pipeline_definition["steps"]: mocked_responses.get( From c0ac34eb4a0b5a3291cf6396421b8138433efee3 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 20:53:27 +0200 Subject: [PATCH 065/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index 8eccfab893..c65d8ce2a8 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -87,7 +87,7 @@ def test_from_definition( pipeline_steps_sorted = sorted( pipeline_definition["steps"], key=lambda x: x["name"] ) - graph_steps_sorted = sorted(list(graph.steps), key=lambda x: x.name) + graph_steps_sorted = sorted(graph.steps, key=lambda x: x.name) for graph_step, pipeline_step in zip(graph_steps_sorted, pipeline_steps_sorted): assert graph_step.name == pipeline_step["name"] From 72388cf5bff6ee9b70633abfbabe3278ab0761d0 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 20:55:32 +0200 Subject: [PATCH 066/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_step.py Co-authored-by: Liu 
<96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index bce1129267..5a7a97fc07 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -87,7 +87,7 @@ def test_steps_from_pipeline_definition( # each step is converted to a PipelineStep object assert isinstance(gen_step, PipelineStep) # predecessors for each step are also converted to PipelineStep objects - for i in gen_step.predecessors: + for predecessor in gen_step.predecessors: assert isinstance(i, PipelineStep) # the predecessors for each step are all included and have the same name as in the definition assert [s.name for s in gen_step.predecessors] == [ From c21180b995289861263406628622dcafdfaf9862 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 20:56:00 +0200 Subject: [PATCH 067/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index 5a7a97fc07..fed87ff85d 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -101,7 +101,7 @@ def test_to_step_spec( """Test that a PipelineStep object can be converted to a step specification.""" mocked_rmv = make_mock_registered_model_version() mocked_rm = make_mock_registered_model( - id=mocked_rmv.registered_model_id, name="test_rmv" + 
id=mocked_rmv.registered_model_id, name="test_rm" ) with patch.object( verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm From ec2cdb5c1541d81d9f87309b23bd8bfe02e39d9e Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 20:56:25 +0200 Subject: [PATCH 068/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index fed87ff85d..b1adea9b96 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -150,7 +150,7 @@ def test_set_predecessors_add( """Test that predecessors can be added to a PipelineStep object.""" mocked_rmv = make_mock_registered_model_version() mocked_rm = make_mock_registered_model( - id=mocked_rmv.registered_model_id, name="test_rmv" + id=mocked_rmv.registered_model_id, name="test_rm" ) with patch.object( verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm From fbb17427d7943eb97d351164f70121eeddcae53b Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 20:57:06 +0200 Subject: [PATCH 069/117] Update client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- .../verta/tests/unit_tests/pipeline/test_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 38912f9f15..81ec895596 100644 --- 
a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -23,7 +23,7 @@ def test_copy_graph( Each step in the copied graph should be a new object, but have the same name, predecessors, and model version as the original. """ - mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + mocked_rm = make_mock_registered_model(id=123, name="test_rm") with patch.object( verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm ): From e120e0fdc825d716412684ec7c8c521122541034 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 21:07:34 +0200 Subject: [PATCH 070/117] Update client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- .../verta/tests/unit_tests/pipeline/test_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 81ec895596..d75ae04690 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -132,7 +132,7 @@ def test_to_pipeline_definition( RegisteredPipeline object. In depth testing of the `_to_graph_definition` - and `to_steps_definition` functions are handled in unit tests for + and `_to_steps_definition` functions are handled in unit tests for PipelineGraph. 
""" mocked_rm = make_mock_registered_model(id=123, name="test_rmv") From 24780226d7c95cdf901ff2ef9c6fb3756fa84407 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 21:08:06 +0200 Subject: [PATCH 071/117] Update client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- .../verta/tests/unit_tests/pipeline/test_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index d75ae04690..4eb3bbe647 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -135,7 +135,7 @@ def test_to_pipeline_definition( and `_to_steps_definition` functions are handled in unit tests for PipelineGraph. """ - mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + mocked_rm = make_mock_registered_model(id=123, name="test_rm") with patch.object( verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm ): From e7aeaa4690d0574afbde56d32c89f42969e3a2bc Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Sun, 27 Aug 2023 21:13:22 +0200 Subject: [PATCH 072/117] test: use RM instead of RMV for patched object --- .../tests/unit_tests/pipeline/test_registered_pipeline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 4eb3bbe647..7d46d21509 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -57,6 +57,7 @@ def test_log_pipeline_definition_artifact( model_version_name, mocked_responses, 
make_mock_pipeline_graph, + make_mock_registered_model, make_mock_registered_model_version, ) -> None: """Verify the expected sequence of calls when a pipeline definition @@ -66,6 +67,7 @@ def test_log_pipeline_definition_artifact( response to avoid having to pass the RM's id down through multiple pytest fixtures. """ + rm = make_mock_registered_model(id=123, name="test_rm") rmv = make_mock_registered_model_version() # Fetch the registered model version mocked_responses.get( @@ -100,7 +102,7 @@ def test_log_pipeline_definition_artifact( status=200, ) with patch.object( - verta.pipeline.PipelineStep, "_get_registered_model", return_value=rmv + verta.pipeline.PipelineStep, "_get_registered_model", return_value=rm ): pipeline = RegisteredPipeline( graph=make_mock_pipeline_graph(), From b51953bb51f251c1d423c3fe253f699ba5512cb8 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 21:14:33 +0200 Subject: [PATCH 073/117] Update client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- .../verta/tests/unit_tests/pipeline/test_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 7d46d21509..36d8e01cc7 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -164,7 +164,7 @@ def test_to_pipeline_configuration_valid( """Test that a pipeline configuration can be constructed from a RegisteredPipeline object and a valid list of pipeline resources. 
""" - mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + mocked_rm = make_mock_registered_model(id=123, name="test_rm") with patch.object( verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm ): From 330ca5def0d26583f3550c6334fff7095fe710af Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 21:15:35 +0200 Subject: [PATCH 074/117] Update client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- .../verta/tests/unit_tests/pipeline/test_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 36d8e01cc7..25e87aaabe 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -249,7 +249,7 @@ def test_to_pipeline_configuration_no_resources( """Test that a pipeline configuration can be constructed from a RegisteredPipeline object without providing any pipeline resources. 
""" - mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + mocked_rm = make_mock_registered_model(id=123, name="test_rm") with patch.object( verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm ): From 73e42d60c428ad7d278e2179cd876067f577f7c6 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 21:16:27 +0200 Subject: [PATCH 075/117] Update client/verta/verta/pipeline/_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_pipeline_step.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index d63aaee8b2..d50ed8283d 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -166,8 +166,9 @@ def set_predecessors(self, steps: Set["PipelineStep"]) -> Set["PipelineStep"]: self._predecessors = self._validate_predecessors(steps) return self.predecessors + @staticmethod def _validate_predecessors( - self, predecessors: Set["PipelineStep"] + predecessors: Set["PipelineStep"] ) -> Set["PipelineStep"]: """Validate that the provided predecessors are a set of PipelineStep objects. 
From e690e2922e8576d0bb9634d70684e06165fa6a4b Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 21:16:58 +0200 Subject: [PATCH 076/117] Update client/verta/verta/pipeline/_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_registered_pipeline.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 8c629f02fc..fd164065a0 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -191,8 +191,7 @@ def _get_pipeline_definition_artifact( dict Pipeline definition dictionary. """ - definition = registered_model_version.get_artifact("pipeline.json").read() - return json.loads(definition.decode("utf-8")) + return json.load(registered_model_version.get_artifact("pipeline.json")) @classmethod def _from_pipeline_definition( From 261a0749a3fe38f0db33a5a1c035d39d4859487c Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 21:17:31 +0200 Subject: [PATCH 077/117] Update client/verta/verta/pipeline/_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_registered_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index fd164065a0..eb6d31a2ec 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -171,9 +171,9 @@ def _to_pipeline_configuration( "steps": steps, } - @classmethod + @staticmethod def _get_pipeline_definition_artifact( - cls, registered_model_version: RegisteredModelVersion + registered_model_version: 
RegisteredModelVersion ) -> Dict[str, Any]: """Get the pipeline definition artifact from the registered model version. From 88c01516d9b3effdace8b2aba5948ab20a0763a4 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 21:19:05 +0200 Subject: [PATCH 078/117] Update client/verta/verta/pipeline/_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index eb6d31a2ec..19162e9d47 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -108,7 +108,7 @@ def _to_pipeline_definition(self) -> Dict[str, Any]: return { "pipeline_version_id": self.id, "graph": self._graph._to_graph_definition(), - "predecessors": self._graph._to_steps_definition(), + "steps": self._graph._to_steps_definition(), } def _to_pipeline_configuration( From 322670498df1a555c051f47ab9df5cb26be6a99c Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 21:19:42 +0200 Subject: [PATCH 079/117] Update client/verta/verta/pipeline/_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_registered_pipeline.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 19162e9d47..236d679225 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -92,9 +92,8 @@ def _log_pipeline_definition_artifact(self) -> None: """ Log the pipeline definition as an artifact of the registered model version. 
""" - with tempfile.NamedTemporaryFile() as temp_file: - bytes = json.dumps(self._to_pipeline_definition()).encode("utf-8") - temp_file.write(bytes) + with tempfile.NamedTemporaryFile("w+") as temp_file: + json.dump(pipeline._to_pipeline_definition(), temp_file) self._registered_model_version.log_artifact("pipeline.json", temp_file) def _to_pipeline_definition(self) -> Dict[str, Any]: From 80f3f1067638359bdc83c0999cf3b84e973091ef Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 21:20:01 +0200 Subject: [PATCH 080/117] Update client/verta/verta/pipeline/_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 236d679225..fd1969a072 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -86,7 +86,7 @@ def copy_graph(self) -> PipelineGraph: :class:`~verta.pipeline.PipelineGraph` A deep copy of the PipelineGraph object for this pipeline. 
""" - return copy.deepcopy(self._graph) + return copy.deepcopy(self.graph) def _log_pipeline_definition_artifact(self) -> None: """ From 33f5083be0a0f26133a2ff29436f786e2615a1bd Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Sun, 27 Aug 2023 21:20:21 +0200 Subject: [PATCH 081/117] Update client/verta/verta/pipeline/_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index fd1969a072..0d234a7cd8 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -59,7 +59,7 @@ def __repr__(self): "RegisteredPipeline:", f"pipeline name: {self.name}", f"pipeline id: {self.id}", - f"\n{self._graph}", + f"\n{self.graph}", ) ) From 39f5d4b47ddce754d58102482c5560b8a114c938 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Sun, 27 Aug 2023 22:33:13 +0200 Subject: [PATCH 082/117] test: fix scope of unique IDs variable in mocked RMV fixture --- client/verta/tests/unit_tests/conftest.py | 10 +++++----- .../unit_tests/pipeline/test_registered_pipeline.py | 8 ++++++++ 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index 93bc2fdb95..8f82d95681 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -131,6 +131,7 @@ def make_mock_registered_model_version( """Return a callable function for creating mocked objects of the RegisteredModelVersion class. 
""" + unique_ids = set() class MockRegisteredModelVersion(RegisteredModelVersion): def __repr__(self): # avoid network calls when displaying test results @@ -149,16 +150,15 @@ def _make_mock_registered_model_version(): test session. """ - ids = set() model_ver_id = random.randint(1, 1000000) - while model_ver_id in ids: + while model_ver_id in unique_ids: model_ver_id = random.randint(1, 1000000) - ids.add(model_ver_id) + unique_ids.add(model_ver_id) reg_model_id = random.randint(1, 1000000) - while reg_model_id in ids: + while reg_model_id in unique_ids: reg_model_id = random.randint(1, 1000000) - ids.add(reg_model_id) + unique_ids.add(reg_model_id) return MockRegisteredModelVersion( mock_conn, diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 25e87aaabe..5c415141cb 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -300,3 +300,11 @@ def test_from_pipeline_definition( ) assert isinstance(pipeline, RegisteredPipeline) assert pipeline.id == rmv.id + + +def test_unique_ids(make_mock_registered_model_version) -> None: + rmv_1 = make_mock_registered_model_version() + rmv_2 = make_mock_registered_model_version() + assert rmv_1 is not rmv_2 + assert rmv_1.id is not rmv_2.id + assert rmv_1.registered_model_id is not rmv_2.registered_model_id From 0af5258bfe121ab9c59ffdd7b20dde7aebe74334 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Sun, 27 Aug 2023 22:56:30 +0200 Subject: [PATCH 083/117] fix: clean up after recent commits --- client/verta/tests/unit_tests/conftest.py | 10 +++++++++- .../tests/unit_tests/pipeline/test_pipeline_step.py | 2 +- .../unit_tests/pipeline/test_registered_pipeline.py | 10 +--------- client/verta/verta/pipeline/_registered_pipeline.py | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git 
a/client/verta/tests/unit_tests/conftest.py b/client/verta/tests/unit_tests/conftest.py index 8f82d95681..7a95d8ea6d 100644 --- a/client/verta/tests/unit_tests/conftest.py +++ b/client/verta/tests/unit_tests/conftest.py @@ -181,15 +181,23 @@ def make_mock_pipeline_step(make_mock_registered_model_version) -> Callable: The optional `name` parameter is for use in tests where names must be known for assertions. """ + unique_names = set() class MockPipelineStep(PipelineStep): def __repr__(self): # avoid network calls when displaying test results return object.__repr__(self) + def _make_unique_name(): + name = f"step{random.randint(1, 1000000)}" + while name in unique_names: + name = f"step{random.randint(1, 1000000)}" + unique_names.add(name) + return name + def _make_mock_pipeline_step(name: Optional[str] = None): return MockPipelineStep( registered_model_version=make_mock_registered_model_version(), - name=name if name else "test_pipeline_step_name", + name=name if name else _make_unique_name(), predecessors=set(), ) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index b1adea9b96..deb56704dc 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -88,7 +88,7 @@ def test_steps_from_pipeline_definition( assert isinstance(gen_step, PipelineStep) # predecessors for each step are also converted to PipelineStep objects for predecessor in gen_step.predecessors: - assert isinstance(i, PipelineStep) + assert isinstance(predecessor, PipelineStep) # the predecessors for each step are all included and have the same name as in the definition assert [s.name for s in gen_step.predecessors] == [ s["predecessors"] for s in graph if gen_step.name == s["name"] diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 
5c415141cb..7a2394ef53 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -150,7 +150,7 @@ def test_to_pipeline_definition( assert pipeline_definition == { "pipeline_version_id": pipeline.id, "graph": graph._to_graph_definition(), - "predecessors": graph._to_steps_definition(), + "steps": graph._to_steps_definition(), } @@ -300,11 +300,3 @@ def test_from_pipeline_definition( ) assert isinstance(pipeline, RegisteredPipeline) assert pipeline.id == rmv.id - - -def test_unique_ids(make_mock_registered_model_version) -> None: - rmv_1 = make_mock_registered_model_version() - rmv_2 = make_mock_registered_model_version() - assert rmv_1 is not rmv_2 - assert rmv_1.id is not rmv_2.id - assert rmv_1.registered_model_id is not rmv_2.registered_model_id diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 0d234a7cd8..617bb83aa6 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -93,7 +93,7 @@ def _log_pipeline_definition_artifact(self) -> None: Log the pipeline definition as an artifact of the registered model version. 
""" with tempfile.NamedTemporaryFile("w+") as temp_file: - json.dump(pipeline._to_pipeline_definition(), temp_file) + json.dump(self._to_pipeline_definition(), temp_file) self._registered_model_version.log_artifact("pipeline.json", temp_file) def _to_pipeline_definition(self) -> Dict[str, Any]: From af2fad7028cf8cae9e448c423dec81535f21cd0b Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Sun, 27 Aug 2023 23:04:21 +0200 Subject: [PATCH 084/117] test: pull scheme and socket dynamically from mock_conn test fixture --- .../verta/tests/unit_tests/pipeline/test_pipeline_graph.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index c65d8ce2a8..748a31729a 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -56,7 +56,8 @@ def test_from_definition( """ for step in pipeline_definition["steps"]: mocked_responses.get( - f"https://test_socket/api/v1/registry/model_versions/{step['model_version_id']}", + f"{mock_conn.scheme}://{mock_conn.socket}/api/v1/registry/model_versions/" + f"{step['model_version_id']}", json={ "model_version": { "id": step["model_version_id"], @@ -67,7 +68,8 @@ def test_from_definition( status=200, ) mocked_responses.get( - f"https://test_socket/api/v1/registry/registered_models/{registered_model_id}", + f"{mock_conn.scheme}://{mock_conn.socket}/api/v1/registry/registered_models/" + f"{registered_model_id}", json={ "registered_model": { "id": registered_model_id, From 9d9682e1d82868fa37d55a388e1b2db1894e215c Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Sun, 27 Aug 2023 23:07:36 +0200 Subject: [PATCH 085/117] test: fix incorrect assertion comment --- client/verta/tests/unit_tests/pipeline/test_pipeline_step.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index deb56704dc..6f995a450d 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -80,9 +80,9 @@ def test_steps_from_pipeline_definition( assert gen_step.name == def_step["name"] # model version ids are the same for the steps and their definitions assert gen_step.registered_model_version.id == def_step["model_version_id"] - # registered model ids are the same for the steps and their definitions + # the registered model id for each step was fetched and added from the mocked response. assert gen_step._registered_model.id == registered_model_id - # registered model names are fetched and added + # registered model names are fetched and added from the mocked response. assert gen_step._registered_model.name == model_name # each step is converted to a PipelineStep object assert isinstance(gen_step, PipelineStep) From 25ab7537d16dafbffc4bc1d9fa0ad87aaf0c115e Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Mon, 28 Aug 2023 00:04:13 +0200 Subject: [PATCH 086/117] test: expand test coverage of user provided resources --- .../pipeline/test_registered_pipeline.py | 49 +++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 7a2394ef53..4b63e077e7 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -155,14 +155,15 @@ def test_to_pipeline_definition( @given(resources=resources()) -def test_to_pipeline_configuration_valid( +def test_to_pipeline_configuration_valid_complete( resources, make_mock_pipeline_graph, make_mock_registered_model_version, make_mock_registered_model, ) -> None: """Test that a 
pipeline configuration can be constructed from a - RegisteredPipeline object and a valid list of pipeline resources. + RegisteredPipeline object and a valid list of pipeline resources, + where resources are provided for every step. """ mocked_rm = make_mock_registered_model(id=123, name="test_rm") with patch.object( @@ -179,13 +180,55 @@ def test_to_pipeline_configuration_valid( pipeline_resources=step_resources ) assert pipeline_configuration["pipeline_version_id"] == pipeline.id + assert len(graph.steps) == len(pipeline_configuration["steps"]) for graph_step, config_step in zip(graph.steps, pipeline_configuration["steps"]): - # All steps are included in the configuration + # All steps provided are included in the configuration. assert graph_step.name == config_step["name"] # All steps in the config have resources assert "resources" in config_step.keys() +@given(resources=resources()) +def test_to_pipeline_configuration_valid_incomplete( + resources, + make_mock_pipeline_graph, + make_mock_registered_model_version, + make_mock_registered_model, +) -> None: + """Test that a pipeline configuration can be constructed from a + RegisteredPipeline object and a valid list of pipeline resources, + where resources are not provided for every step. 
+ """ + mocked_rm = make_mock_registered_model(id=123, name="test_rm") + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + graph = make_mock_pipeline_graph() + partial_steps = list(graph.steps)[:-1] + excluded_step = list(graph.steps)[-1] + step_resources = {step.name: resources for step in partial_steps} + pipeline = RegisteredPipeline( + graph=graph, + registered_model_version=make_mock_registered_model_version(), + ) + + pipeline_configuration = pipeline._to_pipeline_configuration( + pipeline_resources=step_resources + ) + assert pipeline_configuration["pipeline_version_id"] == pipeline.id + # All steps have been included in the configuration + assert len(graph.steps) == len( pipeline_configuration["steps"]) + # Compare the steps that have resources, allowing zip to drop the excluded step. + for graph_step, config_step in zip(partial_steps, pipeline_configuration["steps"]): + # All steps provided are included in the configuration. + assert graph_step.name == config_step["name"] + # All steps for which resource were provided have resources in the config. + assert "resources" in config_step.keys() + # The step for which no resources were provided is in the config without resources. 
+ assert excluded_step.name == pipeline_configuration["steps"][-1]["name"] + assert "resources" not in pipeline_configuration["steps"][-1].keys() + + @given(resources=resources()) def test_to_pipeline_configuration_invalid_resources( resources, From ed25f1bfe1fa34ab88a69a56b0e9ce18535792d7 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Mon, 28 Aug 2023 00:07:06 +0200 Subject: [PATCH 087/117] test: move assembly of test component to more logical location in function --- .../verta/tests/unit_tests/pipeline/test_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 4b63e077e7..724a909418 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -250,12 +250,12 @@ def test_to_pipeline_configuration_invalid_resources( ): graph = make_mock_pipeline_graph() step_resources = {step.name: resources for step in graph.steps} - step_resources["invalid_step_name"] = resources pipeline = RegisteredPipeline( graph=graph, registered_model_version=make_mock_registered_model_version(), ) # step name not in pipeline + step_resources["invalid_step_name"] = resources with pytest.raises(ValueError) as err: pipeline._to_pipeline_configuration(pipeline_resources=step_resources) assert ( From f725f3e8419aa660247c66ff120484fdfcf1c5d0 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Mon, 28 Aug 2023 00:43:49 +0200 Subject: [PATCH 088/117] refactor: drop validation from attribute getters, and add tests for mutation of predecessors and steps --- .../pipeline/test_pipeline_graph.py | 30 ++++++++- .../pipeline/test_registered_pipeline.py | 61 +++++++++++++------ .../verta/verta/pipeline/_pipeline_graph.py | 7 ++- client/verta/verta/pipeline/_pipeline_step.py | 4 +- .../verta/pipeline/_registered_pipeline.py | 2 +- 5 
files changed, 77 insertions(+), 27 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index 748a31729a..dc69876b7d 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -5,11 +5,12 @@ from unittest.mock import patch +import pytest from hypothesis import given, HealthCheck, settings, strategies as st import verta from tests.unit_tests.strategies import pipeline_definition -from verta.pipeline import PipelineGraph +from verta.pipeline import PipelineGraph, PipelineStep def test_set_steps(make_mock_pipeline_step, make_mock_registered_model) -> None: @@ -162,3 +163,30 @@ def test_to_steps_definition( assert sorted(step_specs, key=lambda x: x["name"]) == sorted( expected_definition, key=lambda x: x["name"] ) + + +def test_bad_mutation_of_step_predecessors_exception( + make_mock_registered_model_version, make_mock_registered_model, make_mock_pipeline_step +): + """Test that we throw the correct exception when a user tries to mutate + the predecessors of a step in an inappropriate way. + """ + mocked_rmv = make_mock_registered_model_version() + mocked_rm = make_mock_registered_model( + id=mocked_rmv.registered_model_id, name="test_rmv" + ) + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + step = PipelineStep( + registered_model_version=mocked_rmv, + name="test_name", + predecessors=set(), + ) + step.predecessors.add("not_a_step") + with pytest.raises(TypeError) as err: + PipelineGraph(steps={step}) + assert ( + str(err.value) == f"individual predecessors of a PipelineStep must be type" + f" PipelineStep, not ." 
+ ) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 724a909418..08e6c3ef32 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -190,10 +190,10 @@ def test_to_pipeline_configuration_valid_complete( @given(resources=resources()) def test_to_pipeline_configuration_valid_incomplete( - resources, - make_mock_pipeline_graph, - make_mock_registered_model_version, - make_mock_registered_model, + resources, + make_mock_pipeline_graph, + make_mock_registered_model_version, + make_mock_registered_model, ) -> None: """Test that a pipeline configuration can be constructed from a RegisteredPipeline object and a valid list of pipeline resources, @@ -201,7 +201,7 @@ def test_to_pipeline_configuration_valid_incomplete( """ mocked_rm = make_mock_registered_model(id=123, name="test_rm") with patch.object( - verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm ): graph = make_mock_pipeline_graph() partial_steps = list(graph.steps)[:-1] @@ -217,7 +217,7 @@ def test_to_pipeline_configuration_valid_incomplete( ) assert pipeline_configuration["pipeline_version_id"] == pipeline.id # All steps have been included in the configuration - assert len(graph.steps) == len( pipeline_configuration["steps"]) + assert len(graph.steps) == len(pipeline_configuration["steps"]) # Compare the steps that have resources, allowing zip to drop the excluded step. for graph_step, config_step in zip(partial_steps, pipeline_configuration["steps"]): # All steps provided are included in the configuration. 
@@ -258,30 +258,27 @@ def test_to_pipeline_configuration_invalid_resources( step_resources["invalid_step_name"] = resources with pytest.raises(ValueError) as err: pipeline._to_pipeline_configuration(pipeline_resources=step_resources) - assert ( - str(err.value) - == "pipeline_resources contains resources for a step not in the " - "pipeline: 'invalid_step_name'" - ) + assert ( + str(err.value) == "pipeline_resources contains resources for a step not in the " + "pipeline: 'invalid_step_name'" + ) step_resources.pop("invalid_step_name") # step name not a string step_resources.update({123: resources}) with pytest.raises(TypeError) as err2: pipeline._to_pipeline_configuration(pipeline_resources=step_resources) - assert ( - str(err2.value) - == "pipeline_resources keys must be type str, not " - ) + assert ( + str(err2.value) == "pipeline_resources keys must be type str, not " + ) step_resources.pop(123) # step resource not a Resources object step_resources.update({"step_1": "not_resources"}) with pytest.raises(TypeError) as err3: pipeline._to_pipeline_configuration(pipeline_resources=step_resources) - assert ( - str(err3.value) - == "pipeline_resources values must be type Resources, not " - ) - + assert ( + str(err3.value) + == "pipeline_resources values must be type Resources, not " + ) def test_to_pipeline_configuration_no_resources( @@ -343,3 +340,27 @@ def test_from_pipeline_definition( ) assert isinstance(pipeline, RegisteredPipeline) assert pipeline.id == rmv.id + + +def test_bad_mutation_of_graph_steps_exception( + make_mock_registered_model, + make_mock_registered_model_version, + make_mock_pipeline_graph, +): + """Test that we throw the correct exception when a user tries to mutate + the steps of a graph in an inappropriate way. 
+ """ + mocked_rm = make_mock_registered_model(id=123, name="test_rm") + mocked_rmv = make_mock_registered_model_version() + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + graph = make_mock_pipeline_graph() + + graph.steps.add("not_a_step") + with pytest.raises(TypeError) as err: + RegisteredPipeline(graph=graph, registered_model_version=mocked_rmv) + assert ( + str(err.value) == f"individual steps of a PipelineGraph must be type" + f" PipelineStep, not ." + ) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 9b3fdd3637..1929bc1eed 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -24,7 +24,8 @@ class PipelineGraph: def __init__(self, steps: Set[PipelineStep]): self._steps = self._validate_steps(steps) - self._predecessors = [s.predecessors for s in self._steps] + for step in self._steps: + step._validate_predecessors(step.predecessors) # throws an exception if any step's predecessors attr has been inappropriately mutated. def __repr__(self) -> str: @@ -36,7 +37,7 @@ def _format_steps(self) -> str: @property def steps(self) -> Set[PipelineStep]: - return self._validate_steps(self._steps) + return self._steps @steps.setter def steps(self, value): @@ -87,7 +88,7 @@ def _validate_steps(self, steps: Set[PipelineStep]) -> Set[PipelineStep]: if not isinstance(step, PipelineStep): raise TypeError( f"individual steps of a PipelineGraph must be type" - f" PipelineStep, not {type(step)}" + f" PipelineStep, not {type(step)}." 
) return steps diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index d50ed8283d..1d8a379584 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -136,7 +136,7 @@ def set_name(self, name: str) -> str: @property def predecessors(self) -> Set["PipelineStep"]: - return self._validate_predecessors(self._predecessors) + return self._predecessors @predecessors.setter def predecessors(self, value) -> None: @@ -184,7 +184,7 @@ def _validate_predecessors( if not isinstance(step, PipelineStep): raise TypeError( f"individual predecessors of a PipelineStep must be type" - f" PipelineStep, not {type(step)} for predecessor '{step}'" + f" PipelineStep, not {type(step)}." ) return predecessors diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 617bb83aa6..3d4979d208 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -50,7 +50,7 @@ def __init__( self._name = self._registered_model_version.name self._id = self._registered_model_version.id self._graph = graph - self._graph_steps = self._graph.steps + self._graph._validate_steps(self._graph.steps) # throws an exception if the graph's steps attr has been inappropriately mutated. 
def __repr__(self): From 92efc2c0e1e175a7f32071323f97c5088ca9e843 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Mon, 28 Aug 2023 00:58:20 +0200 Subject: [PATCH 089/117] docs: simplify language in doc strings for functions that format data for the backend --- client/verta/verta/pipeline/_pipeline_step.py | 28 ++++++++----------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 1d8a379584..72912202f8 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from typing import Any, Dict, Optional, Set +from typing import Any, Dict, List, Optional, Set from verta._internal_utils._utils import Configuration, Connection from verta.registry.entities import RegisteredModel, RegisteredModelVersion @@ -15,9 +15,9 @@ class PipelineStep: Name of the step, for use within the scope of the pipeline only. registered_model_version : :class:`~verta.registry.entities.RegisteredModelVersion` Registered model version to run for this step. - predecessors : set, optional - Set of unique PipelineSteps whose outputs will be treated as inputs to this - step. If not included, the step is assumed to be an initial step. + predecessors : list, optional + List of unique PipelineSteps whose outputs will be treated as inputs to + this step. If not included, the step is assumed to be an initial step. 
Attributes ---------- @@ -34,7 +34,7 @@ def __init__( name: str, registered_model_version: RegisteredModelVersion, predecessors: Optional[ - Set["PipelineStep"] + List["PipelineStep"] ] = None, # Optional because it could be the first step with no predecessors ): self._name = self.set_name(name) @@ -145,7 +145,7 @@ def predecessors(self, value) -> None: "can't set attribute 'predecessors'; please use set_predecessors()" ) - def set_predecessors(self, steps: Set["PipelineStep"]) -> Set["PipelineStep"]: + def set_predecessors(self, steps: List["PipelineStep"]) -> Set["PipelineStep"]: """Set the predecessors associated with this step. Parameters @@ -168,7 +168,7 @@ def set_predecessors(self, steps: Set["PipelineStep"]) -> Set["PipelineStep"]: @staticmethod def _validate_predecessors( - predecessors: Set["PipelineStep"] + predecessors: List["PipelineStep"] ) -> Set["PipelineStep"]: """Validate that the provided predecessors are a set of PipelineStep objects. @@ -178,15 +178,15 @@ def _validate_predecessors( Set of PipelineStep objects whose outputs will be treated as inputs to this step. """ - if not isinstance(predecessors, set): - raise TypeError(f"steps must be type set, not {type(predecessors)}") + if not isinstance(predecessors, list): + raise TypeError(f"steps must be type list, not {type(predecessors)}") for step in predecessors: if not isinstance(step, PipelineStep): raise TypeError( f"individual predecessors of a PipelineStep must be type" f" PipelineStep, not {type(step)}." ) - return predecessors + return set(predecessors) def _get_registered_model(self) -> RegisteredModel: """Fetch the registered model associated with this step's model version. @@ -257,9 +257,7 @@ def _to_graph_spec(self) -> Dict[str, Any]: """Return a dictionary representation of predecessors for this step, formatted for a pipeline definition. - The back-end expects a list of steps and their predecessors as part of the - `graph` object within a PipelineDefinition. 
This method converts this individual - PipelineStep to a formatted dict for that purpose. + This is fed to the backend as 'graph' in our PipelineDefinition schema """ return { "name": self.name, @@ -270,9 +268,7 @@ def _to_step_spec(self) -> Dict[str, Any]: """Return a dictionary representation of this step, formatted for a pipeline definition. - The back-end expects a list of steps and their model version as part of the - `steps` object within a PipelineDefinition. This method converts this individual - PipelineStep to a formatted dict for that purpose. + This is fed to the backend as 'steps' in our PipelineDefinition schema """ return { "name": self.name, From 7af253a05ad7c0ccb3e0f26baaab52e9fcda1c46 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Mon, 28 Aug 2023 01:11:37 +0200 Subject: [PATCH 090/117] docs: simplify language in doc strings for functions that format data for the backend --- client/verta/verta/pipeline/_pipeline_step.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 72912202f8..d94923f6b5 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from typing import Any, Dict, List, Optional, Set +from typing import Any, Dict, Optional, Set from verta._internal_utils._utils import Configuration, Connection from verta.registry.entities import RegisteredModel, RegisteredModelVersion @@ -15,8 +15,8 @@ class PipelineStep: Name of the step, for use within the scope of the pipeline only. registered_model_version : :class:`~verta.registry.entities.RegisteredModelVersion` Registered model version to run for this step. 
- predecessors : list, optional - List of unique PipelineSteps whose outputs will be treated as inputs to + predecessors : set, optional + Set of unique PipelineSteps whose outputs will be treated as inputs to this step. If not included, the step is assumed to be an initial step. Attributes @@ -34,7 +34,7 @@ def __init__( name: str, registered_model_version: RegisteredModelVersion, predecessors: Optional[ - List["PipelineStep"] + Set["PipelineStep"] ] = None, # Optional because it could be the first step with no predecessors ): self._name = self.set_name(name) @@ -145,12 +145,12 @@ def predecessors(self, value) -> None: "can't set attribute 'predecessors'; please use set_predecessors()" ) - def set_predecessors(self, steps: List["PipelineStep"]) -> Set["PipelineStep"]: + def set_predecessors(self, steps: Set["PipelineStep"]) -> Set["PipelineStep"]: """Set the predecessors associated with this step. Parameters ---------- - steps : list + steps : set List of PipelineStep objects whose outputs will be treated as inputs to this step. Returns @@ -168,7 +168,7 @@ def set_predecessors(self, steps: List["PipelineStep"]) -> Set["PipelineStep"]: @staticmethod def _validate_predecessors( - predecessors: List["PipelineStep"] + predecessors: Set["PipelineStep"] ) -> Set["PipelineStep"]: """Validate that the provided predecessors are a set of PipelineStep objects. @@ -178,7 +178,7 @@ def _validate_predecessors( Set of PipelineStep objects whose outputs will be treated as inputs to this step. """ - if not isinstance(predecessors, list): + if not isinstance(predecessors, set): raise TypeError(f"steps must be type list, not {type(predecessors)}") for step in predecessors: if not isinstance(step, PipelineStep): @@ -186,7 +186,7 @@ def _validate_predecessors( f"individual predecessors of a PipelineStep must be type" f" PipelineStep, not {type(step)}." 
) - return set(predecessors) + return predecessors def _get_registered_model(self) -> RegisteredModel: """Fetch the registered model associated with this step's model version. From c3689c71837dafb4e6af3cc61e49c16d9da79998 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Mon, 28 Aug 2023 20:04:56 +0200 Subject: [PATCH 091/117] fix: spacing on repr function --- client/verta/verta/pipeline/_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index d94923f6b5..016d5194d7 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -50,7 +50,7 @@ def __init__( def __repr__(self) -> str: return "\n ".join( ( - "\n PipelineStep:", + "PipelineStep:", f"step name: {self.name}", f"registered_model: {self._registered_model.name}", f"registered_model_id: {self._registered_model_id}", From 1942bf76557a071f8584212ebdcea59abb4377c9 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Mon, 28 Aug 2023 20:07:13 +0200 Subject: [PATCH 092/117] Update client/verta/verta/pipeline/_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_pipeline_step.py | 1 - 1 file changed, 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 016d5194d7..5e5d54c623 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -203,7 +203,6 @@ def _get_registered_model(self) -> RegisteredModel: conn=self.registered_model_version._conn, conf=self.registered_model_version._conf, ) - self._registered_model = rm return rm @classmethod From 65ef63b7e04fb948ff1e78cd36e1f8d54cdab128 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Mon, 28 Aug 2023 23:30:52 +0200 Subject: [PATCH 093/117] refactor: use 
ID from RM object instead of duplicating via class var --- .../verta/tests/unit_tests/pipeline/test_pipeline_step.py | 4 ++-- client/verta/verta/pipeline/_pipeline_step.py | 6 ++---- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index 6f995a450d..2fb99b0ce4 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -240,7 +240,7 @@ def test_change_model_version( predecessors=set(), ) assert step.registered_model_version == rmv_1 - assert step._registered_model_id == rmv_1.registered_model_id + assert step._registered_model.id == rmv_1.registered_model_id step.set_registered_model_version(rmv_2) assert step.registered_model_version == rmv_2 - assert step._registered_model_id == rmv_2.registered_model_id + assert step._registered_model.id == rmv_2.registered_model_id diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 5e5d54c623..4e52c05728 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -44,7 +44,6 @@ def __init__( self._predecessors = ( self._validate_predecessors(predecessors) if predecessors else set() ) - self._registered_model_id = self._registered_model_version.registered_model_id self._registered_model: RegisteredModel = self._get_registered_model() def __repr__(self) -> str: @@ -53,7 +52,7 @@ def __repr__(self) -> str: "PipelineStep:", f"step name: {self.name}", f"registered_model: {self._registered_model.name}", - f"registered_model_id: {self._registered_model_id}", + f"registered_model_id: {self._registered_model.id}", f"registered_model_version: {self.registered_model_version.name}", f"registered_model_version_id: {self.registered_model_version.id}", f"predecessors: {[s.name for s in self.predecessors]}", @@ 
-97,7 +96,6 @@ def set_registered_model_version( f"registered_model_version must be a RegisteredModelVersion object, not {type(registered_model_version)}" ) self._registered_model_version = registered_model_version - self._registered_model_id = registered_model_version.registered_model_id self._registered_model = self._get_registered_model() return self.registered_model_version @@ -199,7 +197,7 @@ def _get_registered_model(self) -> RegisteredModel: :class:`~verta.registry.entities.RegisteredModel` """ rm = RegisteredModel._get_by_id( - id=self._registered_model_id, + id=self._registered_model_version.registered_model_id, conn=self.registered_model_version._conn, conf=self.registered_model_version._conf, ) From 36086fedce8141103f10b14927e7c34d14b072d0 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Mon, 28 Aug 2023 23:43:20 +0200 Subject: [PATCH 094/117] refactor: support setting predecessors to None --- client/verta/verta/pipeline/_pipeline_step.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 4e52c05728..de731c959b 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -41,9 +41,7 @@ def __init__( self._registered_model_version = self.set_registered_model_version( registered_model_version ) - self._predecessors = ( - self._validate_predecessors(predecessors) if predecessors else set() - ) + self._predecessors = self.set_predecessors(predecessors) self._registered_model: RegisteredModel = self._get_registered_model() def __repr__(self) -> str: @@ -143,13 +141,14 @@ def predecessors(self, value) -> None: "can't set attribute 'predecessors'; please use set_predecessors()" ) - def set_predecessors(self, steps: Optional[Set["PipelineStep"]] = None) -> Set["PipelineStep"]: """Set the
predecessors associated with this step. Parameters ---------- - steps : set - List of PipelineStep objects whose outputs will be treated as inputs to this step. + steps : set, optional + Set of PipelineStep objects whose outputs will be treated as inputs to this step. + Empty set used if no input provided. Returns ------- @@ -161,7 +160,10 @@ def set_predecessors(self, steps: Set["PipelineStep"]) -> Set["PipelineStep"]: TypeError If the provided value for ``steps`` is not a set of PipelineStep objects. """ - self._predecessors = self._validate_predecessors(steps) + if steps: + self._predecessors = self._validate_predecessors(steps) + return self.predecessors + self._predecessors = set() return self.predecessors @staticmethod From 83589dfe985a7ff0ebedf4f182333b2c66346725 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Mon, 28 Aug 2023 23:49:42 +0200 Subject: [PATCH 095/117] refactor: black formatting --- .../tests/unit_tests/pipeline/test_pipeline_graph.py | 10 ++++++---- .../tests/unit_tests/pipeline/test_pipeline_step.py | 2 +- client/verta/verta/pipeline/_pipeline_step.py | 10 ++++++---- client/verta/verta/pipeline/_registered_pipeline.py | 2 +- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index dc69876b7d..b117b4ac86 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -166,7 +166,9 @@ def test_to_steps_definition( def test_bad_mutation_of_step_predecessors_exception( - make_mock_registered_model_version, make_mock_registered_model, make_mock_pipeline_step + make_mock_registered_model_version, + make_mock_registered_model, + make_mock_pipeline_step, ): """Test that we throw the correct exception when a user tries to mutate the predecessors of a step in an inappropriate way. 
@@ -176,7 +178,7 @@ def test_bad_mutation_of_step_predecessors_exception( id=mocked_rmv.registered_model_id, name="test_rmv" ) with patch.object( - verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm ): step = PipelineStep( registered_model_version=mocked_rmv, @@ -187,6 +189,6 @@ def test_bad_mutation_of_step_predecessors_exception( with pytest.raises(TypeError) as err: PipelineGraph(steps={step}) assert ( - str(err.value) == f"individual predecessors of a PipelineStep must be type" - f" PipelineStep, not ." + str(err.value) == f"individual predecessors of a PipelineStep must be type" + f" PipelineStep, not ." ) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index 2fb99b0ce4..8d55ddd658 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -240,7 +240,7 @@ def test_change_model_version( predecessors=set(), ) assert step.registered_model_version == rmv_1 - assert step._registered_model.id == rmv_1.registered_model_id + assert step._registered_model.id == rmv_1.registered_model_id step.set_registered_model_version(rmv_2) assert step.registered_model_version == rmv_2 assert step._registered_model.id == rmv_2.registered_model_id diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index de731c959b..ccdce230bc 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -141,7 +141,9 @@ def predecessors(self, value) -> None: "can't set attribute 'predecessors'; please use set_predecessors()" ) - def set_predecessors(self, steps: Optional[Set["PipelineStep"]] = None) -> Set["PipelineStep"]: + def set_predecessors( + self, steps: Optional[Set["PipelineStep"]] = None + ) -> 
Set["PipelineStep"]: """Set the predecessors associated with this step. Parameters @@ -168,7 +170,7 @@ def set_predecessors(self, steps: Optional[Set["PipelineStep"]] = None) -> Set[" @staticmethod def _validate_predecessors( - predecessors: Set["PipelineStep"] + predecessors: Set["PipelineStep"], ) -> Set["PipelineStep"]: """Validate that the provided predecessors are a set of PipelineStep objects. @@ -265,9 +267,9 @@ def _to_graph_spec(self) -> Dict[str, Any]: def _to_step_spec(self) -> Dict[str, Any]: """Return a dictionary representation of this step, formatted for a - pipeline definition. + pipeline definition. - This is fed to the backend as 'steps' in our PipelineDefinition schema + This is fed to the backend as 'steps' in our PipelineDefinition schema """ return { "name": self.name, diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 3d4979d208..7043fe99d2 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -172,7 +172,7 @@ def _to_pipeline_configuration( @staticmethod def _get_pipeline_definition_artifact( - registered_model_version: RegisteredModelVersion + registered_model_version: RegisteredModelVersion, ) -> Dict[str, Any]: """Get the pipeline definition artifact from the registered model version. 
From 50c333f51237c72a7b85d8615111c6aeff4c7df0 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 29 Aug 2023 17:22:38 +0200 Subject: [PATCH 096/117] refactor: Allow users to input list, set, or tuple for step predecessors and for graph steps --- .../verta/verta/pipeline/_pipeline_graph.py | 30 ++++++++++------ client/verta/verta/pipeline/_pipeline_step.py | 34 ++++++++++++------- 2 files changed, 40 insertions(+), 24 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 1929bc1eed..2663c6d385 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from typing import Any, Dict, List, Set +from typing import Any, Dict, List, Set, Tuple, Union from verta._internal_utils._utils import Configuration, Connection from ._pipeline_step import PipelineStep @@ -12,7 +12,7 @@ class PipelineGraph: Parameters ---------- - steps : set of :class:`~verta.pipeline.PipelineStep` + steps : list, set, or tuple of :class:`~verta.pipeline.PipelineStep` Set of all possible steps of the pipeline. Ordering of steps in the pipeline itself is determined by the predecessors provided to each step. @@ -43,13 +43,17 @@ def steps(self) -> Set[PipelineStep]: def steps(self, value): raise AttributeError("can't set attribute 'steps'; please use set_steps()") - def set_steps(self, steps: Set[PipelineStep]) -> Set[PipelineStep]: + def set_steps( + self, steps: Union[List[PipelineStep], Set[PipelineStep], Tuple[PipelineStep]] + ) -> Set[PipelineStep]: """Update the set of steps for this PipelineGraph to the provided value. Parameters ---------- - steps : set of :class:`~verta.deployment.PipelineStep` - Set of all possible steps of the pipline graph. + steps : list, set, tuple of :class:`~verta.deployment.PipelineStep` + List, set, or tuple of all possible steps of the pipline graph. 
+ All options are converted to a set, so order is irrelevant and + duplicates are removed. Returns ------- @@ -64,13 +68,15 @@ def set_steps(self, steps: Set[PipelineStep]) -> Set[PipelineStep]: self._steps = self._validate_steps(steps) return self.steps - def _validate_steps(self, steps: Set[PipelineStep]) -> Set[PipelineStep]: + def _validate_steps( + self, steps: Union[List[PipelineStep], Set[PipelineStep], Tuple[PipelineStep]] + ) -> Set[PipelineStep]: """Validate that the provided steps are a set of PipelineStep objects. Parameters ---------- - steps : set of :class:`~verta.deployment.PipelineStep` - Set of steps provided by a user. + steps : list, set, or tuple of :class:`~verta.deployment.PipelineStep` + List, set, or tuple of steps provided by a user. Returns ------- @@ -82,15 +88,17 @@ def _validate_steps(self, steps: Set[PipelineStep]) -> Set[PipelineStep]: TypeError If steps is not a set of PipelineStep objects. """ - if not isinstance(steps, set): - raise TypeError(f"steps must be type set, not {type(steps)}") + if not isinstance(steps, (list, set, tuple)): + raise TypeError( + f"steps must be type list, set, or tuple, not {type(steps)}" + ) for step in steps: if not isinstance(step, PipelineStep): raise TypeError( f"individual steps of a PipelineGraph must be type" f" PipelineStep, not {type(step)}." 
) - return steps + return set(steps) @classmethod def _from_definition( diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index ccdce230bc..09413f828a 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from typing import Any, Dict, Optional, Set +from typing import Any, Dict, List, Optional, Set, Tuple, Union from verta._internal_utils._utils import Configuration, Connection from verta.registry.entities import RegisteredModel, RegisteredModelVersion @@ -15,9 +15,9 @@ class PipelineStep: Name of the step, for use within the scope of the pipeline only. registered_model_version : :class:`~verta.registry.entities.RegisteredModelVersion` Registered model version to run for this step. - predecessors : set, optional - Set of unique PipelineSteps whose outputs will be treated as inputs to - this step. If not included, the step is assumed to be an initial step. + predecessors : list, set, tuple, optional + List, set, or tuple of unique PipelineStep objects whose outputs will be treated as + inputs to this step. If not included, the step is assumed to be an initial step. Attributes ---------- @@ -34,7 +34,7 @@ def __init__( name: str, registered_model_version: RegisteredModelVersion, predecessors: Optional[ - Set["PipelineStep"] + Union[List["PipelineStep"], Set["PipelineStep"], Tuple["PipelineStep"]] ] = None, # Optional because it could be the first step with no predecessors ): self._name = self.set_name(name) @@ -142,15 +142,19 @@ def predecessors(self, value) -> None: ) def set_predecessors( - self, steps: Optional[Set["PipelineStep"]] = None + self, + steps: Optional[ + Union[List["PipelineStep"], Set["PipelineStep"], Tuple["PipelineStep"]] + ] = None, ) -> Set["PipelineStep"]: """Set the predecessors associated with this step. 
Parameters ---------- - steps : set, optional - Set of PipelineStep objects whose outputs will be treated as inputs to this step. - Empty set used if no input provided. + steps : list, set, or tuple, optional + List, set, or tuple of PipelineStep objects whose outputs will be treated as + inputs to this step. All options are converted to a set, so order is irrelevant + and duplicates are removed. An empty set used if no input is provided. Returns ------- @@ -170,7 +174,9 @@ def set_predecessors( @staticmethod def _validate_predecessors( - predecessors: Set["PipelineStep"], + predecessors: Union[ + Set["PipelineStep"], List["PipelineStep"], Tuple["PipelineStep"] + ] ) -> Set["PipelineStep"]: """Validate that the provided predecessors are a set of PipelineStep objects. @@ -180,15 +186,17 @@ def _validate_predecessors( Set of PipelineStep objects whose outputs will be treated as inputs to this step. """ - if not isinstance(predecessors, set): - raise TypeError(f"steps must be type list, not {type(predecessors)}") + if not isinstance(predecessors, (set, list, tuple)): + raise TypeError( + f"steps must be type list, set, or tuple, not {type(predecessors)}" + ) for step in predecessors: if not isinstance(step, PipelineStep): raise TypeError( f"individual predecessors of a PipelineStep must be type" f" PipelineStep, not {type(step)}." ) - return predecessors + return set(predecessors) def _get_registered_model(self) -> RegisteredModel: """Fetch the registered model associated with this step's model version. 
From d9d98f98d13953470a50de656ad3746db2801fbd Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 29 Aug 2023 18:03:00 +0200 Subject: [PATCH 097/117] refactor: make RegisteredPipeline properly responsive to changes in RMV name via UI --- client/verta/verta/pipeline/_registered_pipeline.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 7043fe99d2..d24b55b9aa 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -47,8 +47,6 @@ def __init__( to refresh the cache of the RMV, because pipelines are immutable. """ self._registered_model_version = registered_model_version - self._name = self._registered_model_version.name - self._id = self._registered_model_version.id self._graph = graph self._graph._validate_steps(self._graph.steps) # throws an exception if the graph's steps attr has been inappropriately mutated. 
@@ -65,11 +63,11 @@ def __repr__(self): @property def name(self): - return self._name + return self._registered_model_version.name @property def id(self): - return self._id + return self._registered_model_version.id @property def graph(self): From 86b1032a4b391608f9fabfaf1ee6f4d9201c31ce Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 29 Aug 2023 18:06:18 +0200 Subject: [PATCH 098/117] Update client/verta/verta/pipeline/_pipeline_graph.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_pipeline_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 2663c6d385..7910e33ab8 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -51,7 +51,7 @@ def set_steps( Parameters ---------- steps : list, set, tuple of :class:`~verta.deployment.PipelineStep` - List, set, or tuple of all possible steps of the pipline graph. + List, set, or tuple of all possible steps of the pipeline graph. All options are converted to a set, so order is irrelevant and duplicates are removed. From 93df1429a8cfb0a232597496137d7f7093745bd7 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 29 Aug 2023 18:20:42 +0200 Subject: [PATCH 099/117] refactor: move type casting from validation function to setter function --- client/verta/verta/pipeline/_pipeline_graph.py | 4 ++-- client/verta/verta/pipeline/_pipeline_step.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 7910e33ab8..60a434b58c 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -65,7 +65,7 @@ def set_steps( TypeError If ``steps`` is not a set of PipelineStep objects. 
""" - self._steps = self._validate_steps(steps) + self._steps = set(self._validate_steps(steps)) return self.steps def _validate_steps( @@ -98,7 +98,7 @@ def _validate_steps( f"individual steps of a PipelineGraph must be type" f" PipelineStep, not {type(step)}." ) - return set(steps) + return steps @classmethod def _from_definition( diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 09413f828a..76a916d2ed 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -167,7 +167,7 @@ def set_predecessors( If the provided value for ``steps`` is not a set of PipelineStep objects. """ if steps: - self._predecessors = self._validate_predecessors(steps) + self._predecessors = set(self._validate_predecessors(steps)) return self.predecessors self._predecessors = set() return self.predecessors @@ -196,7 +196,7 @@ def _validate_predecessors( f"individual predecessors of a PipelineStep must be type" f" PipelineStep, not {type(step)}." ) - return set(predecessors) + return predecessors def _get_registered_model(self) -> RegisteredModel: """Fetch the registered model associated with this step's model version. From 0c04859d5277bae3536667ca01e0c3235b260935 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 29 Aug 2023 19:39:03 +0200 Subject: [PATCH 100/117] fix: type annotation for PipelineStep init to accept a union of iterables --- client/verta/verta/pipeline/_pipeline_graph.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 60a434b58c..5e3047b4a0 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -22,7 +22,9 @@ class PipelineGraph: Set of PipelineSteps comprising all possible steps in this PiplineGraph. 
""" - def __init__(self, steps: Set[PipelineStep]): + def __init__( + self, steps: Union[List[PipelineStep], Set[PipelineStep], Tuple[PipelineStep]] + ): self._steps = self._validate_steps(steps) for step in self._steps: step._validate_predecessors(step.predecessors) From 1fb8e1e26f88bb79d4b5fad15d7a0fac300c8a28 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 29 Aug 2023 20:44:24 +0200 Subject: [PATCH 101/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index b117b4ac86..207e5f3163 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -175,7 +175,7 @@ def test_bad_mutation_of_step_predecessors_exception( """ mocked_rmv = make_mock_registered_model_version() mocked_rm = make_mock_registered_model( - id=mocked_rmv.registered_model_id, name="test_rmv" + id=mocked_rmv.registered_model_id, name="test_rm" ) with patch.object( verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm From e58cb001074c2f2d27b8588c3b7491516e4003ce Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 29 Aug 2023 20:45:11 +0200 Subject: [PATCH 102/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py 
b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index 8d55ddd658..fefbabe87f 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -42,7 +42,7 @@ def test_steps_from_pipeline_definition( graph = pipeline_definition["graph"] for step in pipeline_definition["steps"]: mocked_responses.get( - f"https://test_socket/api/v1/registry/model_versions/{step['model_version_id']}", + f"{mock_conn.scheme}://{mock_conn.socket}/api/v1/registry/model_versions/{step['model_version_id']}", json={ "model_version": { "id": step["model_version_id"], From 33e55112ec72ba3a2872779511b1fecb496b2d9e Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 29 Aug 2023 20:45:24 +0200 Subject: [PATCH 103/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index fefbabe87f..cc3383c154 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -53,7 +53,7 @@ def test_steps_from_pipeline_definition( status=200, ) mocked_responses.get( - f"https://test_socket/api/v1/registry/registered_models/{registered_model_id}", + f"{mock_conn.scheme}://{mock_conn.socket}/api/v1/registry/registered_models/{registered_model_id}", json={ "registered_model": { "id": registered_model_id, From 304bef1e647e73a847648fad86ff70db6a240371 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 29 Aug 2023 20:45:44 +0200 Subject: [PATCH 104/117] Update 
client/verta/tests/unit_tests/pipeline/test_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index cc3383c154..13f0e7b9a5 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -15,8 +15,8 @@ @given( pipeline_definition=pipeline_definition(), - registered_model_id=st.integers(min_value=1, max_value=2**63), # max value limit avoids protobuf "Value out of range" error + registered_model_id=st.integers(min_value=1, max_value=2**63), model_version_name=st.text(min_size=1), model_name=st.text(min_size=1), ) From 3db95af02ae4706e61199da61e8b1f5dcece8a0e Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 29 Aug 2023 20:49:12 +0200 Subject: [PATCH 105/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index 13f0e7b9a5..7d3b7558b6 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -70,7 +70,7 @@ def test_steps_from_pipeline_definition( # we have the same number of steps as in the pipeline definition assert len(generated_steps) == len(pipeline_definition["steps"]) # sort both group of steps for side-by-side comparison - generated_steps_sorted = sorted(list(generated_steps), key=lambda x: x.name) + generated_steps_sorted = 
sorted(generated_steps, key=lambda x: x.name) definition_steps_sorted = sorted( pipeline_definition["steps"], key=lambda x: x["name"] ) From 073a0588ecc569174dd00d386b2a44facf61b8fa Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 29 Aug 2023 20:50:09 +0200 Subject: [PATCH 106/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index 7d3b7558b6..24eb3f52a3 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -215,7 +215,7 @@ def test_change_model_version( rmv_1 = make_mock_registered_model_version() rmv_2 = make_mock_registered_model_version() mocked_responses.get( - f"https://test_socket/api/v1/registry/registered_models/{rmv_1.registered_model_id}", + f"{rmv_1._conn.scheme}://{rmv_1._conn.socket}/api/v1/registry/registered_models/{rmv_1.registered_model_id}", json={ "registered_model": { "id": rmv_1.registered_model_id, From 4eade5932facf0d47dc398212290f6012d376ddf Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 29 Aug 2023 20:52:30 +0200 Subject: [PATCH 107/117] Update client/verta/tests/unit_tests/pipeline/test_pipeline_step.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/tests/unit_tests/pipeline/test_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py index 24eb3f52a3..60ea91ca8f 100644 --- 
a/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_step.py @@ -225,7 +225,7 @@ def test_change_model_version( status=200, ) mocked_responses.get( - f"https://test_socket/api/v1/registry/registered_models/{rmv_2.registered_model_id}", + f"{rmv_2._conn.scheme}://{rmv_2._conn.socket}/api/v1/registry/registered_models/{rmv_2.registered_model_id}", json={ "registered_model": { "id": rmv_2.registered_model_id, From 0085a8d7c65cad7fdbcfe0df7c2be06dd4f6c420 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 29 Aug 2023 20:55:17 +0200 Subject: [PATCH 108/117] docs: simplify doc strings --- client/verta/verta/pipeline/_pipeline_graph.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 5e3047b4a0..f19726da00 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -130,19 +130,13 @@ def _from_definition( def _to_graph_definition(self) -> List[Dict[str, Any]]: """Create a pipeline graph specification from this PipelineGraph. - The back-end expects a list of steps and their predecessors as part of the - `graph` object within a PipelineDefinition. This method converts this PipelineGraph - to a formatted list of steps with predecessors for that purpose. A list is used - to remain json serializable, as this will be converted and uploaded as an artifact. + This is fed to the backend as 'graph' in our PipelineDefinition schema. """ return [step._to_graph_spec() for step in self.steps] def _to_steps_definition(self) -> List[Dict[str, Any]]: """Create a pipeline steps specification from this PipelineGraph. - The back-end expects a list of steps and their model versions as part of the - `steps` object within a PipelineDefinition. 
This method converts this PipelineGraph - to a formatted list of steps with model versions for that purpose. A list is used - to remain json serializable, as this will be converted and uploaded as an artifact. + This is fed to the backend as 'steps' in our PipelineDefinition schema. """ return [step._to_step_spec() for step in self.steps] From 38b1add6036d2741b05b16c5a8218965b7fc6b7d Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 29 Aug 2023 20:56:54 +0200 Subject: [PATCH 109/117] Update client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- .../tests/unit_tests/pipeline/test_registered_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index 08e6c3ef32..f89d25fded 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -34,8 +34,8 @@ def test_copy_graph( ) copied_graph = pipeline.copy_graph() # convert from sets to lists and sort for side-by-side comparison - graph_steps_sorted = sorted(list(graph.steps), key=lambda x: x.name) - copied_graph_steps_sorted = sorted(list(copied_graph.steps), key=lambda x: x.name) + graph_steps_sorted = sorted(graph.steps, key=lambda x: x.name) + copied_graph_steps_sorted = sorted(copied_graph.steps, key=lambda x: x.name) for orig_step, copied_step in zip(graph_steps_sorted, copied_graph_steps_sorted): assert orig_step is not copied_step From 695ff3a7f5ac715bcc00dd6165abd9e770dbf323 Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Tue, 29 Aug 2023 20:57:58 +0200 Subject: [PATCH 110/117] Update client/verta/verta/pipeline/_pipeline_step.py Co-authored-by: Liu 
<96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_pipeline_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 76a916d2ed..aa41f495cb 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -154,7 +154,7 @@ def set_predecessors( steps : list, set, or tuple, optional List, set, or tuple of PipelineStep objects whose outputs will be treated as inputs to this step. All options are converted to a set, so order is irrelevant - and duplicates are removed. An empty set used if no input is provided. + and duplicates are removed. An empty set is used if no input is provided. Returns ------- From 530fc31b91ab1499934ea6b5d78eedbb85cfad68 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 29 Aug 2023 20:59:06 +0200 Subject: [PATCH 111/117] refactor: drop newline from repr --- client/verta/verta/pipeline/_registered_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index d24b55b9aa..4a4470383d 100644 --- a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -57,7 +57,7 @@ def __repr__(self): "RegisteredPipeline:", f"pipeline name: {self.name}", f"pipeline id: {self.id}", - f"\n{self.graph}", + f"{self.graph}", ) ) From 3b1807fb0a3e832012eb47bd69d58acff530117c Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 29 Aug 2023 21:09:28 +0200 Subject: [PATCH 112/117] refactor: move validate predecessors call to validate steps function --- client/verta/verta/pipeline/_pipeline_graph.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index f19726da00..44266f3df4 
100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -26,9 +26,7 @@ def __init__( self, steps: Union[List[PipelineStep], Set[PipelineStep], Tuple[PipelineStep]] ): self._steps = self._validate_steps(steps) - for step in self._steps: - step._validate_predecessors(step.predecessors) - # throws an exception if any step's predecessors attr has been inappropriately mutated. + def __repr__(self) -> str: return f"PipelineGraph steps:\n{self._format_steps()}" @@ -100,6 +98,8 @@ def _validate_steps( f"individual steps of a PipelineGraph must be type" f" PipelineStep, not {type(step)}." ) + # throw an exception if any step's predecessors attr has been inappropriately mutated. + step._validate_predecessors(step.predecessors) return steps @classmethod From c2a367eb4a47ed085344b5f100346989457ac764 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 29 Aug 2023 21:19:47 +0200 Subject: [PATCH 113/117] feature: check for uniqueness of step names --- .../pipeline/test_pipeline_graph.py | 21 ++++++++++++++++++- .../verta/verta/pipeline/_pipeline_graph.py | 4 ++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index 207e5f3163..de96d930f7 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -142,7 +142,7 @@ def test_to_steps_definition( Definitions are type list to remain json serializable. 
""" - mocked_rm = make_mock_registered_model(id=123, name="test_rmv") + mocked_rm = make_mock_registered_model(id=123, name="test_rm") with patch.object( verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm ): @@ -192,3 +192,22 @@ def test_bad_mutation_of_step_predecessors_exception( str(err.value) == f"individual predecessors of a PipelineStep must be type" f" PipelineStep, not ." ) + + +def test_step_name_uniqueness_exception( + make_mock_registered_model, make_mock_pipeline_step +): + mocked_rm = make_mock_registered_model(id=123, name="test_rm") + with patch.object( + verta.pipeline.PipelineStep, "_get_registered_model", return_value=mocked_rm + ): + step_1 = make_mock_pipeline_step(name="step_1") + step_2 = make_mock_pipeline_step(name="step_2") + step_3 = make_mock_pipeline_step(name="step_1") + + with pytest.raises(ValueError) as err: + PipelineGraph(steps={step_1, step_2, step_3}) + assert ( + str(err.value) == "step names must be unique within a PipelineGraph" + ) +ª diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 44266f3df4..c24cac3e36 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -100,6 +100,10 @@ def _validate_steps( ) # throw an exception if any step's predecessors attr has been inappropriately mutated. 
step._validate_predecessors(step.predecessors) + if [s.name for s in steps] is not set([s.name for s in steps]): + raise ValueError( + "step names must be unique within a PipelineGraph" + ) return steps @classmethod From 0712c0f73bb009d96e2513fa18b0e8127db9fe3b Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 29 Aug 2023 21:26:44 +0200 Subject: [PATCH 114/117] refactor: black formatting --- .../verta/tests/unit_tests/pipeline/test_pipeline_graph.py | 5 +---- client/verta/verta/pipeline/_pipeline_graph.py | 5 +---- client/verta/verta/pipeline/_pipeline_step.py | 3 ++- 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py index de96d930f7..82e9eb24f8 100644 --- a/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py +++ b/client/verta/tests/unit_tests/pipeline/test_pipeline_graph.py @@ -207,7 +207,4 @@ def test_step_name_uniqueness_exception( with pytest.raises(ValueError) as err: PipelineGraph(steps={step_1, step_2, step_3}) - assert ( - str(err.value) == "step names must be unique within a PipelineGraph" - ) -ª + assert str(err.value) == "step names must be unique within a PipelineGraph" diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index c24cac3e36..c7cc39436c 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -27,7 +27,6 @@ def __init__( ): self._steps = self._validate_steps(steps) - def __repr__(self) -> str: return f"PipelineGraph steps:\n{self._format_steps()}" @@ -101,9 +100,7 @@ def _validate_steps( # throw an exception if any step's predecessors attr has been inappropriately mutated. 
step._validate_predecessors(step.predecessors) if [s.name for s in steps] is not set([s.name for s in steps]): - raise ValueError( - "step names must be unique within a PipelineGraph" - ) + raise ValueError("step names must be unique within a PipelineGraph") return steps @classmethod diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index aa41f495cb..7189f0714e 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -91,7 +91,8 @@ def set_registered_model_version( """ if not isinstance(registered_model_version, RegisteredModelVersion): raise TypeError( - f"registered_model_version must be a RegisteredModelVersion object, not {type(registered_model_version)}" + f"registered_model_version must be a RegisteredModelVersion object, " + f"not {type(registered_model_version)}" ) self._registered_model_version = registered_model_version self._registered_model = self._get_registered_model() From 0266f9fcf32923b7b744ca06e8fdbd777f89a721 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Tue, 29 Aug 2023 21:49:47 +0200 Subject: [PATCH 115/117] fix: use len comparison to avoid empty list != empty set issue, and pull scheme and socket dynamically --- .../unit_tests/pipeline/test_registered_pipeline.py | 9 ++++----- client/verta/verta/pipeline/_pipeline_graph.py | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py index f89d25fded..012024590c 100644 --- a/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py +++ b/client/verta/tests/unit_tests/pipeline/test_registered_pipeline.py @@ -318,23 +318,22 @@ def test_from_pipeline_definition( mocked RMV fixture to return a simple, consistent pipeline definition. Calls related to the fetching of the RMV and RM are mocked. 
""" + rmv = make_mock_registered_model_version() mocked_responses.get( - "https://test_socket/api/v1/registry/model_versions/1", + f"{rmv._conn.scheme}://{rmv._conn.socket}/api/v1/registry/model_versions/1", json={}, status=200, ) mocked_responses.get( - "https://test_socket/api/v1/registry/model_versions/2", + f"{rmv._conn.scheme}://{rmv._conn.socket}/api/v1/registry/model_versions/2", json={}, status=200, ) mocked_responses.get( - "https://test_socket/api/v1/registry/registered_models/0", + f"{rmv._conn.scheme}://{rmv._conn.socket}/api/v1/registry/registered_models/0", json={}, status=200, ) - - rmv = make_mock_registered_model_version() pipeline = RegisteredPipeline._from_pipeline_definition( registered_model_version=rmv, ) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index c7cc39436c..e725a79eb0 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -99,7 +99,7 @@ def _validate_steps( ) # throw an exception if any step's predecessors attr has been inappropriately mutated. 
step._validate_predecessors(step.predecessors) - if [s.name for s in steps] is not set([s.name for s in steps]): + if len([s.name for s in steps]) is not len(set([s.name for s in steps])): raise ValueError("step names must be unique within a PipelineGraph") return steps From 95bbca99deabc072444cbaa13d67bc1d0456bf74 Mon Sep 17 00:00:00 2001 From: ewagner-verta Date: Wed, 30 Aug 2023 18:55:28 +0200 Subject: [PATCH 116/117] fix: circular import problem --- client/verta/verta/pipeline/_pipeline_graph.py | 2 +- client/verta/verta/pipeline/_pipeline_step.py | 4 ++-- client/verta/verta/pipeline/_registered_pipeline.py | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index e725a79eb0..8a194c7f10 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Set, Tuple, Union -from verta._internal_utils._utils import Configuration, Connection +from .._internal_utils._utils import Configuration, Connection from ._pipeline_step import PipelineStep diff --git a/client/verta/verta/pipeline/_pipeline_step.py b/client/verta/verta/pipeline/_pipeline_step.py index 7189f0714e..04b3a68eba 100644 --- a/client/verta/verta/pipeline/_pipeline_step.py +++ b/client/verta/verta/pipeline/_pipeline_step.py @@ -2,8 +2,8 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union -from verta._internal_utils._utils import Configuration, Connection -from verta.registry.entities import RegisteredModel, RegisteredModelVersion +from .._internal_utils._utils import Configuration, Connection +from ..registry.entities import RegisteredModel, RegisteredModelVersion class PipelineStep: diff --git a/client/verta/verta/pipeline/_registered_pipeline.py b/client/verta/verta/pipeline/_registered_pipeline.py index 4a4470383d..ecb02b0555 100644 --- 
a/client/verta/verta/pipeline/_registered_pipeline.py +++ b/client/verta/verta/pipeline/_registered_pipeline.py @@ -5,9 +5,9 @@ import tempfile from typing import Any, Dict, Optional -from verta.endpoint.resources import Resources -from verta.pipeline import PipelineGraph -from verta.registry.entities import RegisteredModelVersion +from ..endpoint.resources import Resources +from ._pipeline_graph import PipelineGraph +from ..registry.entities import RegisteredModelVersion class RegisteredPipeline: From fe3c11dba31809f79bf566fcfa02a08d866666ab Mon Sep 17 00:00:00 2001 From: ewagner-verta <114943931+ewagner-verta@users.noreply.github.com> Date: Wed, 30 Aug 2023 19:33:43 +0200 Subject: [PATCH 117/117] Update client/verta/verta/pipeline/_pipeline_graph.py Co-authored-by: Liu <96442646+liuverta@users.noreply.github.com> --- client/verta/verta/pipeline/_pipeline_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/verta/verta/pipeline/_pipeline_graph.py b/client/verta/verta/pipeline/_pipeline_graph.py index 8a194c7f10..55c2d7f4af 100644 --- a/client/verta/verta/pipeline/_pipeline_graph.py +++ b/client/verta/verta/pipeline/_pipeline_graph.py @@ -99,7 +99,7 @@ def _validate_steps( ) # throw an exception if any step's predecessors attr has been inappropriately mutated. step._validate_predecessors(step.predecessors) - if len([s.name for s in steps]) is not len(set([s.name for s in steps])): + if len([s.name for s in steps]) != len(set([s.name for s in steps])): raise ValueError("step names must be unique within a PipelineGraph") return steps