diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 4162a7a..6e89f7d 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -28,7 +28,7 @@ jobs:
         run: make download-poetry

       - name: Set up pip cache
-        uses: actions/cache@v3.3.2
+        uses: actions/cache@v4.0.0
         with:
           path: ~/.cache/pypoetry/virtualenvs
           key: venv-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('poetry.lock') }}
@@ -67,7 +67,7 @@ jobs:
         run: make download-poetry

       - name: Set up pip cache
-        uses: actions/cache@v3.3.2
+        uses: actions/cache@v4.0.0
         with:
           path: ~/.cache/pypoetry/virtualenvs
           key: venv-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('poetry.lock') }}
@@ -106,7 +106,7 @@ jobs:
         # Cannot check more than compile action here, need GCP environment for upload, run, schedule
         run: |
           cd example
-          poetry run -C .. vertex-deployer -log DEBUG deploy dummy_pipeline --compile --env-file example.env
+          poetry run -C .. vertex-deployer -log DEBUG deploy dummy_pipeline --compile --env-file example.env --skip-validation

       - name: Test create command
         run: |
diff --git a/.github/workflows/deploy_docs.yaml b/.github/workflows/deploy_docs.yaml
index 814c18b..e73739b 100644
--- a/.github/workflows/deploy_docs.yaml
+++ b/.github/workflows/deploy_docs.yaml
@@ -24,7 +24,7 @@ jobs:
         run: make download-poetry

       - name: Set up pip cache
-        uses: actions/cache@v3.3.2
+        uses: actions/cache@v4.0.0
         with:
           path: ~/.cache/pypoetry/virtualenvs
           key: venv-${{ env.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('poetry.lock') }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4e301dd..8693742 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,20 +14,20 @@ repos:
       - id: check-added-large-files
   - repo: local
     hooks:
-      - id: isort
-        name: isort
-        entry: poetry run isort --settings-path pyproject.toml
+      - id: ruff-isort
+        name: ruff isort
+        entry: poetry run ruff check --fix --select I --config pyproject.toml --show-fixes
        types: [python]
        language: system
        stages: [commit, push]
-      - id: black
-        name: black
-        entry: poetry run black --config pyproject.toml
+      - id: ruff-format
+        name: ruff format
+        entry: poetry run ruff format --config pyproject.toml
        types: [python]
        language: system
        stages: [commit, push]
-      - id: ruff
-        name: ruff
+      - id: ruff-check
+        name: ruff check
        entry: poetry run ruff check --config pyproject.toml
        types: [python]
        language: system
diff --git a/README.md b/README.md
index 4583624..83369c3 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,10 @@

Vertex Pipelines Deployer

Deploy Vertex Pipelines within minutes

- This tool is a wrapper around kfp and google-cloud-aiplatform that allows you to check, compile, upload, run and schedule Vertex Pipelines in a standardized manner.
+ This tool is a wrapper around kfp and google-cloud-aiplatform that allows you to check, compile, upload, run, and schedule Vertex Pipelines in a standardized manner.

-
+
@@ -18,40 +18,40 @@
 [![Pre-commit](https://img.shields.io/badge/pre--commit-enabled-informational?logo=pre-commit&logoColor=white)](https://github.com/ornikar/vertex-eduscore/blob/develop/.pre-commit-config.yaml)
 [![License](https://img.shields.io/github/license/artefactory/vertex-pipelines-deployer)](https://github.com/artefactory/vertex-pipelines-deployer/blob/main/LICENSE)
-[![CI](https://github.com/artefactory/vertex-pipelines-deployer/actions/workflows/ci.yaml/badge.svg?branch%3Amain&event%3Apush)](https://github.com/artefactory/vertex-pipelines-deployer/actions/workflows/ci.yaml/badge.svg?query=branch%3Amain)
-[![Release](https://github.com/artefactory/vertex-pipelines-deployer/actions/workflows/release.yaml/badge.svg?branch%3Amain&event%3Apush)](https://github.com/artefactory/vertex-pipelines-deployer/actions/workflows/release.yaml/badge.svg?query=branch%3Amain)
+[![CI](https://github.com/artefactory/vertex-pipelines-deployer/actions/workflows/ci.yaml/badge.svg?branch=main&event=push)](https://github.com/artefactory/vertex-pipelines-deployer/actions/workflows/ci.yaml)
+[![Release](https://github.com/artefactory/vertex-pipelines-deployer/actions/workflows/release.yaml/badge.svg?branch=main&event=push)](https://github.com/artefactory/vertex-pipelines-deployer/actions/workflows/release.yaml)
 📚 Table of Contents
-
+
@@ -63,26 +63,22 @@
 Three use cases:

-1. **CI:** check pipeline validity.
-1. **Dev mode:** quickly iterate over your pipelines by compiling and running them in multiple environments (test, dev, staging, etc) without duplicating code or looking for the right kfp / aiplatform snippet.
-2. **CD:** deploy your pipelines to Vertex Pipelines in a standardized manner in your CD with Cloud Build or GitHub Actions.
+1. **CI:** Check pipeline validity.
+2. **Dev mode:** Quickly iterate over your pipelines by compiling and running them in multiple environments (test, dev, staging, etc.) without duplicating code or searching for the right kfp/aiplatform snippet.
+3. **CD:** Deploy your pipelines to Vertex Pipelines in a standardized manner in your CD with Cloud Build or GitHub Actions.

-Four commands:
+Two main commands:

-- `check`: check your pipelines (imports, compile, check configs validity against pipeline definition).
-- `deploy`: compile, upload to Artifact Registry, run and schedule your pipelines.
-- `config`: display the configuration from `pyproject.toml`.
-- `create`: create a new pipeline and config files.
-- `init`: initialize the project with necessary configuration files and directory structure.
-- `list`: list all pipelines in the `vertex/pipelines` folder.
+- `check`: Check your pipelines (imports, compile, check configs validity against pipeline definition).
+- `deploy`: Compile, upload to Artifact Registry, run, and schedule your pipelines.

 ## 📋 Prerequisites

-- Unix-like environment (Linux, macOS, WSL, etc...)
+- Unix-like environment (Linux, macOS, WSL, etc.)
 - Python 3.8 to 3.10
 - Google Cloud SDK
 - A GCP project with Vertex Pipelines enabled
@@ -344,7 +340,8 @@ vertex-deployer deploy dummy_pipeline \
     --tags my-tag \
     --config-filepath vertex/configs/dummy_pipeline/config_test.json \
     --experiment-name my-experiment \
-    --enable-caching
+    --enable-caching \
+    --skip-validation
 ```

 ### ✅ CLI: Checking Pipelines are valid with `check`
diff --git a/deployer/cli.py b/deployer/cli.py
index 10f9daa..178a504 100644
--- a/deployer/cli.py
+++ b/deployer/cli.py
@@ -24,9 +24,10 @@
     list_config_filepaths,
     load_config,
     load_vertex_settings,
+    validate_or_log_settings,
 )
-from deployer.utils.console import ask_user_for_model_fields
-from deployer.utils.logging import LoguruLevel, console
+from deployer.utils.console import ask_user_for_model_fields, console
+from deployer.utils.logging import LoguruLevel
 from deployer.utils.utils import (
     dict_to_repr,
     import_pipeline_from_dir,
@@ -216,9 +217,18 @@ def deploy(  # noqa: C901
             resolve_path=True,
         ),
     ] = constants.DEFAULT_LOCAL_PACKAGE_PATH,
+    skip_validation: Annotated[
+        bool,
+        typer.Option(
+            "--skip-validation / --no-skip",
+            "-y / -n",
+            help="Whether to continue without user validation of the settings.",
+        ),
+    ] = True,
 ):
     """Compile, upload, run and schedule pipelines."""
     vertex_settings = load_vertex_settings(env_file=env_file)
+    validate_or_log_settings(vertex_settings, skip_validation=skip_validation, env_file=env_file)

     if schedule:
         if cron is None or cron == "":
@@ -352,17 +362,18 @@ def check(
     if all:
         logger.info("Checking all pipelines")
-        pipelines_to_check = ctx.obj["pipeline_names"].__members__.values()
+        # unpack enum to get list of pipeline names
+        pipelines_to_check = [x.value for x in ctx.obj["pipeline_names"]]
     elif pipeline_name is not None:
         logger.info(f"Checking pipeline {pipeline_name}")
         pipelines_to_check = [pipeline_name]

     if config_filepath is None:
         to_check = {
-            p.value: list_config_filepaths(deployer_settings.config_root_path, p.value)
+            p: list_config_filepaths(deployer_settings.config_root_path, p)
             for p in pipelines_to_check
         }
     else:
-        to_check = {p.value: [config_filepath] for p in pipelines_to_check}
+        to_check = {p: [config_filepath] for p in pipelines_to_check}

     try:
         with console.status("Checking pipelines..."):
diff --git a/deployer/pipeline_checks.py b/deployer/pipeline_checks.py
index 0fd0412..d971903 100644
--- a/deployer/pipeline_checks.py
+++ b/deployer/pipeline_checks.py
@@ -85,7 +85,7 @@ def import_pipeline(self):
         try:
             _ = self.pipeline
         except (ImportError, ModuleNotFoundError) as e:
-            raise ValueError(f"Pipeline import failed: {e.__repr__()}")  # noqa: B904
+            raise ValueError(f"Pipeline import failed: {e}") from e
         return self

     @model_validator(mode="after")
diff --git a/deployer/settings.py b/deployer/settings.py
index b6e611e..86afe94 100644
--- a/deployer/settings.py
+++ b/deployer/settings.py
@@ -31,6 +31,7 @@ class _DeployerDeploySettings(CustomBaseModel):
     enable_caching: bool = False
     experiment_name: Optional[str] = None
     local_package_path: Path = constants.DEFAULT_LOCAL_PACKAGE_PATH
+    skip_validation: bool = True


 class _DeployerCheckSettings(CustomBaseModel):
diff --git a/deployer/utils/config.py b/deployer/utils/config.py
index 5da39a3..0ebb4ef 100644
--- a/deployer/utils/config.py
+++ b/deployer/utils/config.py
@@ -5,11 +5,15 @@
 from typing import List, Optional, Tuple, Union

 import tomlkit.items
+from loguru import logger
 from pydantic import ValidationError
 from pydantic_settings import BaseSettings, SettingsConfigDict
+from rich.prompt import Confirm
+from rich.table import Table
 from tomlkit import TOMLDocument
 from tomlkit.toml_file import TOMLFile

+from deployer.utils.console import console
 from deployer.utils.exceptions import BadConfigError, UnsupportedConfigFileError
@@ -28,6 +32,7 @@ def load_vertex_settings(env_file: Optional[Path] = None) -> VertexPipelinesSett
     """Load the settings from the environment."""
     try:
         settings = VertexPipelinesSettings(_env_file=env_file, _env_file_encoding="utf-8")
+        print(settings)
     except ValidationError as e:
         msg = "Validation failed for VertexPipelinesSettings. "
         if env_file is not None:
@@ -39,6 +44,42 @@ def load_vertex_settings(env_file: Optional[Path] = None) -> VertexPipelinesSett
     return settings

+
+def validate_or_log_settings(
+    settings: VertexPipelinesSettings,
+    skip_validation: bool,
+    env_file: Optional[Path] = None,
+) -> None:
+    """Validate the settings or log them if validation is skipped.
+
+    Args:
+        settings (VertexPipelinesSettings): The settings to validate or log.
+        skip_validation (bool): Whether to skip validation.
+        env_file (Optional[Path], optional): The path to the environment file. Defaults to None.
+
+    Raises:
+        ValueError: If the user chooses to exit.
+    """
+    msg = "Loaded settings from environment"
+    if env_file is not None:
+        msg += f" and `.env` file: `{env_file}`."
+
+    if skip_validation:
+        msg += "\nLoaded settings for Vertex:"
+        msg += "\n" + "\n".join(f" {k:<30} {v:<30}" for k, v in settings.model_dump().items())
+        logger.info(msg)
+    else:
+        table = Table(show_header=True, header_style="bold", show_lines=True)
+        table.add_column("Setting Name")
+        table.add_column("Value")
+        for k, v in settings.model_dump().items():
+            table.add_row(k, v)
+
+        console.print(msg)
+        console.print(table)
+        if not Confirm.ask("Do you want to continue with these settings?", console=console):
+            raise ValueError("User chose to exit")
+
+
 class ConfigType(str, Enum):  # noqa: D101
     json = "json"
     py = "py"
diff --git a/deployer/utils/console.py b/deployer/utils/console.py
index 3f85ffb..029bd97 100644
--- a/deployer/utils/console.py
+++ b/deployer/utils/console.py
@@ -3,8 +3,11 @@
 from typing import Type

 from pydantic import BaseModel
+from rich.console import Console
 from rich.prompt import Prompt

+console = Console()
+

 def ask_user_for_model_fields(model: Type[BaseModel]) -> dict:
     """Ask user for model fields and return a dictionary with the results.
diff --git a/deployer/utils/logging.py b/deployer/utils/logging.py
index 3197e07..5252445 100644
--- a/deployer/utils/logging.py
+++ b/deployer/utils/logging.py
@@ -1,7 +1,6 @@
 from enum import Enum

 from loguru import logger
-from rich.console import Console


 class LoguruLevel(str, Enum):  # noqa: D101
@@ -25,6 +24,3 @@ def __enter__(self) -> None:  # noqa: D105

     def __exit__(self, exc_type, exc_val, exc_tb) -> None:  # noqa: D105
         logger.enable(self.name)
-
-
-console = Console()
diff --git a/deployer/utils/models.py b/deployer/utils/models.py
index c29a4ab..92fa7c7 100644
--- a/deployer/utils/models.py
+++ b/deployer/utils/models.py
@@ -46,7 +46,7 @@ def create_model_from_func(
     func_model = create_model(
         __model_name=model_name,
         __base__=CustomBaseModel,
-        **{name: (annotation, ...) for name, annotation in func_typing.items()}
+        **{name: (annotation, ...) for name, annotation in func_typing.items()},
     )

     return func_model
diff --git a/deployer/utils/utils.py b/deployer/utils/utils.py
index da7d557..1567345 100644
--- a/deployer/utils/utils.py
+++ b/deployer/utils/utils.py
@@ -1,14 +1,16 @@
 import importlib
+import traceback
 import warnings
 from enum import Enum
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Mapping, Optional, Protocol
+from types import TracebackType
+from typing import Any, Callable, Dict, List, Mapping, Optional, Protocol, Union

 from loguru import logger
 from pydantic import ValidationError
 from rich.table import Table

-from deployer.utils.logging import console
+from deployer.utils.console import console
 from deployer.utils.models import ChecksTableRow


@@ -37,19 +39,70 @@ def __call__(  # noqa: D102
         ...


+def filter_lines_from(tb: TracebackType, target_file: Union[Path, str]) -> str:
+    """Filters a traceback object to only show the lines from a specific file.
+
+    Traceback objects can contain lines from multiple files (e.g. when a function is called from a
+    different module). This function removes all the lines that are not related to the target file.
+
+    Example:
+        >>> traceback.print_tb(tb)
+          File "path/to/file3.py", line 30, in <module>
+            call_function_from_file2()
+          File "path/to/file2.py", line 20, in call_function_from_file2
+            call_function_from_file1()
+          File "path/to/file1.py", line 10, in call_function_from_file1
+            raise ValueError("Something went wrong.")
+        >>> filter_lines_from(tb, "path/to/file2.py")
+          File "path/to/file2.py", line 20, in call_function_from_file2
+            call_function_from_file1()
+
+    Args:
+        tb (TracebackType): the traceback object to be filtered.
+        target_file (Path | str): the file from which to show the traceback lines.
+
+    Raises:
+        TypeError: if target_file is not a Path or a str.
+
+    Returns:
+        str: a string containing the filtered traceback.
+ """ + # ensure that the path is absolute + if isinstance(target_file, Path): + target_file = str(target_file.resolve()) + elif isinstance(target_file, str): + target_file = str(Path(target_file).resolve()) + else: + raise TypeError(f"target_file should be a Path or a str, but got {type(target_file)}.") + + filtered_traceback: list[traceback.FrameSummary] = [ + frame for frame in traceback.extract_tb(tb) if target_file in frame.filename + ] + + if filtered_traceback: + string_filtered_traceback = "".join(traceback.format_list(filtered_traceback)) + else: + string_filtered_traceback = "Could not find potential source of error." + + return string_filtered_traceback + + def import_pipeline_from_dir(dirpath: Path, pipeline_name: str) -> GraphComponentType: """Import a pipeline from a directory.""" dirpath_ = Path(dirpath).resolve().relative_to(Path.cwd()) parent_module = ".".join(dirpath_.parts) - module_path = f"{parent_module}.{pipeline_name}" + module_import_path = f"{parent_module}.{pipeline_name}" # used with import statements + module_folder_path = dirpath_ / f"{pipeline_name}.py" # used as a path to a file try: - pipeline_module = importlib.import_module(module_path) + pipeline_module = importlib.import_module(module_import_path) except ModuleNotFoundError as e: raise e except Exception as e: raise ImportError( - f"Error while importing pipeline from {module_path}: {e.__repr__()}" + f"Error while importing pipeline from {module_import_path}: \n {type(e).__name__}:" + f"{e} \n\nPotential sources of error:\n" + f"{filter_lines_from(e.__traceback__, module_folder_path)}" ) from e try: @@ -57,20 +110,20 @@ def import_pipeline_from_dir(dirpath: Path, pipeline_name: str) -> GraphComponen if pipeline is None: pipeline = pipeline_module.pipeline warnings.warn( - f"Pipeline in `{module_path}` is named `pipeline` instead of `{pipeline_name}`. " - "This is deprecated and will be removed in a future version. " + f"Pipeline in `{module_import_path}` is named `pipeline` instead of " + f"`{pipeline_name}`. This is deprecated and will be removed in a future version. " f"Please rename your pipeline to `{pipeline_name}`.", FutureWarning, stacklevel=1, ) except AttributeError as e: raise ImportError( - f"Pipeline object not found in `{module_path}`. " + f"Pipeline object not found in `{module_import_path}`. " "Please check that the pipeline is correctly defined and named." f"It should be named `{pipeline_name}` or `pipeline` (deprecated)." ) from e - logger.debug(f"Pipeline {module_path} imported successfully.") + logger.debug(f"Pipeline {module_import_path} imported successfully.") return pipeline diff --git a/docs/CLI_REFERENCE.md b/docs/CLI_REFERENCE.md index 3951277..8c41c18 100644 --- a/docs/CLI_REFERENCE.md +++ b/docs/CLI_REFERENCE.md @@ -118,6 +118,7 @@ $ vertex-deployer deploy [OPTIONS] PIPELINE_NAME * `--enable-caching, -ec / --no-enable-caching`: Whether to enable caching when running the pipeline. [default: no-enable-caching] * `--experiment-name, -en TEXT`: The name of the experiment to run the pipeline in. Defaults to '{pipeline_name}-experiment'. * `--local-package-path, -lpp PATH`: Local dir path where pipelines will be compiled. [default: vertex/pipelines/compiled_pipelines] +* `--skip-validation, -y / --no-skip, -n`: Whether to skip the confirmation prompt. [default: yes] * `--help`: Show this message and exit. 
 ## `vertex-deployer init`
diff --git a/docs/advanced_user_guide.md b/docs/advanced_user_guide.md
index 961ebaa..1f051ea 100644
--- a/docs/advanced_user_guide.md
+++ b/docs/advanced_user_guide.md
@@ -53,7 +53,7 @@ This command includes the following:
 Now that you have a base image, you can compile your pipeline and trigger a run that will use the latest version of your docker base image

 ```bash
-vertex-deployer deploy --compile --run --env-file .env --config-name my_config.json
+vertex-deployer deploy --compile --run --env-file .env --config-name my_config.json --skip-validation
 ```

 ## ✅ CI: Check your pipelines and config integrity
diff --git a/docs/usage.md b/docs/usage.md
index 7600a1b..51a227d 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,7 +10,8 @@
     --tags my-tag \
     --config-filepath vertex/configs/dummy_pipeline/config_test.json \
     --experiment-name my-experiment \
-    --enable-caching
+    --enable-caching \
+    --skip-validation
 ```

 Check pipelines:
diff --git a/example/requirements.txt b/example/requirements.txt
index 2bc90b1..8f3236c 100644
--- a/example/requirements.txt
+++ b/example/requirements.txt
@@ -5,4 +5,4 @@ pandas<2.0.0
 --extra-index-url https://europe-west1-python.pkg.dev/data-sandbox-fr/artefactory/simple
 kfp
 google-cloud-aiplatform
-vertex_deployer==0.3.2
+vertex_deployer==0.4.0
diff --git a/pyproject.toml b/pyproject.toml
index 20f72f9..0741e5a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,12 +24,10 @@ toml = "^0.10"
 tomlkit = "^0.12"

 [tool.poetry.group.dev.dependencies]
-black = "^23.7"
-isort = "^5.12"
-pytest = "^7.4"
+pytest = "^8.0"
 pre-commit = "^3.3"
 ipykernel = "^6.9"
-nbstripout = "^0.6"
+nbstripout = "^0.7"
 ruff = "^0.1"
 pytest-cov = "^4.1"
 codespell = "^2.2"
@@ -46,21 +44,14 @@ profiling = ["pyinstrument"]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

-[tool.black]
-python_version = "3.10"
-line-length = 99
-
-[tool.isort]
-profile = "black"
-known_first_party = ["vertex", "tests"]
-
 [tool.ruff]
+target-version = "py38"
+line-length = 99
 ignore = [
     "D100",
     "D205",
     "D415",
 ]
-line-length = 99
 select = [
     "B",   # bugbear
     "C4",  # comprehensions
@@ -71,12 +62,16 @@ select = [
     "W",   # flake8
     "S",   # bandit
     "N",   # pep8-naming
-    "RUF"  # ruff
+    "RUF", # ruff
+    "I",   # isort
 ]

 [tool.ruff.pydocstyle]
 convention = "google"

+[tool.ruff.lint.isort]
+known-first-party = ["vertex", "tests", "deployer"]
+
 [tool.ruff.per-file-ignores]
 "*cli.py" = ["D", "B008"]
 "*__init__.py" = [
diff --git a/tests/conftest.py b/tests/conftest.py
index 4ef25c2..718f804 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -14,3 +14,9 @@ def dummy_pipeline(name: str, artifact: Input[Artifact]) -> None:
         dummy_component(name=name, artifact=artifact)

     return dummy_pipeline
+
+
+try:
+    raise Exception("This is an exception.")
+except Exception as e:
+    exception_traceback = e.__traceback__
diff --git a/tests/unit_tests/test_utils.py b/tests/unit_tests/test_utils.py
index c6b158f..76d64ac 100644
--- a/tests/unit_tests/test_utils.py
+++ b/tests/unit_tests/test_utils.py
@@ -2,8 +2,9 @@
 from unittest.mock import patch

 import pytest
+from conftest import exception_traceback

-from deployer.utils.utils import make_enum_from_python_package_dir
+from deployer.utils.utils import filter_lines_from, make_enum_from_python_package_dir


 class TestMakeEnumFromPythonPackageDir:
@@ -63,3 +64,53 @@ def test_directory_does_not_exist(self, mock_glob):
         make_enum_from_python_package_dir(dir_path, raise_if_not_found=True)

         mock_glob.assert_not_called()
+
+
+class TestFilterLinesFrom:
+    try:
+        raise Exception("This is an exception.")
+    except Exception as e:
+        traceback = e.__traceback__
+
+    def test_pathlib_input(self):
+        # Given
+        internal_path = Path(__file__)
+        external_path = Path("tests/conftest.py")
+
+        # When
+        internal_output = filter_lines_from(self.traceback, internal_path)
+        external_output = filter_lines_from(exception_traceback, external_path)
+        assert internal_output == (
+            f'  File "{internal_path}", line 71, in TestFilterLinesFrom\n'
+            '    raise Exception("This is an exception.")\n'
+        )
+        assert external_output == (
+            f'  File "{external_path.resolve()}", line 20, in <module>\n'
+            '    raise Exception("This is an exception.")\n'
+        )
+
+    def test_string_input(self):
+        # Given
+        internal_path = str(Path(__file__))
+        external_path = "tests/conftest.py"
+
+        # When
+        internal_output = filter_lines_from(self.traceback, internal_path)
+        external_output = filter_lines_from(exception_traceback, external_path)
+        print(internal_output)
+        assert internal_output == (
+            f'  File "{internal_path}", line 71, in TestFilterLinesFrom\n'
+            '    raise Exception("This is an exception.")\n'
+        )
+        assert external_output == (
+            f'  File "{Path(external_path).resolve()}", line 20, in <module>\n'
+            '    raise Exception("This is an exception.")\n'
+        )
+
+    def test_empty_result(self):
+        # Given
+        path = Path(__file__)
+
+        # When
+        output = filter_lines_from(exception_traceback, path)
+        assert output == "Could not find potential source of error."
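
Taken together, these changes make `deploy` skip the interactive settings confirmation by default (`--skip-validation` / `-y`), while `--no-skip` / `-n` brings back the prompt driven by `validate_or_log_settings`. A minimal usage sketch, assuming the example project's `dummy_pipeline` and `example.env` used in the CI workflow above:

```bash
# Default behaviour: the loaded Vertex settings are only logged, no prompt is shown.
vertex-deployer deploy dummy_pipeline --compile --env-file example.env

# Opt back into validation: the settings are printed as a table and a confirmation
# is requested; declining aborts the command with "User chose to exit".
vertex-deployer deploy dummy_pipeline --compile --env-file example.env --no-skip
```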