From 56d69f418d40d0a30007e6b66af212409e2717fc Mon Sep 17 00:00:00 2001
From: Jules Bertrand <33326907+julesbertrand@users.noreply.github.com>
Date: Mon, 6 Nov 2023 11:12:52 +0100
Subject: [PATCH] feat: configure with pyproject (#82)

---
 .github/workflows/ci.yaml     |   9 ++-
 README.md                     |  64 ++++++++++++++++++++-
 deployer/cli.py               |  87 ++++++++++++++++++++++------
 deployer/configuration.py     | 105 ++++++++++++++++++++++++++++++++++
 deployer/pipeline_checks.py   |  16 +++---
 deployer/pipeline_deployer.py |  11 ++--
 deployer/utils/exceptions.py  |   4 ++
 deployer/utils/utils.py       |  33 ++++++++++-
 docs/CLI_REFERENCE.md         |  15 +++++
 pyproject.toml                |   7 +++
 10 files changed, 316 insertions(+), 35 deletions(-)
 create mode 100644 deployer/configuration.py

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index f3920ce..5389d50 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -87,6 +87,11 @@ jobs:
           cd example
           poetry run vertex-deployer --version
 
+      - name: Test config command
+        run: |
+          cd example
+          poetry run vertex-deployer config --all
+
       - name: Test list command
         run: |
           cd example
@@ -95,7 +100,7 @@ jobs:
       - name: Test check command
         run: |
           cd example
-          poetry run vertex-deployer -log DEBUG check --all
+          poetry run vertex-deployer check --all
 
       - name: Test deploy command
         # Cannot check more than compile action here, need GCP environment for upload, run, schedule
@@ -107,4 +112,4 @@ jobs:
         run: |
          cd example
           poetry run vertex-deployer create test_pipeline --config-type py
-          [ -e vertex/pipelines/test_pipeline.py ] && echo 1 || echo 0
+          [ -e example/vertex/pipelines/test_pipeline.py ] && echo 1 || echo 0
diff --git a/README.md b/README.md
index eb873c7..840c11b 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,9 @@
   - [CLI: Other commands](#🛠️-cli-other-commands)
     - [`create`](#create)
     - [`list`](#list)
-  - [CLI: Options](#🍭-cli-options)
+    - [`config`](#config)
+  - [CLI: Options](#cli-options)
+- [Configuration](#configuration)
 
 [Full CLI documentation](docs/CLI_REFERENCE.md)
 
@@ -208,7 +210,8 @@ vertex
 You must have at least these files. If you need to share some config elements between pipelines,
 you can have a `shared` folder in `configs` and import them in your pipeline configs.
 
-You can use the [`create`](../usage#create) command to create a new pipeline and config files.
+If you're following a different folder structure, you can change the default paths in the `pyproject.toml` file.
+See the [Configuration](#configuration) section for more information.
 
 #### Pipelines
 
@@ -328,6 +331,14 @@ You can list all pipelines in the `vertex/pipelines` folder using the `list` command:
 ```bash
 vertex-deployer list --with-configs
 ```
 
+#### `config`
+
+You can check your `vertex-deployer` configuration options using the `config` command.
+Fields set in `pyproject.toml` override the default values and are marked with a `*` in the output:
+```bash
+vertex-deployer config --all
+```
+
 ### 🍭 CLI: Options
 
 ```bash
@@ -346,6 +357,54 @@ vertex-deployer --log-level DEBUG deploy ...
 ```
 
+## Configuration
+
+You can configure the deployer using the `pyproject.toml` file to better fit your needs.
+This will overwrite default values. It can be useful if you always use the same options, e.g.
+always the same `--scheduler-timezone`.
+
+```toml
+[tool.vertex_deployer]
+pipelines_root_path = "my/path/to/vertex/pipelines"
+config_root_path = "my/path/to/vertex/configs"
+log_level = "INFO"
+
+[tool.vertex_deployer.deploy]
+scheduler_timezone = "Europe/Paris"
+```
+
+You can display all the configurable parameters with their default values by running:
+```bash
+$ vertex-deployer config --all
+'*' means the value was set in config file
+
+* pipelines_root_path=my/path/to/vertex/pipelines
+* config_root_path=my/path/to/vertex/configs
+* log_level=INFO
+deploy
+  env_file=None
+  compile=True
+  upload=False
+  run=False
+  schedule=False
+  cron=None
+  delete_last_schedule=False
+  * scheduler_timezone=Europe/Paris
+  tags=['latest']
+  config_filepath=None
+  config_name=None
+  enable_caching=False
+  experiment_name=None
+  local_package_path=vertex/pipelines/compiled_pipelines
+check
+  all=False
+  config_filepath=None
+  raise_error=False
+list
+  with_configs=False
+create
+  config_type=json
+```
+
 ## Repository Structure
 
 ```
@@ -360,6 +419,7 @@ vertex-deployer --log-level DEBUG deploy ...
 ├─ deployer                                     # Source code
 │  ├─ __init__.py
 │  ├─ cli.py
+│  ├─ configuration.py
 │  ├─ constants.py
 │  ├─ pipeline_checks.py
 │  ├─ pipeline_deployer.py
diff --git a/deployer/cli.py b/deployer/cli.py
index ba0d504..6499add 100644
--- a/deployer/cli.py
+++ b/deployer/cli.py
@@ -2,17 +2,18 @@
 from pathlib import Path
 from typing import List
 
+import rich.traceback
 import typer
 from loguru import logger
 from pydantic import ValidationError
 from typing_extensions import Annotated
 
+from deployer.configuration import load_configuration
 from deployer.constants import (
-    CONFIG_ROOT_PATH,
     DEFAULT_LOCAL_PACKAGE_PATH,
+    DEFAULT_SCHEDULER_TIMEZONE,
     DEFAULT_TAGS,
     PIPELINE_MINIMAL_TEMPLATE,
-    PIPELINE_ROOT_PATH,
     PYTHON_CONFIG_TEMPLATE,
 )
 from deployer.utils.config import (
@@ -23,12 +24,19 @@
 )
 from deployer.utils.logging import LoguruLevel, console
 from deployer.utils.utils import (
+    dict_to_repr,
     import_pipeline_from_dir,
     make_enum_from_python_package_dir,
     print_check_results_table,
     print_pipelines_list,
 )
 
+rich.traceback.install()
+
+deployer_config = load_configuration()
+
+PipelineName = make_enum_from_python_package_dir(deployer_config.pipelines_root_path)
+
 
 def display_version_and_exit(value: bool):
     if value:
@@ -38,12 +46,16 @@ def display_version_and_exit(value: bool):
         raise typer.Exit()
 
 
-app = typer.Typer(no_args_is_help=True, rich_help_panel="rich", rich_markup_mode="markdown")
+app = typer.Typer(
+    no_args_is_help=True,
+    rich_help_panel="rich",
+    rich_markup_mode="markdown",
+    context_settings={"default_map": deployer_config.model_dump()},
+)
 
 
-@app.callback(name="set_logger")
-def cli_set_logger(
-    ctx: typer.Context,
+@app.callback(invoke_without_command=True)
+def main(
     log_level: Annotated[
         LoguruLevel, typer.Option("--log-level", "-log", help="Set the logging level.")
     ] = LoguruLevel.INFO,
@@ -60,7 +72,26 @@
     logger.configure(handlers=[{"sink": sys.stderr, "level": log_level}])
 
 
-PipelineName = make_enum_from_python_package_dir(PIPELINE_ROOT_PATH)
+@app.command()
+def config(
+    all: Annotated[
+        bool, typer.Option("--all", "-a", help="Whether to display all configuration values.")
+    ] = False,
+):
+    """Display the configuration from pyproject.toml."""
+
+    if all:
+        config_repr = dict_to_repr(
+            dict_=deployer_config.model_dump(),
+            subdict=deployer_config.model_dump(exclude_unset=True),
+        )
+        config_str = "[italic]'*' means the value was set in config file[/italic]\n\n"
+        config_str += "\n".join(config_repr)
+    else:
+        config_repr = dict_to_repr(dict_=deployer_config.model_dump(exclude_unset=True))
+        config_str = "\n".join(config_repr)
+
+    console.print(config_str)
"\n".join(config_repr) + else: + config_repr = dict_to_repr(dict_=deployer_config.model_dump(exclude_unset=True)) + config_str = "\n".join(config_repr) + + console.print(config_str) @app.command(no_args_is_help=True) @@ -116,6 +147,13 @@ def deploy( # noqa: C901 help="Whether to delete the previous schedule before creating a new one.", ), ] = False, + scheduler_timezone: Annotated[ + str, + typer.Option( + help="Timezone for scheduling the pipeline." + " Must be a valid string from IANA time zone database", + ), + ] = DEFAULT_SCHEDULER_TIMEZONE, tags: Annotated[ List[str], typer.Option(help="The tags to use when uploading the pipeline.") ] = DEFAULT_TAGS, @@ -186,7 +224,9 @@ def deploy( # noqa: C901 " Please specify only one to run or schedule a pipeline." ) - pipeline_func = import_pipeline_from_dir(PIPELINE_ROOT_PATH, pipeline_name.value) + pipeline_func = import_pipeline_from_dir( + deployer_config.pipelines_root_path, pipeline_name.value + ) from deployer.pipeline_deployer import VertexPipelineDeployer @@ -204,7 +244,9 @@ def deploy( # noqa: C901 if run or schedule: if config_name is not None: - config_filepath = Path(CONFIG_ROOT_PATH) / pipeline_name.value / config_name + config_filepath = ( + Path(deployer_config.config_root_path) / pipeline_name.value / config_name + ) parameter_values, input_artifacts = load_config(config_filepath) if compile: @@ -234,6 +276,7 @@ def deploy( # noqa: C901 parameter_values=parameter_values, tag=tags[0] if tags else None, delete_last_schedule=delete_last_schedule, + scheduler_timezone=scheduler_timezone, ) @@ -289,7 +332,7 @@ def check( if len(PipelineName.__members__) == 0: raise ValueError( "No pipeline found. Please check that the pipeline root path is correct" - f" ('{PIPELINE_ROOT_PATH}')" + f" ('{deployer_config.pipelines_root_path}')" ) from deployer.pipeline_checks import Pipelines @@ -302,7 +345,8 @@ def check( pipelines_to_check = [pipeline_name] if config_filepath is None: to_check = { - p.value: list_config_filepaths(CONFIG_ROOT_PATH, p.value) for p in pipelines_to_check + p.value: list_config_filepaths(deployer_config.config_root_path, p.value) + for p in pipelines_to_check } else: to_check = {p.value: [config_filepath] for p in pipelines_to_check} @@ -312,7 +356,12 @@ def check( Pipelines.model_validate( { "pipelines": { - p: {"pipeline_name": p, "config_paths": config_filepaths} + p: { + "pipeline_name": p, + "config_paths": config_filepaths, + "pipelines_root_path": deployer_config.pipelines_root_path, + "config_root_path": deployer_config.config_root_path, + } for p, config_filepaths in to_check.items() } } @@ -339,13 +388,13 @@ def list( if len(PipelineName.__members__) == 0: logger.warning( "No pipeline found. Please check that the pipeline root path is" - f" correct (current: '{PIPELINE_ROOT_PATH}')" + f" correct (current: '{deployer_config.pipelines_root_path}')" ) raise typer.Exit() if with_configs: pipelines_dict = { - p.name: list_config_filepaths(CONFIG_ROOT_PATH, p.name) + p.name: list_config_filepaths(deployer_config.config_root_path, p.name) for p in PipelineName.__members__.values() } else: @@ -368,18 +417,18 @@ def create( """Create files structure for a new pipeline.""" logger.info(f"Creating pipeline {pipeline_name}") - if not Path(PIPELINE_ROOT_PATH).is_dir(): + if not Path(deployer_config.pipelines_root_path).is_dir(): raise FileNotFoundError( - f"Pipeline root path '{PIPELINE_ROOT_PATH}' does not exist." + f"Pipeline root path '{deployer_config.pipelines_root_path}' does not exist." 
" Please check that the pipeline root path is correct" - f" or create it with `mkdir -p {PIPELINE_ROOT_PATH}`." + f" or create it with `mkdir -p {deployer_config.pipelines_root_path}`." ) - pipeline_filepath = Path(PIPELINE_ROOT_PATH) / f"{pipeline_name}.py" + pipeline_filepath = Path(deployer_config.pipelines_root_path) / f"{pipeline_name}.py" pipeline_filepath.touch(exist_ok=False) pipeline_filepath.write_text(PIPELINE_MINIMAL_TEMPLATE.format(pipeline_name=pipeline_name)) - config_dirpath = Path(CONFIG_ROOT_PATH) / pipeline_name + config_dirpath = Path(deployer_config.config_root_path) / pipeline_name config_dirpath.mkdir(exist_ok=False) for config_name in ["test", "dev", "prod"]: config_filepath = config_dirpath / f"{config_name}.{config_type}" diff --git a/deployer/configuration.py b/deployer/configuration.py new file mode 100644 index 0000000..f07fbff --- /dev/null +++ b/deployer/configuration.py @@ -0,0 +1,105 @@ +from functools import lru_cache +from pathlib import Path +from typing import Any, Dict, List, Optional + +import toml +from loguru import logger +from pydantic import ValidationError + +from deployer import constants +from deployer.utils.config import ConfigType +from deployer.utils.exceptions import InvalidPyProjectTOMLError +from deployer.utils.models import CustomBaseModel + + +class DeployerDeployConfig(CustomBaseModel): + """Configuration for Vertex Deployer `deploy` command.""" + + env_file: Optional[Path] = None + compile: bool = True + upload: bool = False + run: bool = False + schedule: bool = False + cron: Optional[str] = None + delete_last_schedule: bool = False + scheduler_timezone: str = constants.DEFAULT_SCHEDULER_TIMEZONE + tags: List[str] = constants.DEFAULT_TAGS + config_filepath: Optional[Path] = None + config_name: Optional[str] = None + enable_caching: bool = False + experiment_name: Optional[str] = None + local_package_path: Path = constants.DEFAULT_LOCAL_PACKAGE_PATH + + +class DeployerCheckConfig(CustomBaseModel): + """Configuration for Vertex Deployer `check` command.""" + + all: bool = False + config_filepath: Optional[Path] = None + raise_error: bool = False + + +class DeployerListConfig(CustomBaseModel): + """Configuration for Vertex Deployer `list` command.""" + + with_configs: bool = False + + +class DeployerCreateConfig(CustomBaseModel): + """Configuration for Vertex Deployer `create` command.""" + + config_type: ConfigType = ConfigType.json + + +class DeployerConfig(CustomBaseModel): + """Configuration for Vertex Deployer.""" + + pipelines_root_path: Path = constants.PIPELINE_ROOT_PATH + config_root_path: Path = constants.CONFIG_ROOT_PATH + log_level: str = "INFO" + deploy: DeployerDeployConfig = DeployerDeployConfig() + check: DeployerCheckConfig = DeployerCheckConfig() + list: DeployerListConfig = DeployerListConfig() + create: DeployerCreateConfig = DeployerCreateConfig() + + +def find_pyproject_toml(path_project_root: Path) -> Optional[str]: + """Find the pyproject.toml file.""" + path_pyproject_toml = path_project_root / "pyproject.toml" + if path_pyproject_toml.is_file(): + if path_pyproject_toml.exists(): + return str(path_pyproject_toml) + return None + + +def parse_pyproject_toml(path_pyproject_toml: str) -> Dict[str, Any]: + """Parse a pyproject toml file, pulling out relevant parts for Deployer.""" + pyproject_toml = toml.load(path_pyproject_toml) + config: dict[str, Any] = pyproject_toml.get("tool", {}).get("vertex_deployer", {}) + config = {k.replace("--", "").replace("-", "_"): v for k, v in config.items()} + return config + + 
+@lru_cache()
+def load_configuration() -> DeployerConfig:
+    """Load the configuration for Vertex Deployer."""
+    path_project_root = Path.cwd().resolve()
+    path_pyproject_toml = find_pyproject_toml(path_project_root)
+
+    if path_pyproject_toml is None:
+        logger.debug(
+            "No pyproject.toml file found. Using default configuration for Vertex Deployer."
+        )
+        config = {}
+    else:
+        config = parse_pyproject_toml(path_pyproject_toml)
+
+    try:
+        config = DeployerConfig(**config)
+    except ValidationError as e:
+        msg = f"In {path_pyproject_toml}:\n{e}\n"
+        msg += "Please check your configuration file."
+
+        raise InvalidPyProjectTOMLError(msg) from e
+
+    return config
diff --git a/deployer/pipeline_checks.py b/deployer/pipeline_checks.py
index 7d63390..b7bd8a9 100644
--- a/deployer/pipeline_checks.py
+++ b/deployer/pipeline_checks.py
@@ -8,11 +8,7 @@
 from pydantic_core import PydanticCustomError
 from typing_extensions import Annotated
 
-from deployer.constants import (
-    CONFIG_ROOT_PATH,
-    PIPELINE_ROOT_PATH,
-    TEMP_LOCAL_PACKAGE_PATH,
-)
+from deployer.constants import TEMP_LOCAL_PACKAGE_PATH
 from deployer.pipeline_deployer import VertexPipelineDeployer
 from deployer.utils.config import list_config_filepaths, load_config
 from deployer.utils.exceptions import BadConfigError
@@ -53,13 +49,17 @@ class Pipeline(CustomBaseModel):
 
     pipeline_name: str
     config_paths: Annotated[List[Path], Field(validate_default=True)] = None
+    pipelines_root_path: Path
+    config_root_path: Path
 
     @model_validator(mode="before")
     @classmethod
     def populate_config_names(cls, data: Any) -> Any:
         """Populate config names before validation"""
         if data.get("config_paths") is None:
-            data["config_paths"] = list_config_filepaths(CONFIG_ROOT_PATH, data["pipeline_name"])
+            data["config_paths"] = list_config_filepaths(
+                str(data["config_root_path"]), data["pipeline_name"]
+            )
         return data
 
     @computed_field
@@ -67,7 +67,9 @@ def pipeline(self) -> Any:
         """Import pipeline"""
         if getattr(self, "_pipeline", None) is None:
             with disable_logger("deployer.utils.utils"):
-                self._pipeline = import_pipeline_from_dir(PIPELINE_ROOT_PATH, self.pipeline_name)
+                self._pipeline = import_pipeline_from_dir(
+                    str(self.pipelines_root_path), self.pipeline_name
+                )
         return self._pipeline
 
     @model_validator(mode="after")
diff --git a/deployer/pipeline_deployer.py b/deployer/pipeline_deployer.py
index 2ab3ce6..2b4b608 100644
--- a/deployer/pipeline_deployer.py
+++ b/deployer/pipeline_deployer.py
@@ -11,7 +11,7 @@
 from loguru import logger
 from requests import HTTPError
 
-from deployer.constants import DEFAULT_LOCAL_PACKAGE_PATH, DEFAULT_SCHEDULER_TIMEZONE
+from deployer.constants import DEFAULT_LOCAL_PACKAGE_PATH
 from deployer.utils.exceptions import (
     MissingGoogleArtifactRegistryHostError,
     TagNotFoundError,
@@ -221,6 +221,7 @@ def schedule(
         parameter_values: Optional[dict] = None,
         tag: Optional[str] = None,
         delete_last_schedule: bool = False,
+        scheduler_timezone: str = "Europe/Paris",
     ) -> VertexPipelineDeployer:
         """Create pipeline schedule on Vertex AI Pipelines
 
@@ -228,12 +229,14 @@ def schedule(
         and if either the tag or the template_name and version_name are provided.
 
         Args:
-            cron (str): Cron expression without TZ. TZ is hardcoded to 'TZ=Europe/Paris'.
+            cron (str): Cron expression without TZ.
             enable_caching (bool, optional): Whether to enable caching. Defaults to False.
             parameter_values (dict, optional): Pipeline parameter values. Defaults to None.
             tag (str, optional): Tag of the pipeline template. Defaults to None.
             delete_last_schedule (bool, optional): Whether to delete previous schedule.
                 Defaults to False.
+            scheduler_timezone (str, optional): Scheduler timezone. Must be a valid string from
+                the IANA time zone database. Defaults to 'Europe/Paris'.
         """
         self._check_gar_host()
@@ -290,9 +293,9 @@ def schedule(
             display_name=schedule_display_name,
             location=self.region,
         )
-        # TZ must be a valid string from IANA time zone database
+
         pipeline_job_schedule.create(
-            cron=f"TZ={DEFAULT_SCHEDULER_TIMEZONE} {cron}",
+            cron=f"TZ={scheduler_timezone} {cron}",
             service_account=self.service_account,
         )
diff --git a/deployer/utils/exceptions.py b/deployer/utils/exceptions.py
index 3baeb49..a6e7278 100644
--- a/deployer/utils/exceptions.py
+++ b/deployer/utils/exceptions.py
@@ -12,3 +12,7 @@ class UnsupportedConfigFileError(Exception):
 
 class BadConfigError(ValueError):
     """Raised when a config is invalid."""
+
+
+class InvalidPyProjectTOMLError(Exception):
+    """Raised when the Vertex Deployer configuration in pyproject.toml is invalid."""
diff --git a/deployer/utils/utils.py b/deployer/utils/utils.py
index 1b8acfb..a8e7991 100644
--- a/deployer/utils/utils.py
+++ b/deployer/utils/utils.py
@@ -2,7 +2,7 @@
 import warnings
 from enum import Enum
 from pathlib import Path
-from typing import Dict, Optional
+from typing import Dict, List, Mapping, Optional
 
 from kfp.components import graph_component
 from loguru import logger
@@ -64,6 +64,37 @@ def import_pipeline_from_dir(dirpath: Path, pipeline_name: str) -> graph_compone
     return pipeline
 
 
+def dict_to_repr(dict_: dict, subdict: dict = None, depth: int = 0, indent: int = 2) -> List[str]:
+    """Convert a dictionary to a list of strings for printing, recursively.
+
+    Args:
+        dict_ (dict): The dictionary to convert.
+        subdict (dict, optional): A subdictionary whose keys should be highlighted
+            in the output. Defaults to None.
+        depth (int, optional): The current recursion depth, used for indentation.
+            Defaults to 0.
+        indent (int, optional): The indentation level. Defaults to 2.
+
+    Returns:
+        List[str]: A list of strings representing the dictionary.
+    """
+    if subdict is None:
+        subdict = {}
+
+    dict_repr = []
+    for k, v in dict_.items():
+        if isinstance(v, Mapping):
+            v_ref = subdict.get(k, {})
+            dict_repr.append(" " * indent * depth + f"{k}")
+            dict_repr.extend(dict_to_repr(v, v_ref, depth=depth + 1, indent=indent))
+        else:
+            if k in subdict:
+                v_str = " " * indent * depth + f"[cyan]* {k}={v}[/cyan]"
+            else:
+                v_str = " " * indent * depth + f"[white]{k}={v}[/white]"
+            dict_repr.append(v_str)
+    return dict_repr
+
+
 def print_pipelines_list(pipelines_dict: Dict[str, list], with_configs: bool = False) -> None:
     """This function prints a table of pipelines to the console.
diff --git a/docs/CLI_REFERENCE.md b/docs/CLI_REFERENCE.md
index 79b00a4..163afb6 100644
--- a/docs/CLI_REFERENCE.md
+++ b/docs/CLI_REFERENCE.md
@@ -53,6 +53,21 @@ $ vertex-deployer check [OPTIONS]
 * `--raise-error, -re / --no-raise-error, -nre`: Whether to raise an error if the pipeline is not valid. [default: no-raise-error]
 * `--help`: Show this message and exit.
 
+## `vertex-deployer config`
+
+Display the configuration from pyproject.toml.
+
+**Usage**:
+
+```console
+$ vertex-deployer config [OPTIONS]
+```
+
+**Options**:
+
+* `-a, --all`: Whether to display all configuration values.
+* `--help`: Show this message and exit.
+
 ## `vertex-deployer create`
 
 Create files structure for a new pipeline.
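A quick sketch of what `dict_to_repr` produces (illustrative; the input dicts below are made up, and the `[cyan]`/`[white]` tags are rich markup consumed by `console.print` in the `config` command):

```python
from deployer.utils.utils import dict_to_repr

# All current values vs. the subset explicitly set in pyproject.toml.
values = {"log_level": "INFO", "deploy": {"scheduler_timezone": "Europe/Paris"}}
set_in_config = {"deploy": {"scheduler_timezone": "Europe/Paris"}}

for line in dict_to_repr(values, set_in_config):
    print(line)
# [white]log_level=INFO[/white]
# deploy
#   [cyan]* scheduler_timezone=Europe/Paris[/cyan]
```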
diff --git a/pyproject.toml b/pyproject.toml
index 4ae737e..09b87f7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -90,3 +90,10 @@ tag_format = "{version}"
 
 [tool.semantic_release.changelog]
 exclude_commit_patterns = ['''^chore\(release\).*''']
+
+
+[tool.vertex_deployer]
+log_level = "DEBUG"
+
+[tool.vertex_deployer.list]
+with_configs = true
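As an end-to-end illustration (assumed paths; adapt to your project), a repository consuming vertex-deployer could opt into this feature with a section like the following, then run `vertex-deployer config --all` from the project root to see the overridden values flagged with `*`:

```toml
# pyproject.toml of a consuming project (illustrative, not from this patch).
[tool.vertex_deployer]
pipelines-root-path = "vertex/pipelines"   # kebab-case accepted: top-level keys are normalized
config_root_path = "vertex/configs"

[tool.vertex_deployer.deploy]
scheduler_timezone = "Europe/Paris"        # nested keys are NOT normalized; use snake_case here
```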