diff --git a/.github/workflows/deploy_docs.yaml b/.github/workflows/deploy_docs.yaml
new file mode 100644
index 0000000..50ccd49
--- /dev/null
+++ b/.github/workflows/deploy_docs.yaml
@@ -0,0 +1,45 @@
+name: Deploy MkDocs to GitHub Pages
+
+on:
+  push:
+    branches:
+      - main
+
+env:
+  python-version: "3.10"
+
+jobs:
+  deploy-docs:
+    name: Deploy docs
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Set up Python ${{ env.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ env.python-version }}
+
+      - name: Install poetry
+        run: make download-poetry
+
+      - name: Set up pip cache
+        uses: actions/cache@v3.2.4
+        with:
+          path: ~/.cache/pypoetry/virtualenvs
+          key: venv-${{ env.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('poetry.lock') }}
+
+      - name: Set Poetry Path
+        run: |
+          echo "$HOME/.poetry/bin" >> $GITHUB_PATH
+
+
+      - name: Install requirements
+        run: |
+          poetry run pip install --upgrade pip
+          poetry install --with docs
+
+      - name: Deploy MkDocs documentation
+        run: |
+          poetry run mkdocs build
+          poetry run mkdocs gh-deploy --force
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 120ba82..77d3083 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -32,8 +32,8 @@ To develop, you will need dev requirements too. Run:
 ```
 make install-dev-requirements
 ```
 
-> **Note**
-> `poetry.lock` is not committed deliberately, as recommended by Poetry's doc. You can read more about it [here](https://python-poetry.org/docs/basic-usage/#as-a-library-developer).
+!!! note "About poetry.lock"
+    `poetry.lock` is not committed deliberately, as recommended by Poetry's doc. You can read more about it [here](https://python-poetry.org/docs/basic-usage/#as-a-library-developer).
 
 ### Codestyle
@@ -69,6 +69,11 @@ The Release GitHub Action does the following:
 
 The action is triggered by any push to main.
 
-> [!NOTE]
-> The release action will be triggered by any push to `main` only if the 'CI' job in the 'release.yaml' workflow succeeds.
-> Python Semantic Release will take care of version number update, tag creation and release creation.
+!!! tip
+    The release action will be triggered by any push to `main` only if the 'CI' job in the 'release.yaml' workflow succeeds.
+    Python Semantic Release will take care of version number update, tag creation and release creation.
+
+
+When it's done, rebase develop to keep it up to date with main.
+
+And you're done! 🎉
diff --git a/README.md b/README.md
index 047526e..eb873c7 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@

Vertex Pipelines Deployer

Deploy Vertex Pipelines within minutes

- This tool is a wrapper aound kfp and google-cloud-aiplatform that allows you to check, compile, upload, run and schedule Vertex Pipelines to a Vertex AI Pipelines endpoint in a standardized manner.
+ This tool is a wrapper around kfp and google-cloud-aiplatform that allows you to check, compile, upload, run and schedule Vertex Pipelines in a standardized manner.


@@ -23,48 +23,55 @@
-## Table of Contents
-- [Why this tool?](#why-this-tool)
-- [Prerequisites](#prerequisites)
-- [Installation](#installation)
+## 📚 Table of Contents
+- [Why this tool?](#❓-why-this-tool)
+- [Prerequisites](#📋-prerequisites)
+- [Installation](#📦-installation)
   - [From git repo](#from-git-repo)
   - [From GCS (not available in PyPI yet)](#from-gcs-not-available-in-pypi-yet)
   - [Add to requirements](#add-to-requirements)
-- [Usage](#usage)
-  - [Setup](#setup)
-  - [Folder Structure](#folder-structure)
-  - [CLI: Deploying a Pipeline with `deploy`](#cli-deploying-a-pipeline-with-deploy)
-  - [CLI: Checking Pipelines are valid with `check`](#cli-checking-pipelines-are-valid-with-check)
-  - [CLI: Other commands](#cli-other-commands)
+- [Usage](#🚀-usage)
+  - [Setup](#🛠️-setup)
+  - [Folder Structure](#📁-folder-structure)
+  - [CLI: Deploying a Pipeline with `deploy`](#🚀-cli-deploying-a-pipeline-with-deploy)
+  - [CLI: Checking Pipelines are valid with `check`](#✅-cli-checking-pipelines-are-valid-with-check)
+  - [CLI: Other commands](#🛠️-cli-other-commands)
     - [`create`](#create)
     - [`list`](#list)
-  - [CLI: Options](#cli-options)
+  - [CLI: Options](#🍭-cli-options)
 
 [Full CLI documentation](docs/CLI_REFERENCE.md)
 
-## Why this tool?
+
+## ❓ Why this tool?
+
 Three use cases:
+
 1. **CI:** check pipeline validity.
 2. **Dev mode:** quickly iterate over your pipelines by compiling and running them in multiple environments (test, dev, staging, etc) without duplicating code or looking for the right kfp / aiplatform snippet.
 3. **CD:** deploy your pipelines to Vertex Pipelines in a standardized manner in your CD with Cloud Build or GitHub Actions.
 
 Four commands:
+
 - `check`: check your pipelines (imports, compile, check configs validity against pipeline definition).
 - `deploy`: compile, upload to Artifact Registry, run and schedule your pipelines.
 - `create`: create a new pipeline and config files.
 - `list`: list all pipelines in the `vertex/pipelines` folder.
+
 
-## Prerequisites
+## 📋 Prerequisites
+
 - Unix-like environment (Linux, macOS, WSL, etc...)
 - Python 3.8 to 3.10
 - Google Cloud SDK
 - A GCP project with Vertex Pipelines enabled
+
 
-## Installation
-
+## 📦 Installation
+
 ### From git repo
@@ -107,25 +114,25 @@ Then add the following line to your `requirements.in` file:
 ```bash
 file:my/path/to/vertex_deployer-$VERSION.tar.gz
 ```
+
 
-## Usage
-
-### Setup
+## 🚀 Usage
+
+### 🛠️ Setup
 
 1. Setup your GCP environment:
-
 ```bash
 export PROJECT_ID=
 gcloud config set project $PROJECT_ID
 gcloud auth login
 gcloud auth application-default login
 ```
-2. You need the following APIs to be enabled:
-  - Cloud Build API
-  - Artifact Registry API
-  - Cloud Storage API
-  - Vertex AI API
+2. You need the following APIs to be enabled:
+- Cloud Build API
+- Artifact Registry API
+- Cloud Storage API
+- Vertex AI API
 ```bash
 gcloud services enable \
   cloudbuild.googleapis.com \
@@ -133,6 +140,7 @@ gcloud services enable \
   storage.googleapis.com \
   aiplatform.googleapis.com
 ```
+
 3. Create an artifact registry repository for your base images (Docker format):
 ```bash
 export GAR_DOCKER_REPO_ID=
 gcloud artifacts repositories create ${GAR_DOCKER_REPO_ID} \
   --location=${GAR_LOCATION} \
   --repository-format=docker
 ```
+
 4. Build and upload your base images to the repository. To do so, please follow Google Cloud Build documentation.
+
 5. Create an artifact registry repository for your pipelines (KFP format):
 ```bash
 export GAR_PIPELINES_REPO_ID=
@@ -149,12 +159,14 @@ gcloud artifacts repositories create ${GAR_PIPELINES_REPO_ID} \
   --location=${GAR_LOCATION} \
   --repository-format=kfp
 ```
+
 6. Create a GCS bucket for Vertex Pipelines staging:
 ```bash
 export GCP_REGION=
 export VERTEX_STAGING_BUCKET_NAME=
 gcloud storage buckets create gs://${VERTEX_STAGING_BUCKET_NAME} --location=${GCP_REGION}
 ```
+
 7. Create a service account for Vertex Pipelines:
 ```bash
 export VERTEX_SERVICE_ACCOUNT_NAME=foobar
@@ -178,7 +190,7 @@ gcloud artifacts repositories add-iam-policy-binding ${GAR_PIPELINES_REPO_ID} \
 
 You can use the deployer CLI (see example below) or import [`VertexPipelineDeployer`](deployer/pipeline_deployer.py) in your code (try it yourself).
 
-### Folder Structure
+### 📁 Folder Structure
 
 You must respect the following folder structure. If you already follow the [Vertex Pipelines Starter Kit folder structure](https://github.com/artefactory/vertex-pipeline-starter-kit), it should be pretty smooth to use this tool:
 
@@ -192,9 +204,11 @@ vertex
 └─ {pipeline_name}.py
 ```
 
-> [!NOTE]
-> You must have at least these files. If you need to share some config elements between pipelines,
-> you can have a `shared` folder in `configs` and import them in your pipeline configs.
+!!! tip "About folder structure"
+    You must have at least these files. If you need to share some config elements between pipelines,
+    you can have a `shared` folder in `configs` and import them in your pipeline configs.
+
+    You can use the [`create`](../usage#create) command to create a new pipeline and config files.
 
 #### Pipelines
 
@@ -226,10 +240,12 @@ They must be located in the `config/{pipeline_name}` folder.
 `.py` files are useful to define complex configs (e.g. a list of dicts) while `.json` / `.toml` files are useful to define simple configs (e.g. a string).
 
 **How to format them?**
+
 - `.json` and `.toml` files must be valid JSON / TOML files containing only one dict of key: value representing parameter values.
 - `.py` files must be valid python files with two important elements:
-  - `parameter_values` to pass arguments to your pipeline
-  - `input_artifacts` if you want to retrieve and create input artifacts to your pipeline.
+
+    * `parameter_values` to pass arguments to your pipeline
+    * `input_artifacts` if you want to retrieve and create input artifacts to your pipeline.
 
 See [Vertex Documentation](https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.PipelineJob) for more information.
 
 **How to name them?**
@@ -252,13 +268,15 @@ VERTEX_STAGING_BUCKET_NAME=YOUR_VERTEX_STAGING_BUCKET_NAME # GCS Bucket for Ver
 VERTEX_SERVICE_ACCOUNT=YOUR_VERTEX_SERVICE_ACCOUNT # Vertex Pipelines Service Account
 ```
 
-> **Note**
-> We're using env files and dotenv to load the environment variables.
-> No default value for `--env-file` argument is provided to ensure that you don't accidentally deploy to the wrong project.
-> An [`example.env`](./example/example.env) file is provided in this repo.
-> This also allows you to work with multiple environments thanks to env files (`test.env`, `dev.env`, `prod.env`, etc)
+!!! note "About env files"
+    We're using env files and dotenv to load the environment variables.
+    No default value for `--env-file` argument is provided to ensure that you don't accidentally deploy to the wrong project.
+    An [`example.env`](./example/example.env) file is provided in this repo.
+    This also allows you to work with multiple environments thanks to env files (`test.env`, `dev.env`, `prod.env`, etc)
+
-
-### CLI: Deploying a Pipeline with `deploy`
+
+### 🚀 CLI: Deploying a Pipeline with `deploy`
 
 Let's say you defined a pipeline in `dummy_pipeline.py` and a config file named `config_test.json`. You can deploy your pipeline using the following command:
 ```bash
 vertex-deployer deploy dummy_pipeline \
     --compile \
     --upload \
     --run \
     --env-file example.env \
     --local-package-path . \
     --tags my-tag \
     --config-filepath vertex/configs/dummy_pipeline/config_test.json \
     --experiment-name my-experiment \
     --enable-caching
 ```
@@ -274,7 +292,7 @@ vertex-deployer deploy dummy_pipeline \
 
-### CLI: Checking Pipelines are valid with `check`
+### ✅ CLI: Checking Pipelines are valid with `check`
 
 To check that your pipelines are valid, you can use the `check` command. It uses a pydantic model to:
 - check that your pipeline imports and definition are valid
 - check that your pipeline can be compiled
 - check that all configs related to the pipeline are respecting the pipeline definition (using a Pydantic model based on pipeline signature)
 
 ```bash
 vertex-deployer check --all
 ```
@@ -292,7 +310,7 @@
 
-### CLI: Other commands
+### 🛠️ CLI: Other commands
 
 #### `create`
 
@@ -310,17 +328,24 @@
 You can list all pipelines in the `vertex/pipelines` folder using the `list` command:
 ```bash
 vertex-deployer list --with-configs
 ```
 
-### CLI: Options
+### 🍭 CLI: Options
 
 ```bash
 vertex-deployer --help
 ```
 
+To see package version:
+```bash
+vertex-deployer --version
+```
+
 To adapt log level, use the `--log-level` option. Default is `INFO`.
 ```bash
 vertex-deployer --log-level DEBUG deploy ...
 ```
+
+
 ## Repository Structure
 
 ```
 ├─ .github
 │  ├─ ISSUE_TEMPLATE/
 │  ├─ workflows
 │  │  ├─ ci.yaml
 │  │  ├─ pr_agent.yaml
@@ -332,7 +357,7 @@ vertex-deployer --log-level DEBUG deploy ...
 │  │  └─ release.yaml
 │  ├─ CODEOWNERS
 │  └─ PULL_REQUEST_TEMPLATE.md
-├─ deployer
+├─ deployer # Source code
 │  ├─ __init__.py
 │  ├─ cli.py
 │  ├─ constants.py
@@ -344,8 +369,10 @@
 │  ├─ logging.py
 │  ├─ models.py
 │  └─ utils.py
+├─ docs/ # Documentation folder (mkdocs)
+├─ templates/ # Semantic Release templates
 ├─ tests/
-├─ example
+├─ example # Example folder with dummy pipeline and config
 │  ├─ example.env
 │  └─ vertex
 │     ├─ components
@@ -364,8 +391,12 @@
 │     └─ dummy_pipeline.py
 ├─ .gitignore
 ├─ .pre-commit-config.yaml
+├─ catalog-info.yaml # Roadie integration configuration
+├─ CHANGELOG.md
+├─ CONTRIBUTING.md
 ├─ LICENSE
 ├─ Makefile
+├─ mkdocs.yml # Mkdocs configuration
 ├─ pyproject.toml
 └─ README.md
 ```
diff --git a/catalog-info.yaml b/catalog-info.yaml
new file mode 100644
index 0000000..d3b2414
--- /dev/null
+++ b/catalog-info.yaml
@@ -0,0 +1,19 @@
+apiVersion: backstage.io/v1alpha1
+kind: Component
+metadata:
+  name: vertex-pipelines-deployer
+  title: Vertex Pipelines Deployer
+  description: >
+    Check, compile, upload, run and schedule Vertex Pipelines in a standardized manner.
+  annotations:
+    github.com/project-slug: artefactory/vertex-pipelines-deployer
+    backstage.io/techdocs-ref: dir:.
+  tags:
+    - vertex
+    - mlops
+    - python
+
+spec:
+  type: service
+  owner: julesbertrand
+  lifecycle: experimental
diff --git a/docs/CLI_REFERENCE.md b/docs/CLI_REFERENCE.md
index 30db910..79b00a4 100644
--- a/docs/CLI_REFERENCE.md
+++ b/docs/CLI_REFERENCE.md
@@ -69,7 +69,7 @@ $ vertex-deployer create [OPTIONS] PIPELINE_NAME
 
 **Options**:
 
-* `--config-type, -ct [json|py]`: The type of the config to create. [default: ConfigType.json]
+* `--config-type, -ct [json|py|toml]`: The type of the config to create. [default: ConfigType.json]
 * `--help`: Show this message and exit.
 
 ## `vertex-deployer deploy`
@@ -93,7 +93,7 @@ $ vertex-deployer deploy [OPTIONS] PIPELINE_NAME:{}
 * `--upload, -u / --no-upload, -nu`: Whether to upload the pipeline to Google Artifact Registry. [default: no-upload]
 * `--run, -r / --no-run, -nr`: Whether to run the pipeline. [default: no-run]
 * `--schedule, -s / --no-schedule, -ns`: Whether to create a schedule for the pipeline. [default: no-schedule]
-* `--cron TEXT`: Cron expression for scheduling the pipeline. To pass it to the CLI, use hyphens e.g. '0-10-*-*-*'.
+* `--cron TEXT`: Cron expression for scheduling the pipeline. To pass it to the CLI, use hyphens e.g. `0-10-*-*-*`.
 * `--delete-last-schedule, -dls / --no-delete-last-schedule`: Whether to delete the previous schedule before creating a new one. [default: no-delete-last-schedule]
 * `--tags TEXT`: The tags to use when uploading the pipeline. [default: latest]
 * `--config-filepath, -cfp PATH`: Path to the json/py file with parameter values and input artifacts to use when running the pipeline.
diff --git a/docs/changelog.md b/docs/changelog.md
new file mode 100644
index 0000000..786b75d
--- /dev/null
+++ b/docs/changelog.md
@@ -0,0 +1 @@
+--8<-- "CHANGELOG.md"
diff --git a/docs/contributing.md b/docs/contributing.md
new file mode 100644
index 0000000..ea38c9b
--- /dev/null
+++ b/docs/contributing.md
@@ -0,0 +1 @@
+--8<-- "CONTRIBUTING.md"
diff --git a/docs/example.md b/docs/example.md
new file mode 100644
index 0000000..cec6468
--- /dev/null
+++ b/docs/example.md
@@ -0,0 +1,11 @@
+## 🚧 Dummy Pipeline
+
+## 🚧 Dev: Compile and run to speed up your dev cycle
+
+## 🚧 CI: Check your pipelines and config integrity
+
+## 🚧 CD: Deploy your pipelines in a standardized manner
+
+### 🚧 GitHub Action
+
+### 🚧 Cloud Build trigger
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..96d44ac
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,14 @@
+# Vertex Pipelines Deployer
+
+Deploy Vertex Pipelines within minutes
+
+This tool is a wrapper around [kfp](https://www.kubeflow.org/docs/components/pipelines/v2/hello-world/) and [google-cloud-aiplatform](https://cloud.google.com/python/docs/reference/aiplatform/latest) to check, compile, upload, run and schedule Vertex Pipelines in a standardized manner.
+
+
+!!! info
+    This project is looking for beta testers and contributors.
+
+    You can contact code owners or **[submit a new issue](https://github.com/artefactory/vertex-pipelines-deployer/issues/new/choose)** if you want to help.
+
+
+--8<-- "README.md:why"
diff --git a/docs/install.md b/docs/install.md
new file mode 100644
index 0000000..4bcc6e6
--- /dev/null
+++ b/docs/install.md
@@ -0,0 +1,14 @@
+??? abstract "TL;DR"
+    Install from GCS:
+    ```bash
+    export VERSION=0.2.1
+    gsutil -m cp gs://vertex-pipelines-deployer/vertex_deployer-$VERSION.tar.gz .
+    pip install ./vertex_deployer-$VERSION.tar.gz
+    ```
+
+    In your requirements:
+    ```bash
+    file:my/path/to/vertex_deployer-$VERSION.tar.gz
+    ```
+
+--8<-- "README.md:installation"
diff --git a/docs/setup.md b/docs/setup.md
new file mode 100644
index 0000000..bedcafb
--- /dev/null
+++ b/docs/setup.md
@@ -0,0 +1,19 @@
+??? abstract "TL;DR"
+    You need a GCP project ready to use Vertex Pipelines.
+
+    And the following file structure from [Vertex Pipeline Starter Kit](https://github.com/artefactory/vertex-pipeline-starter-kit):
+    ```bash
+    vertex
+    ├─ configs/
+    │  └─ {pipeline_name}
+    │     └─ {config_name}.json
+    └─ pipelines/
+       └─ {pipeline_name}.py
+    ```
+
+### 📋 Prerequisites
+
+--8<-- "README.md:prerequisites"
+
+
+--8<-- "README.md:setup"
diff --git a/docs/usage.md b/docs/usage.md
new file mode 100644
index 0000000..7600a1b
--- /dev/null
+++ b/docs/usage.md
@@ -0,0 +1,21 @@
+??? abstract "TL;DR"
abstract "TL;DR" + Deploy pipeline: + ```bash + vertex-deployer deploy dummy_pipeline \ + --compile \ + --upload \ + --run \ + --env-file example.env \ + --local-package-path . \ + --tags my-tag \ + --config-filepath vertex/configs/dummy_pipeline/config_test.json \ + --experiment-name my-experiment \ + --enable-caching + ``` + + Check pipelines: + ```bash + vertex-deployer check --all + ``` + +--8<-- "README.md:usage" diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..c3a36d7 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,17 @@ +site_name: Vertex Pipelines Deployer +repo_name: artefactory/vertex-pipelines-deployer +repo_url: https://github.com/artefactory/vertex-pipelines-deployer + +plugins: + - techdocs-core + +nav: + - Get Started: + - Welcome: index.md + - Installation: install.md + - Prerequisites & Setup: setup.md + - Usage: usage.md + - CLI Reference: CLI_REFERENCE.md + - 🚧 Example: example.md + - Contributing: contributing.md + - Changelog: changelog.md diff --git a/pyproject.toml b/pyproject.toml index 0af3f0a..48e5a8d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,11 @@ nbstripout = "^0.6.1" ruff = "^0.0.289" pytest-cov = "^4.1.0" + +[tool.poetry.group.docs.dependencies] +mkdocs = "^1.5.3" +mkdocs-techdocs-core = "^1.2.3" + [tool.poetry.extras] profiling = ["pyinstrument"]