tests: cicd: reworked e2e tests to support multiple cases and added config/test for grouping
Lasica committed Nov 21, 2023
1 parent 6bbb464 commit ded9fc7
Showing 9 changed files with 157 additions and 7 deletions.
22 changes: 16 additions & 6 deletions .github/workflows/test_and_publish.yml
@@ -99,6 +99,9 @@ jobs:
e2e_tests:
runs-on: ubuntu-latest
needs: [unit_tests, sonarcloud]
+ strategy:
+   matrix:
+     e2e_case: ["standard", "grouping"]
steps:
- uses: actions/checkout@v4

@@ -120,7 +123,7 @@ jobs:
# kedro 0.18.1 is on purpose here, due to https://github.com/kedro-org/kedro-starters/issues/99
run: |
pip install $(find "./dist" -name "*.tar.gz")
- kedro new --starter spaceflights --config tests/e2e/starter-config.yml --verbose
+ kedro new --starter spaceflights --config tests/e2e/${{ matrix.e2e_case }}/starter-config.yml --verbose
- name: Install project dependencies
working-directory: ./spaceflights
@@ -139,8 +142,13 @@
sed -i 's/\(COPY src\/requirements.txt.*\)$/\1\nCOPY kedro-vertexai.tar.gz ./g' Dockerfile
echo "!data/01_raw" >> .dockerignore
kedro vertexai init gid-ml-ops-sandbox europe-west4
- mv ../tests/e2e/catalog.yml conf/base/catalog.yml
- mv ../tests/e2e/vertexai.yml conf/base/vertexai.yml
+ cp ../tests/e2e/${{ matrix.e2e_case }}/catalog.yml conf/base/catalog.yml
+ cp ../tests/e2e/${{ matrix.e2e_case }}/vertexai.yml conf/base/vertexai.yml
+ # Introducing tagging to pipelines
+ if [[ "${{ matrix.e2e_case }}" == "grouping" ]]; then
+   mv ../tests/e2e/${{ matrix.e2e_case }}/pipeline_data_processing.py src/spaceflights/pipelines/data_processing/pipeline.py
+   mv ../tests/e2e/${{ matrix.e2e_case }}/pipeline_data_science.py src/spaceflights/pipelines/data_science/pipeline.py
+ fi
- name: Prepare docker env
uses: docker/setup-buildx-action@v3
@@ -151,14 +159,15 @@
- name: Build pipeline docker image
run: |
cd ./spaceflights
- docker build --build-arg BASE_IMAGE=python:3.8-buster --tag kedro-vertexai-e2e:latest --load .
+ docker pull gcr.io/gid-ml-ops-sandbox/kedro-vertexai-e2e:${{ matrix.e2e_case }} || true
+ docker build --build-arg BASE_IMAGE=python:3.10-buster --tag kedro-vertexai-e2e:${{ matrix.e2e_case }} --load --cache-from=gcr.io/gid-ml-ops-sandbox/kedro-vertexai-e2e:${{ matrix.e2e_case }} .
- name: Publish docker image to GCR
uses: mattes/gce-docker-push-action@v1
with:
creds: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
- src: kedro-vertexai-e2e:latest
- dst: gcr.io/gid-ml-ops-sandbox/kedro-vertexai-e2e:latest
+ src: kedro-vertexai-e2e:${{ matrix.e2e_case }}
+ dst: gcr.io/gid-ml-ops-sandbox/kedro-vertexai-e2e:${{ matrix.e2e_case }}

- name: Set up GCP Credentials
uses: google-github-actions/[email protected]
@@ -172,6 +181,7 @@
cd ./spaceflights
export KEDRO_CONFIG_COMMIT_ID=$GITHUB_SHA
kedro vertexai run-once --wait-for-completion
publish:
if: github.event.pull_request == null && github.ref == 'refs/heads/master'
needs: [ e2e_tests, codeql ]
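The workflow now runs the e2e job once per matrix case ("standard" and "grouping"), and every case-specific fixture is looked up under tests/e2e/<case>/ instead of tests/e2e/. The pipeline image is tagged and pushed per case, and the previously pushed image for that case is pulled as a --cache-from source, so repeated runs of the same case can reuse build layers. As a sketch only (not part of the commit; the helper name is hypothetical), the per-case layout introduced here can be summarised like this:

from pathlib import Path

E2E_ROOT = Path("tests/e2e")

def case_fixtures(e2e_case: str) -> dict[str, Path]:
    """Illustrative helper: the files the workflow copies for a given matrix case."""
    case_dir = E2E_ROOT / e2e_case  # tests/e2e/standard or tests/e2e/grouping
    files = {
        "starter_config": case_dir / "starter-config.yml",
        "catalog": case_dir / "catalog.yml",
        "vertexai": case_dir / "vertexai.yml",
    }
    if e2e_case == "grouping":
        # The grouping case also swaps tagged pipeline definitions into the starter project.
        files["pipeline_data_processing"] = case_dir / "pipeline_data_processing.py"
        files["pipeline_data_science"] = case_dir / "pipeline_data_science.py"
    return files

for name, path in case_fixtures("grouping").items():
    print(f"{name}: {path}")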
File renamed without changes.
35 changes: 35 additions & 0 deletions tests/e2e/grouping/pipeline_data_processing.py
@@ -0,0 +1,35 @@
from kedro.pipeline import Pipeline, node, pipeline

from .nodes import (
create_model_input_table,
preprocess_companies,
preprocess_shuttles,
)


def create_pipeline(**kwargs) -> Pipeline:
return pipeline(
[
node(
func=preprocess_companies,
inputs="companies",
outputs="preprocessed_companies",
name="preprocess_companies_node",
tags=["grp:preprocessing"],
),
node(
func=preprocess_shuttles,
inputs="shuttles",
outputs="preprocessed_shuttles",
name="preprocess_shuttles_node",
tags=["grp:preprocessing"],
),
node(
func=create_model_input_table,
inputs=["preprocessed_shuttles", "preprocessed_companies", "reviews"],
outputs="model_input_table",
name="create_model_input_table_node",
tags=["grp:preprocessing"],
),
]
)
29 changes: 29 additions & 0 deletions tests/e2e/grouping/pipeline_data_science.py
@@ -0,0 +1,29 @@
from kedro.pipeline import Pipeline, node, pipeline

from .nodes import evaluate_model, split_data, train_model


def create_pipeline(**kwargs) -> Pipeline:
return pipeline(
[
node(
func=split_data,
inputs=["model_input_table", "params:model_options"],
outputs=["X_train", "X_test", "y_train", "y_test"],
name="split_data_node",
tags=["grp:preprocessing"],
),
node(
func=train_model,
inputs=["X_train", "y_train"],
outputs="regressor",
name="train_model_node",
),
node(
func=evaluate_model,
inputs=["regressor", "X_test", "y_test"],
outputs=None,
name="evaluate_model_node",
),
]
)
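In the grouping case, the three data-processing nodes and split_data_node are tagged "grp:preprocessing", while train_model_node and evaluate_model_node carry no grouping tag. Together with the TagNodeGrouper configuration in tests/e2e/grouping/vertexai.yml (tag_prefix: "grp:"), the intent is that the tagged nodes are collapsed into one grouped Vertex AI step while the untagged nodes keep running as individual steps. The snippet below is only an illustration of the tag-prefix idea, not the plugin's implementation:

from collections import defaultdict

def group_by_tag_prefix(node_tags: dict[str, set[str]], tag_prefix: str = "grp:") -> dict[str, list[str]]:
    """Group node names by their first tag starting with tag_prefix; untagged nodes stand alone."""
    groups: dict[str, list[str]] = defaultdict(list)
    for node_name, tags in node_tags.items():
        prefixed = sorted(tag for tag in tags if tag.startswith(tag_prefix))
        group_name = prefixed[0][len(tag_prefix):] if prefixed else node_name
        groups[group_name].append(node_name)
    return dict(groups)

# Tags exactly as declared in the two pipeline files above.
example = {
    "preprocess_companies_node": {"grp:preprocessing"},
    "preprocess_shuttles_node": {"grp:preprocessing"},
    "create_model_input_table_node": {"grp:preprocessing"},
    "split_data_node": {"grp:preprocessing"},
    "train_model_node": set(),
    "evaluate_model_node": set(),
}
print(group_by_tag_prefix(example))
# {'preprocessing': [<four tagged nodes>], 'train_model_node': [...], 'evaluate_model_node': [...]}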
3 changes: 3 additions & 0 deletions tests/e2e/grouping/starter-config.yml
@@ -0,0 +1,3 @@
project_name: Spaceflights VertexAI E2E Test With Grouping
repo_name: spaceflights
python_package: spaceflights
57 changes: 57 additions & 0 deletions tests/e2e/grouping/vertexai.yml
@@ -0,0 +1,57 @@
project_id: gid-ml-ops-sandbox
region: europe-west4
run_config:
# Name of the image to run as the pipeline steps
image: gcr.io/gid-ml-ops-sandbox/kedro-vertexai-e2e:grouping

# Pull policy to be used for the steps. Use Always if you push the images
# on the same tag, or Never if you use only local images
image_pull_policy: IfNotPresent

# Location of Vertex AI GCS root
root: gid-ml-ops-sandbox-plugin-tests/staging

# Name of the kubeflow experiment to be created
experiment_name: kedro-vertex-e2e-grouping

# Name of the scheduled run, templated with the schedule parameters
scheduled_run_name: kedro-vertex-e2e-grouping

# Optional service account to run vertex AI Pipeline with
service_account: [email protected]

# Optional pipeline description
# description: "Very Important Pipeline"
grouping:
cls: kedro_vertexai.grouping.TagNodeGrouper
params:
tag_prefix: "grp:"

# How long to keep underlying Argo workflow (together with pods and data
# volume after pipeline finishes) [in seconds]. Default: 1 week
ttl: 604800

# Optional network configuration
# network:

# Name of the vpc to use for running Vertex Pipeline
# vpc: my-vpc

# Hosts aliases to be placed in /etc/hosts when pipeline is executed
# host_aliases:
# - ip: 127.0.0.1
# hostnames: me.local

# What Kedro pipeline should be run as the last step regardless of the
# pipeline status. Used to send notifications or raise the alerts
# on_exit_pipeline: notify_via_slack

# Optional section allowing adjustment of the resources, reservations and limits
# for the nodes. When not provided they're set to 500m cpu and 1024Mi memory.
# If you don't want to specify pipeline resources set both to None in __default__.
resources:

# Default settings for the nodes
__default__:
cpu: 500m
memory: 1024Mi
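The grouping block above selects the grouper implementation by dotted path (cls: kedro_vertexai.grouping.TagNodeGrouper) and hands it params (tag_prefix: "grp:"). Below is a generic, hedged sketch of resolving such a cls/params pair from configuration; this is a common dynamic-import pattern and an assumption about usage, not necessarily how kedro-vertexai loads the class:

import importlib
from typing import Any

def instantiate_from_config(grouping_cfg: dict[str, Any]) -> Any:
    """Resolve a dotted class path from "cls" and construct it with "params" as kwargs (sketch)."""
    module_path, _, class_name = grouping_cfg["cls"].rpartition(".")
    cls = getattr(importlib.import_module(module_path), class_name)
    return cls(**grouping_cfg.get("params", {}))

# Mirrors run_config.grouping from the YAML above.
grouping_cfg = {
    "cls": "kedro_vertexai.grouping.TagNodeGrouper",
    "params": {"tag_prefix": "grp:"},
}
# grouper = instantiate_from_config(grouping_cfg)  # requires kedro-vertexai to be installed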
16 changes: 16 additions & 0 deletions tests/e2e/standard/catalog.yml
@@ -0,0 +1,16 @@
companies:
type: pandas.CSVDataSet
filepath: data/01_raw/companies.csv
layer: raw

reviews:
type: pandas.CSVDataSet
filepath: data/01_raw/reviews.csv
layer: raw

shuttles:
type: pandas.ExcelDataSet
filepath: data/01_raw/shuttles.xlsx
layer: raw
load_args:
engine: openpyxl
File renamed without changes.
2 changes: 1 addition & 1 deletion tests/e2e/vertexai.yml → tests/e2e/standard/vertexai.yml
@@ -2,7 +2,7 @@ project_id: gid-ml-ops-sandbox
region: europe-west4
run_config:
# Name of the image to run as the pipeline steps
- image: gcr.io/gid-ml-ops-sandbox/kedro-vertexai-e2e:latest
+ image: gcr.io/gid-ml-ops-sandbox/kedro-vertexai-e2e:standard

# Pull policy to be used for the steps. Use Always if you push the images
# on the same tag, or Never if you use only local images
