Skip to content

Commit

Permalink
Simplify parameters in Iris example pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
gmfrasca committed Mar 19, 2024
1 parent 7377bb5 commit 3a313a0
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 120 deletions.
99 changes: 30 additions & 69 deletions config/internal/apiserver/sample-pipeline/sample-pipeline.yaml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,8 @@ data:
# PIPELINE DEFINITION
# Name: iris-training-pipeline
# Inputs:
# min_max_scaler: bool
# neighbors: int
# standard_scaler: bool
# Outputs:
# train-model-metrics: system.ClassificationMetrics
components:
comp-create-dataset:
executorLabel: exec-create-dataset
Expand All @@ -35,8 +32,6 @@ data:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
parameters:
min_max_scaler:
parameterType: BOOLEAN
standard_scaler:
parameterType: BOOLEAN
outputDefinitions:
Expand All @@ -58,10 +53,6 @@ data:
parameterType: NUMBER_INTEGER
outputDefinitions:
artifacts:
metrics:
artifactType:
schemaTitle: system.ClassificationMetrics
schemaVersion: 0.0.1
model:
artifactType:
schemaTitle: system.Model
Expand All @@ -72,16 +63,16 @@ data:
container:
args:
- --executor_input
- '{{"{{"}}${{"}}"}}'
- '{{$}}'
- --function_to_execute
- create_dataset
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\
\ && \"$0\" \"$@\"\n"
\ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\
\ 'kfp==2.0.0-beta.13' && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
Expand All @@ -97,21 +88,21 @@ data:
\ col_names = [\n 'Sepal_Length', 'Sepal_Width', 'Petal_Length',\
\ 'Petal_Width', 'Labels'\n ]\n df = pd.read_csv(csv_url, names=col_names)\n\
\n with open(iris_dataset.path, 'w') as f:\n df.to_csv(f)\n\n"
image: quay.io/hukhan/iris-base:1
image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0
exec-normalize-dataset:
container:
args:
- --executor_input
- '{{"{{"}}${{"}}"}}'
- '{{$}}'
- --function_to_execute
- normalize_dataset
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\
\ && \"$0\" \"$@\"\n"
\ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\
\ 'scikit-learn==1.4.0' 'kfp==2.0.0-beta.13' && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
Expand All @@ -124,32 +115,28 @@ data:
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef normalize_dataset(\n input_iris_dataset: Input[Dataset],\n\
\ normalized_iris_dataset: Output[Dataset],\n standard_scaler: bool,\n\
\ min_max_scaler: bool,\n):\n if standard_scaler is min_max_scaler:\n\
\ raise ValueError(\n 'Exactly one of standard_scaler\
\ or min_max_scaler must be True.')\n\n import pandas as pd\n from\
\ sklearn.preprocessing import MinMaxScaler\n from sklearn.preprocessing\
\ import StandardScaler\n\n with open(input_iris_dataset.path) as f:\n\
\ df = pd.read_csv(f)\n labels = df.pop('Labels')\n\n if standard_scaler:\n\
\ scaler = StandardScaler()\n if min_max_scaler:\n scaler\
\ = MinMaxScaler()\n\n df = pd.DataFrame(scaler.fit_transform(df))\n\
\ df['Labels'] = labels\n normalized_iris_dataset.metadata['state']\
\ = \"Normalized\"\n with open(normalized_iris_dataset.path, 'w') as\
\ f:\n df.to_csv(f)\n\n"
image: quay.io/hukhan/iris-base:1
):\n\n import pandas as pd\n from sklearn.preprocessing import MinMaxScaler\n\
\ from sklearn.preprocessing import StandardScaler\n\n with open(input_iris_dataset.path)\
\ as f:\n df = pd.read_csv(f)\n labels = df.pop('Labels')\n\n\
\ scaler = StandardScaler() if standard_scaler else MinMaxScaler()\n\n\
\ df = pd.DataFrame(scaler.fit_transform(df))\n df['Labels'] = labels\n\
\ normalized_iris_dataset.metadata['state'] = \"Normalized\"\n with\
\ open(normalized_iris_dataset.path, 'w') as f:\n df.to_csv(f)\n\n"
image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0
exec-train-model:
container:
args:
- --executor_input
- '{{"{{"}}${{"}}"}}'
- '{{$}}'
- --function_to_execute
- train_model
command:
- sh
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\
\ && \"$0\" \"$@\"\n"
\ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\
\ 'scikit-learn==1.4.0' 'kfp==2.0.0-beta.13' && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
Expand All @@ -161,32 +148,19 @@ data:
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef train_model(\n normalized_iris_dataset: Input[Dataset],\n\
\ model: Output[Model],\n metrics: Output[ClassificationMetrics],\n\
\ n_neighbors: int,\n):\n import pickle\n\n import pandas as pd\n\
\ from sklearn.neighbors import KNeighborsClassifier\n\n from sklearn.metrics\
\ import roc_curve\n from sklearn.model_selection import train_test_split,\
\ cross_val_predict\n from sklearn.metrics import confusion_matrix\n\n\
\n with open(normalized_iris_dataset.path) as f:\n df = pd.read_csv(f)\n\
\n y = df.pop('Labels')\n X = df\n\n X_train, X_test, y_train,\
\ y_test = train_test_split(X, y, random_state=0)\n\n clf = KNeighborsClassifier(n_neighbors=n_neighbors)\n\
\ clf.fit(X_train, y_train)\n\n predictions = cross_val_predict(\n\
\ clf, X_train, y_train, cv=3)\n metrics.log_confusion_matrix(\n\
\ ['Iris-Setosa', 'Iris-Versicolour', 'Iris-Virginica'],\n \
\ confusion_matrix(\n y_train,\n predictions).tolist()\
\ # .tolist() to convert np array to list.\n )\n\n model.metadata['framework']\
\ = 'scikit-learn'\n with open(model.path, 'wb') as f:\n pickle.dump(clf,\
\ f)\n\n"
image: quay.io/hukhan/iris-base:1
\ model: Output[Model],\n n_neighbors: int,\n):\n import pickle\n\
\n import pandas as pd\n from sklearn.model_selection import train_test_split\n\
\ from sklearn.neighbors import KNeighborsClassifier\n\n with open(normalized_iris_dataset.path)\
\ as f:\n df = pd.read_csv(f)\n\n y = df.pop('Labels')\n X\
\ = df\n\n X_train, X_test, y_train, y_test = train_test_split(X, y,\
\ random_state=0)\n\n clf = KNeighborsClassifier(n_neighbors=n_neighbors)\n\
\ clf.fit(X_train, y_train)\n\n model.metadata['framework'] = 'scikit-learn'\n\
\ with open(model.path, 'wb') as f:\n pickle.dump(clf, f)\n\n"
image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0
pipelineInfo:
name: iris-training-pipeline
root:
dag:
outputs:
artifacts:
train-model-metrics:
artifactSelectors:
- outputArtifactKey: metrics
producerSubtask: train-model
tasks:
create-dataset:
cachingOptions:
Expand All @@ -209,12 +183,9 @@ data:
outputArtifactKey: iris_dataset
producerTask: create-dataset
parameters:
min_max_scaler:
runtimeValue:
constant: false
standard_scaler:
runtimeValue:
constant: true
constant: 1.0
taskInfo:
name: normalize-dataset
train-model:
Expand All @@ -237,23 +208,13 @@ data:
name: train-model
inputDefinitions:
parameters:
min_max_scaler:
defaultValue: true
parameterType: BOOLEAN
neighbors:
defaultValue: 3
parameterType: NUMBER_INTEGER
standard_scaler:
defaultValue: false
parameterType: BOOLEAN
outputDefinitions:
artifacts:
train-model-metrics:
artifactType:
schemaTitle: system.ClassificationMetrics
schemaVersion: 0.0.1
schemaVersion: 2.1.0
sdkVersion: kfp-2.0.1
sdkVersion: kfp-2.0.0-beta.13

{{ else }}
apiVersion: v1
kind: ConfigMap
Expand Down
13 changes: 2 additions & 11 deletions docs/example_pipelines/iris/iris-pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,7 @@ def normalize_dataset(
input_iris_dataset: Input[Dataset],
normalized_iris_dataset: Output[Dataset],
standard_scaler: bool,
min_max_scaler: bool,
):
if standard_scaler is min_max_scaler:
raise ValueError(
'Exactly one of standard_scaler or min_max_scaler must be True.')

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
Expand All @@ -47,10 +43,7 @@ def normalize_dataset(
df = pd.read_csv(f)
labels = df.pop('Labels')

if standard_scaler:
scaler = StandardScaler()
if min_max_scaler:
scaler = MinMaxScaler()
scaler = StandardScaler() if standard_scaler else MinMaxScaler()

df = pd.DataFrame(scaler.fit_transform(df))
df['Labels'] = labels
Expand Down Expand Up @@ -93,15 +86,13 @@ def train_model(
@dsl.pipeline(name='iris-training-pipeline')
def my_pipeline(
standard_scaler: bool,
min_max_scaler: bool,
neighbors: int,
):
create_dataset_task = create_dataset()

normalize_dataset_task = normalize_dataset(
input_iris_dataset=create_dataset_task.outputs['iris_dataset'],
standard_scaler=True,
min_max_scaler=False)
standard_scaler=True)

train_model(
normalized_iris_dataset=normalize_dataset_task
Expand Down
58 changes: 18 additions & 40 deletions docs/example_pipelines/iris/iris-pipeline.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# PIPELINE DEFINITION
# Name: iris-training-pipeline
# Inputs:
# min_max_scaler: bool
# neighbors: int
# standard_scaler: bool
components:
Expand All @@ -22,8 +21,6 @@ components:
schemaTitle: system.Dataset
schemaVersion: 0.0.1
parameters:
min_max_scaler:
parameterType: BOOLEAN
standard_scaler:
parameterType: BOOLEAN
outputDefinitions:
Expand Down Expand Up @@ -63,18 +60,15 @@ deploymentSpec:
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0'\
\ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\
\ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\
\ && \"$0\" \"$@\"\n"
\ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\
\ 'kfp==2.0.0-beta.13' && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
Expand All @@ -96,34 +90,27 @@ deploymentSpec:
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0'\
\ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\
\ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\
\ 'scikit-learn==1.4.0' && \"$0\" \"$@\"\n"
\ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\
\ 'scikit-learn==1.4.0' 'kfp==2.0.0-beta.13' && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
\ *\n\ndef normalize_dataset(\n input_iris_dataset: Input[Dataset],\n\
\ normalized_iris_dataset: Output[Dataset],\n standard_scaler: bool,\n\
\ min_max_scaler: bool,\n):\n if standard_scaler is min_max_scaler:\n\
\ raise ValueError(\n 'Exactly one of standard_scaler\
\ or min_max_scaler must be True.')\n\n import pandas as pd\n from\
\ sklearn.preprocessing import MinMaxScaler\n from sklearn.preprocessing\
\ import StandardScaler\n\n with open(input_iris_dataset.path) as f:\n\
\ df = pd.read_csv(f)\n labels = df.pop('Labels')\n\n if standard_scaler:\n\
\ scaler = StandardScaler()\n if min_max_scaler:\n scaler\
\ = MinMaxScaler()\n\n df = pd.DataFrame(scaler.fit_transform(df))\n\
\ df['Labels'] = labels\n normalized_iris_dataset.metadata['state']\
\ = \"Normalized\"\n with open(normalized_iris_dataset.path, 'w') as\
\ f:\n df.to_csv(f)\n\n"
):\n\n import pandas as pd\n from sklearn.preprocessing import MinMaxScaler\n\
\ from sklearn.preprocessing import StandardScaler\n\n with open(input_iris_dataset.path)\
\ as f:\n df = pd.read_csv(f)\n labels = df.pop('Labels')\n\n\
\ scaler = StandardScaler() if standard_scaler else MinMaxScaler()\n\n\
\ df = pd.DataFrame(scaler.fit_transform(df))\n df['Labels'] = labels\n\
\ normalized_iris_dataset.metadata['state'] = \"Normalized\"\n with\
\ open(normalized_iris_dataset.path, 'w') as f:\n df.to_csv(f)\n\n"
image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0
exec-train-model:
container:
Expand All @@ -137,18 +124,15 @@ deploymentSpec:
- -c
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0'\
\ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\
\ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\
\ 'scikit-learn==1.4.0' && \"$0\" \"$@\"\n"
\ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\
\ 'scikit-learn==1.4.0' 'kfp==2.0.0-beta.13' && \"$0\" \"$@\"\n"
- sh
- -ec
- 'program_path=$(mktemp -d)
printf "%s" "$0" > "$program_path/ephemeral_component.py"
_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@"
'
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
Expand All @@ -161,7 +145,6 @@ deploymentSpec:
\ random_state=0)\n\n clf = KNeighborsClassifier(n_neighbors=n_neighbors)\n\
\ clf.fit(X_train, y_train)\n\n model.metadata['framework'] = 'scikit-learn'\n\
\ with open(model.path, 'wb') as f:\n pickle.dump(clf, f)\n\n"

image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0
pipelineInfo:
name: iris-training-pipeline
Expand Down Expand Up @@ -189,12 +172,9 @@ root:
outputArtifactKey: iris_dataset
producerTask: create-dataset
parameters:
min_max_scaler:
runtimeValue:
constant: false
standard_scaler:
runtimeValue:
constant: true
constant: 1.0
taskInfo:
name: normalize-dataset
train-model:
Expand All @@ -217,11 +197,9 @@ root:
name: train-model
inputDefinitions:
parameters:
min_max_scaler:
parameterType: BOOLEAN
neighbors:
parameterType: NUMBER_INTEGER
standard_scaler:
parameterType: BOOLEAN
schemaVersion: 2.1.0
sdkVersion: kfp-2.7.0
sdkVersion: kfp-2.0.0-beta.13

0 comments on commit 3a313a0

Please sign in to comment.