diff --git a/config/internal/apiserver/sample-pipeline/sample-pipeline.yaml.tmpl b/config/internal/apiserver/sample-pipeline/sample-pipeline.yaml.tmpl index c4e9f150..86281971 100644 --- a/config/internal/apiserver/sample-pipeline/sample-pipeline.yaml.tmpl +++ b/config/internal/apiserver/sample-pipeline/sample-pipeline.yaml.tmpl @@ -12,9 +12,8 @@ data: # PIPELINE DEFINITION # Name: iris-training-pipeline # Inputs: - # min_max_scaler: bool - # neighbors: int - # standard_scaler: bool + # neighbors: int [Default: 3.0] + # standard_scaler: bool [Default: True] # Outputs: # train-model-metrics: system.ClassificationMetrics components: @@ -35,8 +34,6 @@ data: schemaTitle: system.Dataset schemaVersion: 0.0.1 parameters: - min_max_scaler: - parameterType: BOOLEAN standard_scaler: parameterType: BOOLEAN outputDefinitions: @@ -80,15 +77,18 @@ data: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\ \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -97,7 +97,7 @@ data: \ col_names = [\n 'Sepal_Length', 'Sepal_Width', 'Petal_Length',\ \ 'Petal_Width', 'Labels'\n ]\n df = pd.read_csv(csv_url, names=col_names)\n\ \n with open(iris_dataset.path, 'w') as f:\n df.to_csv(f)\n\n" - image: quay.io/hukhan/iris-base:1 + image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0 exec-normalize-dataset: container: args: @@ -110,32 +110,31 @@ data: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\ + \ 'scikit-learn==1.4.0' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef normalize_dataset(\n input_iris_dataset: Input[Dataset],\n\ \ normalized_iris_dataset: Output[Dataset],\n standard_scaler: bool,\n\ - \ min_max_scaler: bool,\n):\n if standard_scaler is min_max_scaler:\n\ - \ raise ValueError(\n 'Exactly one of standard_scaler\ - \ or min_max_scaler must be True.')\n\n import pandas as pd\n from\ - \ sklearn.preprocessing import MinMaxScaler\n from sklearn.preprocessing\ - \ import StandardScaler\n\n with open(input_iris_dataset.path) as f:\n\ - \ df = pd.read_csv(f)\n labels = df.pop('Labels')\n\n if standard_scaler:\n\ - \ scaler = StandardScaler()\n if min_max_scaler:\n scaler\ - \ = MinMaxScaler()\n\n df = pd.DataFrame(scaler.fit_transform(df))\n\ - \ df['Labels'] = labels\n normalized_iris_dataset.metadata['state']\ - \ = \"Normalized\"\n with open(normalized_iris_dataset.path, 'w') as\ - \ f:\n df.to_csv(f)\n\n" - image: quay.io/hukhan/iris-base:1 + ):\n\n import pandas as pd\n from sklearn.preprocessing import MinMaxScaler\n\ + \ from sklearn.preprocessing import StandardScaler\n\n with open(input_iris_dataset.path)\ + \ as f:\n df = pd.read_csv(f)\n labels = df.pop('Labels')\n\n\ + \ scaler = StandardScaler() if standard_scaler else MinMaxScaler()\n\n\ + \ df = pd.DataFrame(scaler.fit_transform(df))\n df['Labels'] = labels\n\ + \ normalized_iris_dataset.metadata['state'] = \"Normalized\"\n with\ + \ open(normalized_iris_dataset.path, 'w') as f:\n df.to_csv(f)\n\n" + image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0 exec-train-model: container: args: @@ -148,27 +147,31 @@ data: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\ + \ 'scikit-learn==1.4.0' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef train_model(\n normalized_iris_dataset: Input[Dataset],\n\ \ model: Output[Model],\n metrics: Output[ClassificationMetrics],\n\ \ n_neighbors: int,\n):\n import pickle\n\n import pandas as pd\n\ - \ from sklearn.neighbors import KNeighborsClassifier\n\n from sklearn.metrics\ - \ import roc_curve\n from sklearn.model_selection import train_test_split,\ - \ cross_val_predict\n from sklearn.metrics import confusion_matrix\n\n\ - \n with open(normalized_iris_dataset.path) as f:\n df = pd.read_csv(f)\n\ - \n y = df.pop('Labels')\n X = df\n\n X_train, X_test, y_train,\ - \ y_test = train_test_split(X, y, random_state=0)\n\n clf = KNeighborsClassifier(n_neighbors=n_neighbors)\n\ + \ from sklearn.model_selection import train_test_split\n from sklearn.neighbors\ + \ import KNeighborsClassifier\n\n from sklearn.metrics import roc_curve\n\ + \ from sklearn.model_selection import train_test_split, cross_val_predict\n\ + \ from sklearn.metrics import confusion_matrix\n\n\n with open(normalized_iris_dataset.path)\ + \ as f:\n df = pd.read_csv(f)\n\n y = df.pop('Labels')\n X\ + \ = df\n\n X_train, X_test, y_train, y_test = train_test_split(X, y,\ + \ random_state=0)\n\n clf = KNeighborsClassifier(n_neighbors=n_neighbors)\n\ \ clf.fit(X_train, y_train)\n\n predictions = cross_val_predict(\n\ \ clf, X_train, y_train, cv=3)\n metrics.log_confusion_matrix(\n\ \ ['Iris-Setosa', 'Iris-Versicolour', 'Iris-Virginica'],\n \ @@ -176,7 +179,7 @@ data: \ # .tolist() to convert np array to list.\n )\n\n model.metadata['framework']\ \ = 'scikit-learn'\n with open(model.path, 'wb') as f:\n pickle.dump(clf,\ \ f)\n\n" - image: quay.io/hukhan/iris-base:1 + image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0 pipelineInfo: name: iris-training-pipeline root: @@ -209,9 +212,6 @@ data: outputArtifactKey: iris_dataset producerTask: create-dataset parameters: - min_max_scaler: - runtimeValue: - constant: false standard_scaler: runtimeValue: constant: true @@ -237,14 +237,13 @@ data: name: train-model inputDefinitions: parameters: - min_max_scaler: - defaultValue: true - parameterType: BOOLEAN neighbors: - defaultValue: 3 + defaultValue: 3.0 + isOptional: true parameterType: NUMBER_INTEGER standard_scaler: - defaultValue: false + defaultValue: true + isOptional: true parameterType: BOOLEAN outputDefinitions: artifacts: @@ -253,7 +252,7 @@ data: schemaTitle: system.ClassificationMetrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 - sdkVersion: kfp-2.0.1 + sdkVersion: kfp-2.7.0 {{ else }} apiVersion: v1 kind: ConfigMap diff --git 
a/controllers/testdata/declarative/case_7/expected/created/sample-pipeline.yaml.tmpl b/controllers/testdata/declarative/case_7/expected/created/sample-pipeline.yaml.tmpl index a01dde1e..832cedfc 100644 --- a/controllers/testdata/declarative/case_7/expected/created/sample-pipeline.yaml.tmpl +++ b/controllers/testdata/declarative/case_7/expected/created/sample-pipeline.yaml.tmpl @@ -11,9 +11,8 @@ data: # PIPELINE DEFINITION # Name: iris-training-pipeline # Inputs: - # min_max_scaler: bool - # neighbors: int - # standard_scaler: bool + # neighbors: int [Default: 3.0] + # standard_scaler: bool [Default: True] # Outputs: # train-model-metrics: system.ClassificationMetrics components: @@ -34,8 +33,6 @@ data: schemaTitle: system.Dataset schemaVersion: 0.0.1 parameters: - min_max_scaler: - parameterType: BOOLEAN standard_scaler: parameterType: BOOLEAN outputDefinitions: @@ -79,15 +76,18 @@ data: - -c - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\ \ && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ @@ -96,7 +96,7 @@ data: \ col_names = [\n 'Sepal_Length', 'Sepal_Width', 'Petal_Length',\ \ 'Petal_Width', 'Labels'\n ]\n df = pd.read_csv(csv_url, names=col_names)\n\ \n with open(iris_dataset.path, 'w') as f:\n df.to_csv(f)\n\n" - image: quay.io/hukhan/iris-base:1 + image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0 exec-normalize-dataset: container: args: @@ -109,32 +109,31 @@ data: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\ + \ 'scikit-learn==1.4.0' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef normalize_dataset(\n input_iris_dataset: Input[Dataset],\n\ \ normalized_iris_dataset: Output[Dataset],\n standard_scaler: bool,\n\ - \ min_max_scaler: bool,\n):\n if standard_scaler is min_max_scaler:\n\ - \ raise ValueError(\n 'Exactly one of standard_scaler\ - \ or min_max_scaler must be True.')\n\n import pandas as pd\n from\ - \ sklearn.preprocessing import MinMaxScaler\n from sklearn.preprocessing\ - \ import StandardScaler\n\n with open(input_iris_dataset.path) as f:\n\ - \ df = pd.read_csv(f)\n labels = df.pop('Labels')\n\n if standard_scaler:\n\ - \ scaler = StandardScaler()\n if min_max_scaler:\n scaler\ - \ = MinMaxScaler()\n\n df = pd.DataFrame(scaler.fit_transform(df))\n\ - \ df['Labels'] = labels\n normalized_iris_dataset.metadata['state']\ - \ = \"Normalized\"\n with open(normalized_iris_dataset.path, 'w') as\ - \ f:\n df.to_csv(f)\n\n" - image: quay.io/hukhan/iris-base:1 + ):\n\n import pandas as pd\n from sklearn.preprocessing import MinMaxScaler\n\ + \ from sklearn.preprocessing import StandardScaler\n\n with open(input_iris_dataset.path)\ + \ as f:\n df = pd.read_csv(f)\n labels = df.pop('Labels')\n\n\ + \ scaler = StandardScaler() if standard_scaler else MinMaxScaler()\n\n\ + \ df = pd.DataFrame(scaler.fit_transform(df))\n df['Labels'] = labels\n\ + \ normalized_iris_dataset.metadata['state'] = \"Normalized\"\n with\ + \ open(normalized_iris_dataset.path, 'w') as f:\n df.to_csv(f)\n\n" + image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0 exec-train-model: container: args: @@ -147,27 +146,31 @@ data: - -c - "\nif ! 
[ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ - \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.1'\ - \ && \"$0\" \"$@\"\n" + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.7.0'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ + \ python3 -m pip install --quiet --no-warn-script-location 'pandas==2.2.0'\ + \ 'scikit-learn==1.4.0' && \"$0\" \"$@\"\n" - sh - -ec - 'program_path=$(mktemp -d) + printf "%s" "$0" > "$program_path/ephemeral_component.py" - python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef train_model(\n normalized_iris_dataset: Input[Dataset],\n\ \ model: Output[Model],\n metrics: Output[ClassificationMetrics],\n\ \ n_neighbors: int,\n):\n import pickle\n\n import pandas as pd\n\ - \ from sklearn.neighbors import KNeighborsClassifier\n\n from sklearn.metrics\ - \ import roc_curve\n from sklearn.model_selection import train_test_split,\ - \ cross_val_predict\n from sklearn.metrics import confusion_matrix\n\n\ - \n with open(normalized_iris_dataset.path) as f:\n df = pd.read_csv(f)\n\ - \n y = df.pop('Labels')\n X = df\n\n X_train, X_test, y_train,\ - \ y_test = train_test_split(X, y, random_state=0)\n\n clf = KNeighborsClassifier(n_neighbors=n_neighbors)\n\ + \ from sklearn.model_selection import train_test_split\n from sklearn.neighbors\ + \ import KNeighborsClassifier\n\n from sklearn.metrics import roc_curve\n\ + \ from sklearn.model_selection import train_test_split, cross_val_predict\n\ + \ from sklearn.metrics import confusion_matrix\n\n\n with open(normalized_iris_dataset.path)\ + \ as f:\n df = pd.read_csv(f)\n\n y = df.pop('Labels')\n X\ + \ = df\n\n X_train, X_test, y_train, y_test = train_test_split(X, y,\ + \ random_state=0)\n\n clf = KNeighborsClassifier(n_neighbors=n_neighbors)\n\ \ clf.fit(X_train, y_train)\n\n predictions = cross_val_predict(\n\ \ clf, X_train, y_train, cv=3)\n metrics.log_confusion_matrix(\n\ \ ['Iris-Setosa', 'Iris-Versicolour', 'Iris-Virginica'],\n \ @@ -175,7 +178,7 @@ data: \ # .tolist() to convert np array to list.\n )\n\n model.metadata['framework']\ \ = 'scikit-learn'\n with open(model.path, 'wb') as f:\n pickle.dump(clf,\ \ f)\n\n" - image: quay.io/hukhan/iris-base:1 + image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0 pipelineInfo: name: iris-training-pipeline root: @@ -208,9 +211,6 @@ data: outputArtifactKey: iris_dataset producerTask: create-dataset parameters: - min_max_scaler: - runtimeValue: - constant: false standard_scaler: runtimeValue: constant: true @@ -236,14 +236,13 @@ data: name: train-model inputDefinitions: parameters: - min_max_scaler: - defaultValue: true - parameterType: BOOLEAN neighbors: - defaultValue: 3 + defaultValue: 3.0 + isOptional: true parameterType: NUMBER_INTEGER standard_scaler: - defaultValue: false + defaultValue: true + isOptional: true parameterType: BOOLEAN outputDefinitions: artifacts: @@ -252,4 +251,4 @@ data: schemaTitle: system.ClassificationMetrics schemaVersion: 0.0.1 schemaVersion: 2.1.0 - sdkVersion: kfp-2.0.1 + sdkVersion: kfp-2.7.0 diff --git a/docs/example_pipelines/iris/iris-pipeline.py b/docs/example_pipelines/iris/iris-pipeline.py index 
2122c25e..73e9c24c 100644 --- a/docs/example_pipelines/iris/iris-pipeline.py +++ b/docs/example_pipelines/iris/iris-pipeline.py @@ -7,6 +7,7 @@ from kfp.dsl import Input from kfp.dsl import Model from kfp.dsl import Output +from kfp.dsl import ClassificationMetrics @dsl.component( @@ -33,11 +34,7 @@ def normalize_dataset( input_iris_dataset: Input[Dataset], normalized_iris_dataset: Output[Dataset], standard_scaler: bool, - min_max_scaler: bool, ): - if standard_scaler is min_max_scaler: - raise ValueError( - 'Exactly one of standard_scaler or min_max_scaler must be True.') import pandas as pd from sklearn.preprocessing import MinMaxScaler @@ -47,10 +44,7 @@ def normalize_dataset( df = pd.read_csv(f) labels = df.pop('Labels') - if standard_scaler: - scaler = StandardScaler() - if min_max_scaler: - scaler = MinMaxScaler() + scaler = StandardScaler() if standard_scaler else MinMaxScaler() df = pd.DataFrame(scaler.fit_transform(df)) df['Labels'] = labels @@ -66,6 +60,7 @@ def normalize_dataset( def train_model( normalized_iris_dataset: Input[Dataset], model: Output[Model], + metrics: Output[ClassificationMetrics], n_neighbors: int, ): import pickle @@ -74,6 +69,11 @@ def train_model( from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier + from sklearn.metrics import roc_curve + from sklearn.model_selection import train_test_split, cross_val_predict + from sklearn.metrics import confusion_matrix + + with open(normalized_iris_dataset.path) as f: df = pd.read_csv(f) @@ -85,6 +85,15 @@ def train_model( clf = KNeighborsClassifier(n_neighbors=n_neighbors) clf.fit(X_train, y_train) + predictions = cross_val_predict( + clf, X_train, y_train, cv=3) + metrics.log_confusion_matrix( + ['Iris-Setosa', 'Iris-Versicolour', 'Iris-Virginica'], + confusion_matrix( + y_train, + predictions).tolist() # .tolist() to convert np array to list. 
+ ) + model.metadata['framework'] = 'scikit-learn' with open(model.path, 'wb') as f: pickle.dump(clf, f) @@ -92,16 +101,14 @@ def train_model( @dsl.pipeline(name='iris-training-pipeline') def my_pipeline( - standard_scaler: bool, - min_max_scaler: bool, - neighbors: int, + standard_scaler: bool = True, + neighbors: int = 3, ): create_dataset_task = create_dataset() normalize_dataset_task = normalize_dataset( input_iris_dataset=create_dataset_task.outputs['iris_dataset'], - standard_scaler=True, - min_max_scaler=False) + standard_scaler=True) train_model( normalized_iris_dataset=normalize_dataset_task diff --git a/docs/example_pipelines/iris/iris-pipeline.yaml b/docs/example_pipelines/iris/iris-pipeline.yaml index 9246b391..4f6ae902 100644 --- a/docs/example_pipelines/iris/iris-pipeline.yaml +++ b/docs/example_pipelines/iris/iris-pipeline.yaml @@ -1,9 +1,10 @@ # PIPELINE DEFINITION # Name: iris-training-pipeline # Inputs: -# min_max_scaler: bool -# neighbors: int -# standard_scaler: bool +# neighbors: int [Default: 3.0] +# standard_scaler: bool [Default: True] +# Outputs: +# train-model-metrics: system.ClassificationMetrics components: comp-create-dataset: executorLabel: exec-create-dataset @@ -22,8 +23,6 @@ components: schemaTitle: system.Dataset schemaVersion: 0.0.1 parameters: - min_max_scaler: - parameterType: BOOLEAN standard_scaler: parameterType: BOOLEAN outputDefinitions: @@ -45,6 +44,10 @@ components: parameterType: NUMBER_INTEGER outputDefinitions: artifacts: + metrics: + artifactType: + schemaTitle: system.ClassificationMetrics + schemaVersion: 0.0.1 model: artifactType: schemaTitle: system.Model @@ -113,17 +116,13 @@ deploymentSpec: - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef normalize_dataset(\n input_iris_dataset: Input[Dataset],\n\ \ normalized_iris_dataset: Output[Dataset],\n standard_scaler: bool,\n\ - \ min_max_scaler: bool,\n):\n if standard_scaler is min_max_scaler:\n\ - \ raise ValueError(\n 'Exactly one of standard_scaler\ - \ or min_max_scaler must be True.')\n\n import pandas as pd\n from\ - \ sklearn.preprocessing import MinMaxScaler\n from sklearn.preprocessing\ - \ import StandardScaler\n\n with open(input_iris_dataset.path) as f:\n\ - \ df = pd.read_csv(f)\n labels = df.pop('Labels')\n\n if standard_scaler:\n\ - \ scaler = StandardScaler()\n if min_max_scaler:\n scaler\ - \ = MinMaxScaler()\n\n df = pd.DataFrame(scaler.fit_transform(df))\n\ - \ df['Labels'] = labels\n normalized_iris_dataset.metadata['state']\ - \ = \"Normalized\"\n with open(normalized_iris_dataset.path, 'w') as\ - \ f:\n df.to_csv(f)\n\n" + ):\n\n import pandas as pd\n from sklearn.preprocessing import MinMaxScaler\n\ + \ from sklearn.preprocessing import StandardScaler\n\n with open(input_iris_dataset.path)\ + \ as f:\n df = pd.read_csv(f)\n labels = df.pop('Labels')\n\n\ + \ scaler = StandardScaler() if standard_scaler else MinMaxScaler()\n\n\ + \ df = pd.DataFrame(scaler.fit_transform(df))\n df['Labels'] = labels\n\ + \ normalized_iris_dataset.metadata['state'] = \"Normalized\"\n with\ + \ open(normalized_iris_dataset.path, 'w') as f:\n df.to_csv(f)\n\n" image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0 exec-train-model: container: @@ -153,20 +152,33 @@ deploymentSpec: ' - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ \ *\n\ndef train_model(\n normalized_iris_dataset: Input[Dataset],\n\ - \ model: Output[Model],\n n_neighbors: int,\n):\n import pickle\n\ - \n import pandas as pd\n from 
sklearn.model_selection import train_test_split\n\ - \ from sklearn.neighbors import KNeighborsClassifier\n\n with open(normalized_iris_dataset.path)\ + \ model: Output[Model],\n metrics: Output[ClassificationMetrics],\n\ + \ n_neighbors: int,\n):\n import pickle\n\n import pandas as pd\n\ + \ from sklearn.model_selection import train_test_split\n from sklearn.neighbors\ + \ import KNeighborsClassifier\n\n from sklearn.metrics import roc_curve\n\ + \ from sklearn.model_selection import train_test_split, cross_val_predict\n\ + \ from sklearn.metrics import confusion_matrix\n\n\n with open(normalized_iris_dataset.path)\ \ as f:\n df = pd.read_csv(f)\n\n y = df.pop('Labels')\n X\ \ = df\n\n X_train, X_test, y_train, y_test = train_test_split(X, y,\ \ random_state=0)\n\n clf = KNeighborsClassifier(n_neighbors=n_neighbors)\n\ - \ clf.fit(X_train, y_train)\n\n model.metadata['framework'] = 'scikit-learn'\n\ - \ with open(model.path, 'wb') as f:\n pickle.dump(clf, f)\n\n" - + \ clf.fit(X_train, y_train)\n\n predictions = cross_val_predict(\n\ + \ clf, X_train, y_train, cv=3)\n metrics.log_confusion_matrix(\n\ + \ ['Iris-Setosa', 'Iris-Versicolour', 'Iris-Virginica'],\n \ + \ confusion_matrix(\n y_train,\n predictions).tolist()\ + \ # .tolist() to convert np array to list.\n )\n\n model.metadata['framework']\ + \ = 'scikit-learn'\n with open(model.path, 'wb') as f:\n pickle.dump(clf,\ + \ f)\n\n" image: quay.io/opendatahub/ds-pipelines-sample-base:v1.0 pipelineInfo: name: iris-training-pipeline root: dag: + outputs: + artifacts: + train-model-metrics: + artifactSelectors: + - outputArtifactKey: metrics + producerSubtask: train-model tasks: create-dataset: cachingOptions: @@ -189,9 +201,6 @@ root: outputArtifactKey: iris_dataset producerTask: create-dataset parameters: - min_max_scaler: - runtimeValue: - constant: false standard_scaler: runtimeValue: constant: true @@ -217,11 +226,19 @@ root: name: train-model inputDefinitions: parameters: - min_max_scaler: - parameterType: BOOLEAN neighbors: + defaultValue: 3.0 + isOptional: true parameterType: NUMBER_INTEGER standard_scaler: + defaultValue: true + isOptional: true parameterType: BOOLEAN + outputDefinitions: + artifacts: + train-model-metrics: + artifactType: + schemaTitle: system.ClassificationMetrics + schemaVersion: 0.0.1 schemaVersion: 2.1.0 sdkVersion: kfp-2.7.0
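The regenerated ConfigMap templates above pin pandas==2.2.0 and scikit-learn==1.4.0 in each component's bootstrap command; with kfp 2.x those install lines are emitted from the component decorator rather than written by hand. The decorator arguments themselves are not part of the hunks in this patch, so the values below are an assumption inferred from the generated YAML; a minimal sketch of the corresponding component definition:

# Sketch only: base_image and packages_to_install are assumed from the
# generated deploymentSpec above (ds-pipelines-sample-base:v1.0,
# pandas==2.2.0, scikit-learn==1.4.0); the decorator is not shown in this diff.
from kfp import dsl
from kfp.dsl import Dataset, Input, Output


@dsl.component(
    base_image='quay.io/opendatahub/ds-pipelines-sample-base:v1.0',
    packages_to_install=['pandas==2.2.0', 'scikit-learn==1.4.0'],
)
def normalize_dataset(
    input_iris_dataset: Input[Dataset],
    normalized_iris_dataset: Output[Dataset],
    standard_scaler: bool,
):
    # Body as updated in docs/example_pipelines/iris/iris-pipeline.py:
    # pick exactly one scaler and write the normalized CSV back out.
    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler, StandardScaler

    with open(input_iris_dataset.path) as f:
        df = pd.read_csv(f)
    labels = df.pop('Labels')

    scaler = StandardScaler() if standard_scaler else MinMaxScaler()

    df = pd.DataFrame(scaler.fit_transform(df))
    df['Labels'] = labels
    normalized_iris_dataset.metadata['state'] = "Normalized"
    with open(normalized_iris_dataset.path, 'w') as f:
        df.to_csv(f)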
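The updated docs/example_pipelines/iris/iris-pipeline.yaml (sdkVersion: kfp-2.7.0) is compiler output, so it can be regenerated from the updated Python definition. A minimal sketch, assuming kfp==2.7.0 is installed locally and the repository root is the working directory:

# Sketch: regenerate iris-pipeline.yaml with the kfp 2.7 compiler.
# The source file name contains a hyphen, so it is loaded via importlib
# rather than a plain import; the module name "iris_pipeline" is arbitrary.
import importlib.util

from kfp import compiler

spec = importlib.util.spec_from_file_location(
    "iris_pipeline", "docs/example_pipelines/iris/iris-pipeline.py")
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)

compiler.Compiler().compile(
    pipeline_func=mod.my_pipeline,
    package_path="docs/example_pipelines/iris/iris-pipeline.yaml",
)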