From 849ca97104c93c9da16f3e1771243133a0cca30c Mon Sep 17 00:00:00 2001 From: Kristian Flikka Date: Tue, 23 Jun 2020 08:20:10 +0200 Subject: [PATCH] Add logging of model as artifact (#23) * Add logging of model as artifact * Monkey-patch the models with log model according to type --- flass/cli.py | 33 +++++++++++++++++++++++++++------ flass/model.py | 14 ++++++++------ requirements.txt | 1 + setup.py | 5 ++--- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/flass/cli.py b/flass/cli.py index d69db24..8dc2718 100644 --- a/flass/cli.py +++ b/flass/cli.py @@ -7,10 +7,14 @@ import matplotlib.pyplot as plt import mlflow +import mlflow.pyfunc +import mlflow.keras +import mlflow.exceptions + import numpy as np + from skimage.segmentation import mark_boundaries from sklearn.metrics import roc_auc_score, classification_report - from flass.model import train, get_data, plot_incorrect logging.basicConfig( @@ -36,19 +40,22 @@ @click.option("--model-type", required=False, default="kerasconv") @click.option("--subset", required=False, default=-1) @click.option("--lime/--no-lime", default=False) +@click.option("--run-name", required=False) @click.command() -def flass(plot, batch_size, epochs, dataset, model_type, subset, lime): +def flass(plot, batch_size, epochs, dataset, model_type, subset, lime, run_name): logger.info("Obtaining data") data, class_names = get_data(dataset, subset) (x_train, y_train), (x_test, y_test) = data - with mlflow.start_run(run_name=dataset): + with mlflow.start_run(run_name=run_name): + logger.info(f"Artifact URI: {mlflow.get_artifact_uri()}") + logger.info(f"Tracking URI: {mlflow.get_tracking_uri()}") mlflow.log_param("batch_size", batch_size) mlflow.log_param("epochs", epochs) mlflow.log_param("num_train_instances", len(x_train)) mlflow.log_param("ml_method", model_type) - trained_pipeline = train( + trained_model = train( x_train, y_train, batch_size=batch_size, @@ -56,13 +63,27 @@ def flass(plot, batch_size, epochs, dataset, model_type, subset, lime): model_type=model_type, ) + trained_model.log_model(trained_model, "saved-model") + model_location = mlflow.get_artifact_uri("saved-model") + logger.info(f"Loading model from: {model_location}") + + # The pyfunc flavour of Keras seems to require a Pandas dataframe, but the + # Keras flavour (as used when using mlflow.keras) appears to accept the higher + # dimensioned numpy input + try: + loaded_model = mlflow.keras.load_model(model_location) + logger.info("Loaded MLFlow model with keras flavour") + except mlflow.exceptions.MlflowException: + loaded_model = mlflow.pyfunc.load_model(model_location) + logger.info("Loaded MLFlow model with pyfunc flavour") + if lime: # Do a LIME samples = random.sample(range(0, len(x_test)), 10) for i in samples: - limeify(x_test[i], trained_pipeline, class_names) + limeify(x_test[i], loaded_model, class_names) - predicted_y_probabilities = trained_pipeline.predict(x_test) + predicted_y_probabilities = loaded_model.predict(x_test) roc_auc = roc_auc_score(y_test, predicted_y_probabilities, multi_class="ovr") mlflow.log_metric("AUC", roc_auc) logger.info(f"AUC: {roc_auc}") diff --git a/flass/model.py b/flass/model.py index 60c0ae2..15e617f 100755 --- a/flass/model.py +++ b/flass/model.py @@ -2,12 +2,15 @@ import os import matplotlib.pyplot as plt +import mlflow.keras +import mlflow.sklearn import numpy as np -import tensorflow as tf + from skimage.color import gray2rgb from sklearn.pipeline import Pipeline from sklearn.base import BaseEstimator, TransformerMixin from sklearn import svm +import tensorflow as tf os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" logger = logging.getLogger(__name__) @@ -131,17 +134,16 @@ def train(x, y, batch_size, epochs, model_type): if model_type == "kerasconv": model = conv_model() model.summary() - pipeline_steps = [("model", model)] - full_pipeline = Pipeline(steps=pipeline_steps) - full_pipeline.fit(x, y, model__batch_size=batch_size, model__epochs=epochs) - return full_pipeline - + model.fit(x, y, batch_size=batch_size, epochs=epochs) + model.log_model = mlflow.keras.log_model + return model elif model_type == "svm": pipeline_steps = [("image_flattener", ImageFlattener())] model = svm_model() pipeline_steps.append(("model", model)) full_pipeline = Pipeline(steps=pipeline_steps) full_pipeline.fit(x, y) + full_pipeline.log_model = mlflow.sklearn.log_model return full_pipeline diff --git a/requirements.txt b/requirements.txt index 0d4ed64..cf84f47 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +azure-storage-blob click graphviz matplotlib diff --git a/setup.py b/setup.py index 5d9150d..751a4b0 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,7 @@ "Programming Language :: Python :: 3.8", ], install_requires=[ + "azure-storage-blob", "click", "graphviz", "matplotlib", @@ -21,9 +22,7 @@ "tensorflow", ], setup_requires=["wheel", "setuptools"], - extras_require={ - "lime": ["lime"] - }, + extras_require={"lime": ["lime"]}, description="Train Keras Convolutional Neural Network for image classification", long_description="Train Keras Convolutional Neural Network for image classification", entry_points={"console_scripts": ["flass=flass.cli:flass"]},