From 849ca97104c93c9da16f3e1771243133a0cca30c Mon Sep 17 00:00:00 2001
From: Kristian Flikka <kristian.flikka@gmail.com>
Date: Tue, 23 Jun 2020 08:20:10 +0200
Subject: [PATCH] Add logging of model as artifact (#23)

* Add logging of model as artifact

* Monkey-patch each trained model with the mlflow log_model function matching its type
---
 flass/cli.py     | 33 +++++++++++++++++++++++++++------
 flass/model.py   | 14 ++++++++------
 requirements.txt |  1 +
 setup.py         |  5 ++---
 4 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/flass/cli.py b/flass/cli.py
index d69db24..8dc2718 100644
--- a/flass/cli.py
+++ b/flass/cli.py
@@ -7,10 +7,14 @@
 
 import matplotlib.pyplot as plt
 import mlflow
+import mlflow.pyfunc
+import mlflow.keras
+import mlflow.exceptions
+
 import numpy as np
+
 from skimage.segmentation import mark_boundaries
 from sklearn.metrics import roc_auc_score, classification_report
-
 from flass.model import train, get_data, plot_incorrect
 
 logging.basicConfig(
@@ -36,19 +40,22 @@
 @click.option("--model-type", required=False, default="kerasconv")
 @click.option("--subset", required=False, default=-1)
 @click.option("--lime/--no-lime", default=False)
+@click.option("--run-name", required=False)
 @click.command()
-def flass(plot, batch_size, epochs, dataset, model_type, subset, lime):
+def flass(plot, batch_size, epochs, dataset, model_type, subset, lime, run_name):
     logger.info("Obtaining data")
     data, class_names = get_data(dataset, subset)
 
     (x_train, y_train), (x_test, y_test) = data
 
-    with mlflow.start_run(run_name=dataset):
+    with mlflow.start_run(run_name=run_name):
+        logger.info(f"Artifact URI: {mlflow.get_artifact_uri()}")
+        logger.info(f"Tracking URI: {mlflow.get_tracking_uri()}")
         mlflow.log_param("batch_size", batch_size)
         mlflow.log_param("epochs", epochs)
         mlflow.log_param("num_train_instances", len(x_train))
         mlflow.log_param("ml_method", model_type)
-        trained_pipeline = train(
+        trained_model = train(
             x_train,
             y_train,
             batch_size=batch_size,
@@ -56,13 +63,27 @@ def flass(plot, batch_size, epochs, dataset, model_type, subset, lime):
             model_type=model_type,
         )
 
+        trained_model.log_model(trained_model, "saved-model")
+        model_location = mlflow.get_artifact_uri("saved-model")
+        logger.info(f"Loading model from: {model_location}")
+
+        # The pyfunc flavour of a Keras model seems to require a pandas DataFrame,
+        # whereas the native Keras flavour (loaded via mlflow.keras) appears to accept
+        # higher-dimensional numpy input, so try that first and fall back to pyfunc.
+        try:
+            loaded_model = mlflow.keras.load_model(model_location)
+            logger.info("Loaded MLFlow model with keras flavour")
+        except mlflow.exceptions.MlflowException:
+            loaded_model = mlflow.pyfunc.load_model(model_location)
+            logger.info("Loaded MLFlow model with pyfunc flavour")
+
         if lime:
             # Do a LIME
             samples = random.sample(range(0, len(x_test)), 10)
             for i in samples:
-                limeify(x_test[i], trained_pipeline, class_names)
+                limeify(x_test[i], loaded_model, class_names)
 
-        predicted_y_probabilities = trained_pipeline.predict(x_test)
+        predicted_y_probabilities = loaded_model.predict(x_test)
         roc_auc = roc_auc_score(y_test, predicted_y_probabilities, multi_class="ovr")
         mlflow.log_metric("AUC", roc_auc)
         logger.info(f"AUC: {roc_auc}")
diff --git a/flass/model.py b/flass/model.py
index 60c0ae2..15e617f 100755
--- a/flass/model.py
+++ b/flass/model.py
@@ -2,12 +2,15 @@
 import os
 
 import matplotlib.pyplot as plt
+import mlflow.keras
+import mlflow.sklearn
 import numpy as np
-import tensorflow as tf
+
 from skimage.color import gray2rgb
 from sklearn.pipeline import Pipeline
 from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn import svm
+import tensorflow as tf
 
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 logger = logging.getLogger(__name__)
@@ -131,17 +134,16 @@ def train(x, y, batch_size, epochs, model_type):
     if model_type == "kerasconv":
         model = conv_model()
         model.summary()
-        pipeline_steps = [("model", model)]
-        full_pipeline = Pipeline(steps=pipeline_steps)
-        full_pipeline.fit(x, y, model__batch_size=batch_size, model__epochs=epochs)
-        return full_pipeline
-
+        model.fit(x, y, batch_size=batch_size, epochs=epochs)
+        model.log_model = mlflow.keras.log_model
+        return model
     elif model_type == "svm":
         pipeline_steps = [("image_flattener", ImageFlattener())]
         model = svm_model()
         pipeline_steps.append(("model", model))
         full_pipeline = Pipeline(steps=pipeline_steps)
         full_pipeline.fit(x, y)
+        full_pipeline.log_model = mlflow.sklearn.log_model
         return full_pipeline
 
 
diff --git a/requirements.txt b/requirements.txt
index 0d4ed64..cf84f47 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+azure-storage-blob
 click
 graphviz
 matplotlib
diff --git a/setup.py b/setup.py
index 5d9150d..751a4b0 100644
--- a/setup.py
+++ b/setup.py
@@ -9,6 +9,7 @@
         "Programming Language :: Python :: 3.8",
     ],
     install_requires=[
+        "azure-storage-blob",
         "click",
         "graphviz",
         "matplotlib",
@@ -21,9 +22,7 @@
         "tensorflow",
     ],
     setup_requires=["wheel", "setuptools"],
-    extras_require={
-        "lime": ["lime"]
-    },
+    extras_require={"lime": ["lime"]},
     description="Train Keras Convolutional Neural Network for image classification",
     long_description="Train Keras Convolutional Neural Network for image classification",
     entry_points={"console_scripts": ["flass=flass.cli:flass"]},