Merge pull request #8 from ulivc/mlflow
Mlflow
stroblme authored Aug 7, 2023
2 parents 3948c09 + 1d47e20 commit c7a3bef
Showing 8 changed files with 71 additions and 82 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -12,6 +12,7 @@ conf/**/*credentials*
 # ignore everything in the following folders
 data/**
 logs/**
+mlruns/**
 src/test_1optimizer.py
 src/test_2optimizer.py
 # except their sub-folders
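Note: mlruns/ is the default directory the MLflow client creates for local tracking when no remote tracking URI is configured, so it is ignored alongside data/ and logs/.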
31 changes: 14 additions & 17 deletions conf/base/catalog.yml
@@ -3,29 +3,26 @@
 # Documentation for this file format can be found in "The Data Catalog"
 # Link: https://docs.kedro.org/en/stable/data/data_catalog.html

-data_science.model_tracking:
-  type: tracking.JSONDataSet
-  filepath: data/09_tracking/model_history.json
-
 data_science.model:
   type: split_optimizer.helpers.dataset.TorchLocalModel
   filepath: data/06_models/model.pt
   model: model

-data_science.test_tracking:
-  type: tracking.JSONDataSet
-  filepath: data/09_tracking/test_output.json
+data_science.metrics:
+  type: kedro_mlflow.io.metrics.MlflowMetricsDataSet
+  prefix: metrics

 data_science.loss_curve:
-  type: kedro.extras.datasets.plotly.JSONDataSet
-  filepath: data/08_reporting/loss_curve.json
-  versioned: True
+  type: kedro_mlflow.io.artifacts.MlflowArtifactDataSet
+  data_set:
+    type: kedro.extras.datasets.plotly.JSONDataSet
+    filepath: data/08_reporting/loss_curve.json


 data_science.confusionmatrix:
-  type: kedro.extras.datasets.plotly.JSONDataSet
-  filepath: data/08_reporting/confusionmatrix.json
-  versioned: True
-
-data_science.params_tracking:
-  type: tracking.JSONDataSet
-  filepath: data/09_tracking/params.json
+  type: kedro_mlflow.io.artifacts.MlflowArtifactDataSet
+  data_set:
+    type: kedro.extras.datasets.plotly.JSONDataSet
+    filepath: data/08_reporting/confusionmatrix.json
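Note: the two kedro_mlflow dataset types divide the work here: MlflowMetricsDataSet logs a dictionary of metrics to the active MLflow run under the given prefix, while MlflowArtifactDataSet wraps an ordinary Kedro dataset so that every save is also attached to the run as an artifact. A minimal Python sketch of the wrapper idea, assuming an active run (save_plot_and_log is an illustrative name, not plugin code):

import mlflow
from kedro.extras.datasets.plotly import JSONDataSet

def save_plot_and_log(fig, filepath="data/08_reporting/loss_curve.json"):
    # Ordinary Kedro save: write the plotly figure to its local filepath.
    JSONDataSet(filepath=filepath).save(fig)
    # What the MlflowArtifactDataSet wrapper adds: attach the same file
    # to the currently active MLflow run.
    mlflow.log_artifact(filepath)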
2 changes: 1 addition & 1 deletion conf/base/parameters/data_science.yml
@@ -1,7 +1,7 @@
 data_science:
   number_of_qubits: 6
   loss_func: MSELoss
-  epochs: 10
+  epochs: 2
   learning_rate: 0.03
   two_optimizers: True
   TEST_SIZE: ${TEST_SIZE}
4 changes: 3 additions & 1 deletion src/split_optimizer/pipeline_registry.py
@@ -15,8 +15,10 @@ def register_pipelines() -> dict[str, Pipeline]:
         A mapping from pipeline names to ``Pipeline`` objects.
     """
     data_processing_pipeline = data_processing.create_pipeline()
-    data_science_pipeline = data_science.create_pipeline()
+    data_science_pipeline = data_science.create_training_pipeline()

     return {
+        "debug_pipeline": data_processing_pipeline + data_science_pipeline,
+        "data_processing_pipeline": data_processing_pipeline,
+        "data_science_pipeline": data_science_pipeline,
         "__default__": data_processing_pipeline + data_science_pipeline,
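Note: with the pipelines registered under explicit names, each stage can be run on its own, e.g. kedro run --pipeline data_science_pipeline, while a bare kedro run still executes the combined __default__ pipeline.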
4 changes: 2 additions & 2 deletions src/split_optimizer/pipelines/data_science/__init__.py
@@ -3,8 +3,8 @@
 generated using Kedro 0.18.1
 """

-from .pipeline import create_pipeline
+from .pipeline import create_training_pipeline

-__all__ = ["create_pipeline"]
+__all__ = ["create_training_pipeline"]

 __version__ = "0.1"
80 changes: 42 additions & 38 deletions src/split_optimizer/pipelines/data_science/nodes.py
@@ -11,6 +11,7 @@
 from typing import Any, Dict, List, Tuple
 import plotly.express as px
 from .optimizer import Split_optimizer
+import mlflow

 # epochs: int, TRAINING_SIZE: int, dataset: list[np.ndarray]
 from torch.utils.data.dataloader import DataLoader
@@ -25,7 +26,6 @@ def train_model(
     train_dataloader: DataLoader,
     test_dataloader: DataLoader,
 ) -> Dict:
-

     model = Net()
     if loss_func == "MSELoss":
@@ -34,8 +34,7 @@ def train_model(
     if two_optimizers:
         optimizer = Split_optimizer(model, learning_rate)
     else:
-        optimizer = optim.Adam(model.parameters(), learning_rate )
-
+        optimizer = optim.Adam(model.parameters(), learning_rate)

     train_loss_list = []
     val_loss_list = []
@@ -48,7 +47,6 @@ def train_model(
             loss.backward()
             optimizer.step()
             total_loss.append(loss.item())
-
         train_loss_list.append(sum(total_loss) / len(total_loss))
         print(
             "Training [{:.0f}%]\tLoss: {:.4f}".format(
@@ -68,9 +66,11 @@ def train_model(
         val_loss_list.append(np.mean(epoch_loss))

     model_history = {"train_loss_list": train_loss_list, "val_loss_list": val_loss_list}
-    model_tracking = model_history
-
-    return {"model": model, "model_history": model_history, "model_tracking": model_tracking}
+
+    return {
+        "model": model,
+        "model_history": model_history,
+    }


 def test_model(
@@ -86,18 +86,18 @@ def test_model(
     predictions_onehot = []
     for data, target in test_dataloader:
         output = model(data)
-
         predictions_onehot.append(output)

         for i in output:
             pred = i.argmax()
             if pred == target.argmax():
-                correct += 1
+                correct += 1

         loss = calculate_loss(output, target)
         test_loss.append(loss.item())

-    accuracy = correct / TEST_SIZE
+    accuracy = correct / TEST_SIZE
     average_test_loss = sum(test_loss) / len(test_loss)

     print(
@@ -115,9 +115,32 @@ def test_model(
         "accuracy": accuracy,
         "pred": label_predictions,
     }
-    test_tracking = test_output

-    return test_output, test_tracking
+    return {"test_output": test_output}
+
+
+def mlflow_tracking(model_history, test_output):
+    train_loss = []
+    for i, e in enumerate(model_history["train_loss_list"]):
+        train_loss.append({"value": e, "step": i})
+
+    val_loss = []
+    for i, e in enumerate(model_history["val_loss_list"]):
+        val_loss.append({"value": e, "step": i})
+
+    predictions = []
+    for i, e in enumerate(test_output["pred"]):
+        predictions.append({"value": e, "step": i})
+
+    metrics = {
+        "train_loss": train_loss,
+        "val_loss": val_loss,
+        "predictions": predictions,
+        "average_test_loss": {"value": test_output["average_test_loss"], "step": 1},
+        "accuracy": {"value": test_output["accuracy"], "step": 1},
+    }
+
+    return {"metrics": metrics}


 def plot_loss(model_history: dict) -> plt.figure:
@@ -143,16 +166,16 @@ def plot_loss(model_history: dict) -> plt.figure:
     plt.update_layout(
         title="Training and Validation Loss", xaxis_title="Epochs", yaxis_title="Loss"
     )
-
-    return plt
+    mlflow.log_figure(plt, "loss_curve.html")
+    return {"loss_curve": plt}


 def plot_confusionmatrix(test_output: dict, test_dataloader: DataLoader):
-    test_labels_onehot=[]
-
+    test_labels_onehot = []
     for _, target in test_dataloader:
         test_labels_onehot.append(target)

     test_labels = []
     for i in test_labels_onehot:
         test_labels.append(np.argmax(i).item())
@@ -176,24 +199,5 @@ def plot_confusionmatrix(test_output: dict, test_dataloader: DataLoader):
         xaxis_title="Real Label",
         yaxis_title="Predicted Label",
     )
-    return fig
-
-def parameter_tracking(
-    epochs: int,
-    learning_rate: float,
-    loss_func: str,
-    TRAINING_SIZE: int,
-    TEST_SIZE: int,
-    number_of_qubits: int,
-    two_optimizer: bool
-):
-    params_tracking = {
-        "epochs": epochs,
-        "learning_rate": learning_rate,
-        "loss_func": loss_func,
-        "training_size": TRAINING_SIZE,
-        "test_size": TEST_SIZE,
-        "number_of_qubits": number_of_qubits,
-        "two optimizer": two_optimizer
-    }
-    return params_tracking
+    mlflow.log_figure(fig, "confusion_matrix.html")
+    return {"confusionmatrix": fig}
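Note: the metrics dictionary built by mlflow_tracking is shaped the way kedro_mlflow's MlflowMetricsDataSet expects, either a list of {"value", "step"} points for a per-step series or a single such dict for a one-off value. A rough standalone sketch of the logging this triggers, using only the plain mlflow API rather than kedro_mlflow's actual code (the example values are made up):

import mlflow

# Payload in the same shape mlflow_tracking() returns (example values only).
metrics = {
    "train_loss": [{"value": 0.91, "step": 0}, {"value": 0.47, "step": 1}],
    "accuracy": {"value": 0.83, "step": 1},
}

with mlflow.start_run():
    for name, entry in metrics.items():
        points = entry if isinstance(entry, list) else [entry]
        for point in points:
            # One MLflow metric point per (value, step) pair.
            mlflow.log_metric(name, point["value"], step=point["step"])

The plot nodes rely on mlflow.log_figure, which accepts plotly figures directly and saves them under the given artifact file name.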
26 changes: 8 additions & 18 deletions src/split_optimizer/pipelines/data_science/pipeline.py
@@ -6,11 +6,11 @@
     test_model,
     plot_loss,
     plot_confusionmatrix,
-    parameter_tracking,
+    mlflow_tracking,
 )


-def create_pipeline(**kwargs) -> Pipeline:
+def create_training_pipeline(**kwargs) -> Pipeline:
     return pipeline(
         [
             node(
@@ -26,7 +26,6 @@ def create_pipeline(**kwargs) -> Pipeline:
                 outputs={
                     "model": "model",
                     "model_history": "model_history",
-                    "model_tracking": "model_tracking",
                 },
                 name="train_model",
             ),
@@ -38,28 +37,19 @@ def create_pipeline(**kwargs) -> Pipeline:
                     "params:TEST_SIZE",
                     "test_dataloader",
                 ],
-                outputs=["test_output", "test_tracking"],
+                outputs={"test_output": "test_output"},
                 name="test_model",
             ),
-            node(plot_loss, inputs="model_history", outputs="loss_curve"),
+            node(plot_loss, inputs="model_history", outputs={"loss_curve": "loss_curve"}),
             node(
                 plot_confusionmatrix,
                 inputs=["test_output", "test_dataloader"],
-                outputs="confusionmatrix",
+                outputs={"confusionmatrix": "confusionmatrix"},
             ),
             node(
-                parameter_tracking,
-                inputs=[
-                    "params:epochs",
-                    "params:learning_rate",
-                    "params:loss_func",
-                    "params:TRAINING_SIZE",
-                    "params:TEST_SIZE",
-                    "params:number_of_qubits"
-                    "params:two_optimizers"
-                ],
-                outputs="params_tracking",
-                name="parameter_tracking",
+                mlflow_tracking,
+                inputs=["model_history", "test_output"],
+                outputs={"metrics": "metrics"},
             ),
         ],
         inputs={
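Note: with the node functions now returning dictionaries, each outputs mapping pairs a key of the returned dict with the catalog entry it is saved to. For example, {"metrics": "metrics"} routes the metrics dict to the data_science.metrics entry (presumably via the pipeline's data_science namespace, whose definition is truncated here) and hence into MLflow.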
5 changes: 0 additions & 5 deletions src/split_optimizer/settings.py
@@ -40,13 +40,8 @@
 # from kedro.io import DataCatalog
 # DATA_CATALOG_CLASS = DataCatalog


-from kedro_viz.integrations.kedro.sqlite_store import SQLiteStore
-from pathlib import Path
-
-SESSION_STORE_CLASS = SQLiteStore
-SESSION_STORE_ARGS = {"path": str(Path(__file__).parents[2] / "data")}

 from kedro.config import TemplatedConfigLoader
 CONFIG_LOADER_CLASS = TemplatedConfigLoader
 CONFIG_LOADER_ARGS = {
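Note: no hook registration is needed here for MLflow, since kedro-mlflow ships its hooks as Kedro plugin entry points that Kedro discovers automatically. Dropping the kedro-viz SQLiteStore session store, which backed the old experiment tracking, is the only settings change required.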
