Skip to content

Experiments example

Paweł Redzyński edited this page Jan 4, 2021 · 4 revisions

A showcase for the DVC experiments use case.

Scenario #1: Baseline model, modify parameters and observe training results

Training code (train.py):

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers

import dvclive
from dvclive.keras import DvcLiveCallback

# MNIST dataset constants: ten digit classes (0-9), 28x28 pixel
# single-channel (grayscale) images.
num_classes = 10
input_shape = (28, 28, 1)

def get_params(params_path="params.yaml"):
    """Load training parameters from a YAML file.

    Args:
        params_path: Path of the YAML file to read. Defaults to
            "params.yaml" (the file DVC tracks with --params).

    Returns:
        The deserialized YAML content — here a dict with the keys
        "optimizer" and "batch_size".
    """
    from ruamel.yaml import YAML

    # typ="safe" never constructs arbitrary Python objects from YAML tags.
    yaml = YAML(typ="safe")
    with open(params_path, "r") as fd:
        return yaml.load(fd)

def get_data():
    """Load MNIST, trim the training split to 10k samples, scale pixels
    to [0, 1], add a trailing channel axis, and one-hot encode labels.

    Returns:
        ((x_train, y_train), (x_test, y_test)) ready for model.fit.
    """
    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

    # Keep only the first 10k training samples so runs stay fast.
    x_train, y_train = x_train[:10000], y_train[:10000]

    # Normalize to [0, 1] and append the channel dimension expected by Conv2D.
    x_train = np.expand_dims(x_train.astype("float32") / 255, -1)
    x_test = np.expand_dims(x_test.astype("float32") / 255, -1)

    # One-hot encode the integer class labels.
    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)

    return (x_train, y_train), (x_test, y_test)

def get_model(optimizer="sgd"):
    """Build and compile a small CNN classifier for MNIST.

    Args:
        optimizer: Keras optimizer name (or instance) to compile with.

    Returns:
        A compiled keras.Sequential model with a softmax output over
        num_classes classes.
    """
    architecture = [
        keras.Input(shape=input_shape),
        layers.Conv2D(16, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Conv2D(32, kernel_size=(3, 3), activation="relu"),
        layers.MaxPooling2D(pool_size=(2, 2)),
        layers.Flatten(),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]
    model = keras.Sequential(architecture)
    model.compile(
        loss="categorical_crossentropy",
        optimizer=optimizer,
        metrics=["accuracy"],
    )
    return model

if __name__ == "__main__":
    # Train the model with parameters from params.yaml, logging live
    # metrics through dvclive so DVC experiments can track the run.
    params = get_params()
    model = get_model(params["optimizer"])
    (x_train, y_train), (x_test, y_test) = get_data()

    dvclive.init("logs")
    model.fit(
        x_train,
        y_train,
        batch_size=params["batch_size"],
        epochs=5,
        validation_data=(x_test, y_test),
        # Keras expects a *list* of callbacks, not a bare callback instance.
        callbacks=[DvcLiveCallback()],
    )

params.yaml

optimizer: sgd
batch_size: 64
  1. Copy training code and params into your test folder
  2. git init && dvc init && git add -A && git commit -am "initial commit"
  3. echo "logs.html" >> .gitignore
  4. dvc run -n train -d train.py --params batch_size,optimizer --plots-no-cache logs/ --metrics-no-cache logs.json python train.py
  5. git add -A && git commit -am "run baseline"
  6. dvc exp run train --params optimizer=adam
  7. dvc exp run train --params optimizer=adamax
  8. show experiments results: dvc exp show --sha
  9. use the experiment SHAs to compare training runs: dvc plots diff master {sha1} {sha2} (you can also use dvc live logs --rev master {sha1} {sha2})

asciicast