add example (broken)

beacon-biosignals · ericphanson · Jul 8, 2021 · Jul 6, 2021 · Jul 6, 2021 · Jul 6, 2021
commit fd9167e410e941d95d27765cd03c8f04bbb784d3
diff --git a/.gitignore b/.gitignore
@@ -1 +1 @@
-/Manifest.toml
+Manifest.toml
diff --git a/Project.toml b/Project.toml
@@ -16,7 +16,10 @@ julia = "1.5"
 
 [extras]
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "Flux"]
+test = ["Test", "Flux", "MLDatasets", "Statistics", "Random"]
diff --git a/README.md b/README.md
@@ -47,6 +47,8 @@ model_row.loss # 0.5
 We can make use of the `architecture_version` column to specify a version number for the architectures, in order
 to keep track of for which architectures the weights are valid for.
 
+See [examples/digits.jl](examples/digits.jl) for a larger example.
+
 ## `LegolasFlux.ModelRow`
 
 A `LegolasFlux.ModelRow` is the central object of LegolasFlux. It acts as a Tables.jl-compatible row that can store the weights
@@ -78,4 +80,5 @@ one might name files produced by this row as e.g. `training_run.digits.model.arr
 Note in this example the schema is called `digits.model` instead of just say `digits`, since the package Digits might want to
 create other Legolas schemas as well at some point.
 
-Check out the [Legolas.jl](https://github.com/beacon-biosignals/Legolas.jl/) repo to see more about how its extensible schema system works.
+Check out the [Legolas.jl](https://github.com/beacon-biosignals/Legolas.jl/) repo to see more about how its extensible schema system works,
+and the example at [examples/digits.jl](examples/digits.jl).
diff --git a/examples/Project.toml b/examples/Project.toml
@@ -0,0 +1,6 @@
+[deps]
+Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
+Legolas = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd"
+LegolasFlux = "eb5f792d-d1b1-4535-bae3-d5649ec7daa4"
+MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
diff --git a/examples/digits.jl b/examples/digits.jl
@@ -0,0 +1,104 @@
+# modified from
+# https://discourse.julialang.org/t/how-to-drop-the-dropout-layers-in-flux-jl-when-assessing-model-performance/19924
+
+using Flux, Statistics, Random, Test
+using MLDatasets: MNIST
+using Flux: onehotbatch, onecold, crossentropy, throttle
+using Base.Iterators: repeated, partition
+using Legolas, LegolasFlux
+
+Base.@kwdef struct DigitsConfig  # hyperparameters from which a `DigitsModel` is built
+    seed::Int = 5  # handed to `Random.seed!` before the layers are constructed
+    dropout_rate::Float32 = 0.0f0  # zero by default; NOTE(review): use 0.1f0 if 10% was meant
+end
+
+struct DigitsModel  # a Flux chain bundled with the config it was built from
+    chain::Chain  # the layers; the only field listed in the `@functor` call below
+    config::DigitsConfig  # kept so a saved row can rebuild the same architecture
+end
+
+Flux.@functor DigitsModel (chain,)  # only `chain` is registered; `config` is not listed
+
+function DigitsModel(config::DigitsConfig = DigitsConfig())
+    Random.seed!(config.seed)  # seed first; presumably makes layer initialisation repeatable
+    drop() = Dropout(config.dropout_rate)  # every Dropout layer uses the configured rate
+    chain = Chain(
+        drop(),
+        Conv((3, 3), 1=>32, relu),
+        BatchNorm(32, relu),
+        x -> maxpool(x, (2,2)),
+        drop(),
+        Conv((3, 3), 32=>16, relu),
+        drop(),
+        x -> maxpool(x, (2,2)),
+        drop(),
+        Conv((3, 3), 16=>10, relu),
+        drop(),
+        x -> reshape(x, :, size(x, 4)),
+        drop(),
+        Dense(90, 10), softmax)
+    return DigitsModel(chain, config)
+end
+
+(model::DigitsModel)(xs) = model.chain(xs)  # calling a model forwards the input to its chain
+
+const DigitsRow = Legolas.@row("digits.model@1" > "legolas-flux.model@1",  # schema for saved digit models
+                               config::DigitsConfig,  # settings needed to rebuild the model
+                               epoch::Union{Missing, Int},  # may be `missing`
+                               accuracy::Union{Missing, Float32})  # may be `missing`
+
+function DigitsRow(model::DigitsModel; epoch=missing, accuracy=missing)
+    ps = collect(params(model))  # only the arrays returned by `params` are stored
+    return DigitsRow(; weights=ps, config=model.config, epoch=epoch, accuracy=accuracy)
+end
+
+function DigitsModel(row)
+    model = DigitsModel(row.config)  # fresh model with the architecture the config describes
+    Flux.loadparams!(model, collect(row.weights))  # then load the row's stored weights into it
+    return model
+end
+
+N_train = 10_000  # number of training samples requested from MNIST
+N_test = 500  # number of test samples requested from MNIST
+
+train_x, train_y = MNIST.traindata(Float32, 1:N_train)  # images as Float32, indices 1:N_train
+test_x,  test_y  = MNIST.testdata(Float32, 1:N_test)  # images as Float32, indices 1:N_test
+
+# Partition the training indices into chunks of `batch_size` (the last chunk may be shorter)
+batch_size = 32
+train = [(reshape(train_x[:, :, I], 28, 28, 1, :), onehotbatch(train_y[I], 0:9))  # (images, one-hot labels)
+         for I in partition(1:N_train, batch_size)]  # one tuple per index chunk
+
+tX = reshape(test_x, 28, 28, 1, :)  # test images reshaped to 28×28×1×N
+tY = onehotbatch(test_y, 0:9)  # test labels one-hot encoded over the digits 0:9
+
+function accuracy(m, x, y)
+    testmode!(m)  # predict in test mode
+    hits = onecold(m(x)) .== onecold(y)  # elementwise: predicted class equals true class
+    trainmode!(m)  # always leaves `m` in train mode, whatever mode it had on entry
+    return mean(hits)  # fraction of correct predictions
+end
+
+function train_model!(m)
+    opt = ADAM()
+    loss(x, y) = crossentropy(m(x), y)  # closes over `m`, the model being trained
+    evalcb = throttle(() -> @show(accuracy(m, tX, tY)), 5)  # progress report, throttled (timeout 5)
+    Flux.@epochs 1 Flux.train!(loss, params(m), train, opt; cb=evalcb)
+    return accuracy(m, tX, tY)  # final accuracy on the global test arrays `tX`/`tY`
+end
+
+m = DigitsModel()  # model built from the default `DigitsConfig`
+acc = train_model!(m)  # train, then keep the accuracy it returns
+
+row = DigitsRow(m; epoch=1, accuracy=acc)  # snapshot the model's weights, config and metadata
+
+testmode!(m)  # both models are compared in test mode
+input = tX[:, :, :, 1:1]  # first test image; `1:1` keeps the 4th (batch) dimension
+output = m(input)  # prediction of the trained model
+label = tY[:, 1]  # one-hot label of that image (not used below)
+
+m2 = DigitsModel(row)  # rebuild a model from the row alone
+testmode!(m2)
+output2 = m2(input)  # prediction of the rebuilt model
+# NOTE(review): presumably state outside `params` (e.g. BatchNorm running stats) is not saved — confirm
+@test_broken output ≈ output2  # recorded as a known failure: the round trip does not match yet
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -43,3 +43,7 @@ end
     tbl = [(; weights = w)]
     @test Arrow.Table(Arrow.tobuffer(tbl)).weights[1] == w
 end
+
+@testset "Example" begin  # runs the example script as part of the test suite
+    include("../examples/digits.jl")  # path is relative to this file; the script trains a model
+end