From a8b336e74c629a3b77392139208b388f8c49bb0d Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Tue, 23 Apr 2024 13:17:55 +0200 Subject: [PATCH 1/4] add NeuralNetworkBinaryClassifier --- src/classifier.jl | 26 ++++++++++++++++++++++++-- src/core.jl | 6 ++++++ src/types.jl | 9 +++++++-- 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/classifier.jl b/src/classifier.jl index ed9d4cf9..d7523fc5 100644 --- a/src/classifier.jl +++ b/src/classifier.jl @@ -14,14 +14,14 @@ function MLJFlux.shape(model::NeuralNetworkClassifier, X, y) end # builds the end-to-end Flux chain needed, given the `model` and `shape`: -MLJFlux.build(model::NeuralNetworkClassifier, rng, shape) = +MLJFlux.build(model::Union{NeuralNetworkClassifier, NeuralNetworkBinaryClassifier}, rng, shape) = Flux.Chain(build(model.builder, rng, shape...), model.finaliser) # returns the model `fitresult` (see "Adding Models for General Use" # section of the MLJ manual) which must always have the form `(chain, # metadata)`, where `metadata` is anything extra needed by `predict`: -MLJFlux.fitresult(model::NeuralNetworkClassifier, chain, y) = +MLJFlux.fitresult(model::Union{NeuralNetworkClassifier, NeuralNetworkBinaryClassifier}, chain, y) = (chain, MLJModelInterface.classes(y[1])) function MLJModelInterface.predict(model::NeuralNetworkClassifier, @@ -37,3 +37,25 @@ MLJModelInterface.metadata_model(NeuralNetworkClassifier, input=Union{AbstractMatrix{Continuous},Table(Continuous)}, target=AbstractVector{<:Finite}, path="MLJFlux.NeuralNetworkClassifier") + +#### Binary Classifier + +function MLJFlux.shape(model::NeuralNetworkBinaryClassifier, X, y) + X = X isa Matrix ? Tables.table(X) : X + n_input = Tables.schema(X).names |> length + return (n_input, 1) # n_output is always 1 for a binary classifier +end + +function MLJModelInterface.predict(model::NeuralNetworkBinaryClassifier, + fitresult, + Xnew) + chain, levels = fitresult + X = reformat(Xnew) + probs = vec(chain(X)) + return MLJModelInterface.UnivariateFinite(levels, probs; augment = true) +end + +MLJModelInterface.metadata_model(NeuralNetworkBinaryClassifier, + input=Union{AbstractMatrix{Continuous},Table(Continuous)}, + target=AbstractVector{<:Finite{2}}, + path="MLJFlux.NeuralNetworkBinaryClassifier") diff --git a/src/core.jl b/src/core.jl index cca5a145..b67d8aae 100644 --- a/src/core.jl +++ b/src/core.jl @@ -234,3 +234,9 @@ function collate(model, X, y) ymatrix = reformat(y) return [_get(Xmatrix, b) for b in row_batches], [_get(ymatrix, b) for b in row_batches] end +function collate(model::NeuralNetworkBinaryClassifier, X, y) + row_batches = Base.Iterators.partition(1:nrows(y), model.batch_size) + Xmatrix = reformat(X) + yvec = (y .== classes(y)[2])' # convert to boolean + return [_get(Xmatrix, b) for b in row_batches], [_get(yvec, b) for b in row_batches] +end diff --git a/src/types.jl b/src/types.jl index c608abbf..dc4fc939 100644 --- a/src/types.jl +++ b/src/types.jl @@ -3,10 +3,15 @@ abstract type MLJFluxDeterministic <: MLJModelInterface.Deterministic end const MLJFluxModel = Union{MLJFluxProbabilistic,MLJFluxDeterministic} -for Model in [:NeuralNetworkClassifier, :ImageClassifier] +for Model in [:NeuralNetworkClassifier, :NeuralNetworkBinaryClassifier, :ImageClassifier] + # default settings that are not equal across models default_builder_ex = Model == :ImageClassifier ? :(image_builder(VGGHack)) : Short() + default_finaliser = + Model == :NeuralNetworkBinaryClassifier ? Flux.σ : Flux.softmax + default_loss = + Model == :NeuralNetworkBinaryClassifier ? 
Flux.binarycrossentropy : Flux.crossentropy ex = quote mutable struct $Model{B,F,O,L} <: MLJFluxProbabilistic @@ -23,7 +28,7 @@ for Model in [:NeuralNetworkClassifier, :ImageClassifier] acceleration::AbstractResource # eg, `CPU1()` or `CUDALibs()` end - function $Model(; builder::B=$default_builder_ex, finaliser::F=Flux.softmax, optimiser::O=Flux.Optimise.Adam(), loss::L=Flux.crossentropy, epochs=10, batch_size=1, lambda=0, alpha=0, rng=Random.GLOBAL_RNG, optimiser_changes_trigger_retraining=false, acceleration=CPU1() + function $Model(; builder::B=$default_builder_ex, finaliser::F=$default_finaliser, optimiser::O=Flux.Optimise.Adam(), loss::L=$default_loss, epochs=10, batch_size=1, lambda=0, alpha=0, rng=Random.GLOBAL_RNG, optimiser_changes_trigger_retraining=false, acceleration=CPU1() ) where {B,F,O,L} model = $Model{B,F,O,L}(builder, finaliser, optimiser, loss, epochs, batch_size, lambda, alpha, rng, optimiser_changes_trigger_retraining, acceleration From 5842ed9d03161a0663abcbdb7a123a5d960870c3 Mon Sep 17 00:00:00 2001 From: tiemvanderdeure Date: Thu, 30 May 2024 16:56:02 +0200 Subject: [PATCH 2/4] add some docs for the binary classifier --- docs/src/interface/Classification.md | 4 + docs/src/interface/Summary.md | 1 + src/MLJFlux.jl | 2 +- src/types.jl | 193 ++++++++++++++++++++++++++- 4 files changed, 198 insertions(+), 2 deletions(-) diff --git a/docs/src/interface/Classification.md b/docs/src/interface/Classification.md index 0491e8fc..d45d7a2b 100644 --- a/docs/src/interface/Classification.md +++ b/docs/src/interface/Classification.md @@ -1,3 +1,7 @@ ```@docs MLJFlux.NeuralNetworkClassifier +``` + +```@docs +MLJFlux.NeuralNetworkBinaryClassifier ``` \ No newline at end of file diff --git a/docs/src/interface/Summary.md b/docs/src/interface/Summary.md index ecff99d5..a8f7b383 100644 --- a/docs/src/interface/Summary.md +++ b/docs/src/interface/Summary.md @@ -12,6 +12,7 @@ Model Type | Prediction type | `scitype(X) <: _` | `scitype(y) <: _` `NeuralNetworkRegressor` | `Deterministic` | `Table(Continuous)` with `n_in` columns | `AbstractVector{<:Continuous)` (`n_out = 1`) `MultitargetNeuralNetworkRegressor` | `Deterministic` | `Table(Continuous)` with `n_in` columns | `<: Table(Continuous)` with `n_out` columns `NeuralNetworkClassifier` | `Probabilistic` | `<:Table(Continuous)` with `n_in` columns | `AbstractVector{<:Finite}` with `n_out` classes +`NeuralNetworkBinaryClassifier` | `Probabilistic` | `<:Table(Continuous)` with `n_in` columns | `AbstractVector{<:Finite{2}}` (`n_out = 2`) `ImageClassifier` | `Probabilistic` | `AbstractVector(<:Image{W,H})` with `n_in = (W, H)` | `AbstractVector{<:Finite}` with `n_out` classes diff --git a/src/MLJFlux.jl b/src/MLJFlux.jl index 5091d798..bd6011eb 100644 --- a/src/MLJFlux.jl +++ b/src/MLJFlux.jl @@ -29,7 +29,7 @@ include("image.jl") include("mlj_model_interface.jl") export NeuralNetworkRegressor, MultitargetNeuralNetworkRegressor -export NeuralNetworkClassifier, ImageClassifier +export NeuralNetworkClassifier, NeuralNetworkBinaryClassifier, ImageClassifier export CUDALibs, CPU1 diff --git a/src/types.jl b/src/types.jl index dc4fc939..3bc7ce05 100644 --- a/src/types.jl +++ b/src/types.jl @@ -282,11 +282,202 @@ plot(curve.parameter_values, ``` -See also [`ImageClassifier`](@ref). +See also [`ImageClassifier`](@ref), [`NeuralNetworkBinaryClassifier`](@ref). 
""" NeuralNetworkClassifier +""" +$(MMI.doc_header(NeuralNetworkBinaryClassifier)) + +`NeuralNetworkBinaryClassifier` is for training a data-dependent Flux.jl neural network +for making probabilistic predictions of a binary (`Multiclass{2}` or `OrderedFactor{2}`) target, +given a table of `Continuous` features. Users provide a recipe for constructing + the network, based on properties of the data that is encountered, by specifying + an appropriate `builder`. See MLJFlux documentation for more on builders. + +# Training data + +In MLJ or MLJBase, bind an instance `model` to data with + + mach = machine(model, X, y) + +Here: + +- `X` is either a `Matrix` or any table of input features (eg, a `DataFrame`) whose columns are of scitype + `Continuous`; check column scitypes with `schema(X)`. If `X` is a `Matrix`, + it is assumed to have columns corresponding to features and rows corresponding to observations. + +- `y` is the target, which can be any `AbstractVector` whose element scitype is `Multiclass{2}` + or `OrderedFactor{2}`; check the scitype with `scitype(y)` + +Train the machine with `fit!(mach, rows=...)`. + + +# Hyper-parameters + +- `builder=MLJFlux.Short()`: An MLJFlux builder that constructs a neural network. Possible + `builders` include: `MLJFlux.Linear`, `MLJFlux.Short`, and `MLJFlux.MLP`. See + MLJFlux.jl documentation for examples of user-defined builders. See also `finaliser` + below. + +- `optimiser::Flux.Adam()`: A `Flux.Optimise` optimiser. The optimiser performs the + updating of the weights of the network. For further reference, see [the Flux optimiser + documentation](https://fluxml.ai/Flux.jl/stable/training/optimisers/). To choose a + learning rate (the update rate of the optimizer), a good rule of thumb is to start out + at `10e-3`, and tune using powers of 10 between `1` and `1e-7`. + +- `loss=Flux.binarycrossentropy`: The loss function which the network will optimize. Should be a + function which can be called in the form `loss(yhat, y)`. Possible loss functions are + listed in [the Flux loss function + documentation](https://fluxml.ai/Flux.jl/stable/models/losses/). For a classification + task, the most natural loss functions are: + + - `Flux.binarycrossentropy`: Standard binary classification loss, also known as the log + loss. + + - `Flux.logitbinarycrossentropy`: Mathematically equal to crossentropy, but numerically more + stable than finalising the outputs with `σ` and then calculating + crossentropy. You will need to specify `finaliser=identity` to remove MLJFlux's + default sigmoid finaliser, and understand that the output of `predict` is then + unnormalized (no longer probabilistic). + + - `Flux.tversky_loss`: Used with imbalanced data to give more weight to false negatives. + + - `Flux.binary_focal_loss`: Used with highly imbalanced data. Weights harder examples more than + easier examples. + + Currently MLJ measures are not supported values of `loss`. + +- `epochs::Int=10`: The duration of training, in epochs. Typically, one epoch represents + one pass through the complete the training dataset. + +- `batch_size::int=1`: the batch size to be used for training, representing the number of + samples per update of the network weights. Typically, batch size is between 8 and + 512. Increassing batch size may accelerate training if `acceleration=CUDALibs()` and a + GPU is available. + +- `lambda::Float64=0`: The strength of the weight regularization penalty. Can be any value + in the range `[0, ∞)`. 

- `alpha::Float64=0`: The L2/L1 mix of regularization, in the range `[0, 1]`. A value of 0
  represents L2 regularization, and a value of 1 represents L1 regularization.

- `rng::Union{AbstractRNG, Int64}`: The random number generator or seed used during
  training.

- `optimiser_changes_trigger_retraining::Bool=false`: Defines what happens when re-fitting
  a machine if the associated optimiser has changed. If `true`, the associated machine
  will retrain from scratch on `fit!` call, otherwise it will not.

- `acceleration::AbstractResource=CPU1()`: Defines on what hardware training is done. For
  training on GPU, use `CUDALibs()`.

- `finaliser=Flux.σ`: The final activation function of the neural network (applied
  after the network defined by `builder`). Defaults to `Flux.σ`.


# Operations

- `predict(mach, Xnew)`: Return predictions of the target given new features `Xnew`, which
  should have the same scitype as `X` above. Predictions are probabilistic but uncalibrated.

- `predict_mode(mach, Xnew)`: Return the modes of the probabilistic predictions returned
  above.


# Fitted parameters

The fields of `fitted_params(mach)` are:

- `chain`: The trained "chain" (Flux.jl model), namely the series of layers,
  functions, and activations which make up the neural network. This includes
  the final layer specified by `finaliser` (eg, `σ`).


# Report

The fields of `report(mach)` are:

- `training_losses`: A vector of training losses (penalised if `lambda != 0`) in
  historical order, of length `epochs + 1`. The first element is the pre-training loss.

# Examples

In this example we build a binary classification model for the `mtcars` dataset. This is
a very basic example, using a default builder and no standardization. For a more advanced
illustration, see [`NeuralNetworkRegressor`](@ref) or [`ImageClassifier`](@ref), and
examples in the MLJFlux.jl documentation.

```julia
using MLJ, Flux
import RDatasets
```

First, we can load the data:

```julia
mtcars = RDatasets.dataset("datasets", "mtcars");
y, X = unpack(mtcars, ==(:VS), in([:MPG, :Cyl, :Disp, :HP, :WT, :QSec])); # a vector and a table
y = categorical(y) # classifier takes categorical input
X_f32 = Float32.(X) # To match floating point type of the neural network layers
NeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux
bclf = NeuralNetworkBinaryClassifier()
```

Next, we can train the model:

```julia
mach = machine(bclf, X_f32, y)
fit!(mach)
```

We can train the model in an incremental fashion, altering the learning rate as we go,
provided `optimiser_changes_trigger_retraining` is `false` (the default).
Here, we also +change the number of (total) iterations: + +```julia +bclf.optimiser.eta = bclf.optimiser.eta * 2 +bclf.epochs = bclf.epochs + 5 + +fit!(mach, verbosity=2) # trains 5 more epochs +``` + +We can inspect the mean training loss using the `cross_entropy` function: + +```julia +training_loss = cross_entropy(predict(mach, X_f32), y) |> mean +``` + +And we can access the Flux chain (model) using `fitted_params`: + +```julia +chain = fitted_params(mach).chain +``` + +Finally, we can see how the out-of-sample performance changes over time, using MLJ's +`learning_curve` function: + +```julia +r = range(bclf, :epochs, lower=1, upper=200, scale=:log10) +curve = learning_curve(bclf, X_f32, y, + range=r, + resampling=Holdout(fraction_train=0.7), + measure=cross_entropy) +using Plots +plot(curve.parameter_values, + curve.measurements, + xlab=curve.parameter_name, + xscale=curve.parameter_scale, + ylab = "Cross Entropy") + +``` + +See also [`ImageClassifier`](@ref). + +""" +NeuralNetworkBinaryClassifier + """ $(MMI.doc_header(ImageClassifier)) From b7291f6eb226831500d131c0fe2c39d6203ac111 Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 11 Jun 2024 10:11:07 +1200 Subject: [PATCH 3/4] fix defaults for NNBinaryClassifier constructor --- src/classifier.jl | 51 ++++++++++++++++--------- src/types.jl | 4 +- test/classifier.jl | 93 ++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 125 insertions(+), 23 deletions(-) diff --git a/src/classifier.jl b/src/classifier.jl index d7523fc5..145eb019 100644 --- a/src/classifier.jl +++ b/src/classifier.jl @@ -3,7 +3,9 @@ """ shape(model::NeuralNetworkClassifier, X, y) -A private method that returns the shape of the input and output of the model for given data `X` and `y`. +A private method that returns the shape of the input and output of the model for given +data `X` and `y`. + """ function MLJFlux.shape(model::NeuralNetworkClassifier, X, y) X = X isa Matrix ? Tables.table(X) : X @@ -14,29 +16,38 @@ function MLJFlux.shape(model::NeuralNetworkClassifier, X, y) end # builds the end-to-end Flux chain needed, given the `model` and `shape`: -MLJFlux.build(model::Union{NeuralNetworkClassifier, NeuralNetworkBinaryClassifier}, rng, shape) = - Flux.Chain(build(model.builder, rng, shape...), - model.finaliser) +MLJFlux.build( + model::Union{NeuralNetworkClassifier, NeuralNetworkBinaryClassifier}, + rng, + shape, +) = Flux.Chain(build(model.builder, rng, shape...), model.finaliser) # returns the model `fitresult` (see "Adding Models for General Use" # section of the MLJ manual) which must always have the form `(chain, # metadata)`, where `metadata` is anything extra needed by `predict`: -MLJFlux.fitresult(model::Union{NeuralNetworkClassifier, NeuralNetworkBinaryClassifier}, chain, y) = - (chain, MLJModelInterface.classes(y[1])) +MLJFlux.fitresult( + model::Union{NeuralNetworkClassifier, NeuralNetworkBinaryClassifier}, + chain, + y, +) = (chain, MLJModelInterface.classes(y[1])) -function MLJModelInterface.predict(model::NeuralNetworkClassifier, +function MLJModelInterface.predict( + model::NeuralNetworkClassifier, fitresult, - Xnew) + Xnew, + ) chain, levels = fitresult X = reformat(Xnew) probs = vcat([chain(tomat(X[:, i]))' for i in 1:size(X, 2)]...) 
return MLJModelInterface.UnivariateFinite(levels, probs) end -MLJModelInterface.metadata_model(NeuralNetworkClassifier, - input=Union{AbstractMatrix{Continuous},Table(Continuous)}, - target=AbstractVector{<:Finite}, - path="MLJFlux.NeuralNetworkClassifier") +MLJModelInterface.metadata_model( + NeuralNetworkClassifier, + input_scitype=Union{AbstractMatrix{Continuous},Table(Continuous)}, + target_scitype=AbstractVector{<:Finite}, + load_path="MLJFlux.NeuralNetworkClassifier", +) #### Binary Classifier @@ -46,16 +57,20 @@ function MLJFlux.shape(model::NeuralNetworkBinaryClassifier, X, y) return (n_input, 1) # n_output is always 1 for a binary classifier end -function MLJModelInterface.predict(model::NeuralNetworkBinaryClassifier, +function MLJModelInterface.predict( + model::NeuralNetworkBinaryClassifier, fitresult, - Xnew) + Xnew, + ) chain, levels = fitresult X = reformat(Xnew) probs = vec(chain(X)) return MLJModelInterface.UnivariateFinite(levels, probs; augment = true) end -MLJModelInterface.metadata_model(NeuralNetworkBinaryClassifier, - input=Union{AbstractMatrix{Continuous},Table(Continuous)}, - target=AbstractVector{<:Finite{2}}, - path="MLJFlux.NeuralNetworkBinaryClassifier") +MLJModelInterface.metadata_model( + NeuralNetworkBinaryClassifier, + input_scitype=Union{AbstractMatrix{Continuous},Table(Continuous)}, + target_scitype=AbstractVector{<:Finite{2}}, + load_path="MLJFlux.NeuralNetworkBinaryClassifier", +) diff --git a/src/types.jl b/src/types.jl index d261ec41..89744c24 100644 --- a/src/types.jl +++ b/src/types.jl @@ -30,9 +30,9 @@ for Model in [:NeuralNetworkClassifier, :NeuralNetworkBinaryClassifier, :ImageCl function $Model( ;builder::B=$default_builder_ex, - finaliser::F=Flux.softmax, + finaliser::F=$default_finaliser, optimiser::O=Optimisers.Adam(), - loss::L=Flux.crossentropy, + loss::L=$default_loss, epochs=10, batch_size=1, lambda=0, diff --git a/test/classifier.jl b/test/classifier.jl index 81ca2023..ce167b39 100644 --- a/test/classifier.jl +++ b/test/classifier.jl @@ -1,4 +1,4 @@ -## NEURAL NETWORK CLASSIFIER +# # NEURAL NETWORK CLASSIFIER seed!(1234) N = 300 @@ -59,7 +59,7 @@ losses = [] end dist = MLJBase.UnivariateFinite(prob_given_class) loss_baseline = - StatisticalMeasures.cross_entropy(fill(dist, length(test)), y[test]) |> mean + StatisticalMeasures.cross_entropy(fill(dist, length(test)), y[test]) # check flux model is an improvement on predicting constant # distribution @@ -76,7 +76,7 @@ losses = [] first_last_training_loss = MLJBase.report(mach)[1][[1, end]] push!(losses, first_last_training_loss[2]) yhat = MLJBase.predict(mach, rows=test); - @test mean(StatisticalMeasures.cross_entropy(yhat, y[test])) < 0.95*loss_baseline + @test StatisticalMeasures.cross_entropy(yhat, y[test]) < 0.95*loss_baseline optimisertest(MLJFlux.NeuralNetworkClassifier, X, @@ -91,4 +91,91 @@ end reference = losses[1] @test all(x->abs(x - reference)/reference < 1e-5, losses[2:end]) + +# # NEURAL NETWORK BINARY CLASSIFIER + +@testset "NeuralNetworkBinaryClassifier constructor" begin + model = NeuralNetworkBinaryClassifier() + @test model.loss == Flux.binarycrossentropy + @test model.builder isa MLJFlux.Short + @test model.finaliser == Flux.σ +end + +seed!(1234) +N = 300 +X = MLJBase.table(rand(Float32, N, 4)); +ycont = 2*X.x1 - X.x3 + 0.1*rand(N) +m, M = minimum(ycont), maximum(ycont) +_, a, _ = range(m, stop=M, length=3) |> collect +y = map(ycont) do η + if η < 0.9*a + 'a' + else + 'b' + end +end |> categorical; + +builder = MLJFlux.MLP(hidden=(8,)) +optimiser = Optimisers.Adam(0.03) + 
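
# The tests below mirror those for `NeuralNetworkClassifier` above: they exercise both
# table and matrix input, and then check that the trained network beats a
# constant-probability baseline on a holdout set.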
+@testset_accelerated "NeuralNetworkBinaryClassifier" accel begin + + # Table input: + @testset "Table input" begin + basictest( + MLJFlux.NeuralNetworkBinaryClassifier, + X, + y, + builder, + optimiser, + 0.85, + accel, + ) + end + + # Matrix input: + @testset "Matrix input" begin + basictest( + MLJFlux.NeuralNetworkBinaryClassifier, + matrix(X), + y, + builder, + optimiser, + 0.85, + accel, + ) + end + + train, test = MLJBase.partition(1:N, 0.7) + + # baseline loss (predict constant probability distribution): + dict = StatsBase.countmap(y[train]) + prob_given_class = Dict{CategoricalArrays.CategoricalValue,Float64}() + for (k, v) in dict + prob_given_class[k] = dict[k]/length(train) + end + dist = MLJBase.UnivariateFinite(prob_given_class) + loss_baseline = + StatisticalMeasures.cross_entropy(fill(dist, length(test)), y[test]) + + # check flux model is an improvement on predicting constant + # distribution + # (GPUs only support `default_rng`): + rng = Random.default_rng() + seed!(rng, 123) + model = MLJFlux.NeuralNetworkBinaryClassifier( + epochs=50, + builder=builder, + optimiser=optimiser, + acceleration=accel, + batch_size=10, + rng=rng, + ) + @time mach = fit!(machine(model, X, y), rows=train, verbosity=0) + first_last_training_loss = MLJBase.report(mach)[1][[1, end]] + yhat = MLJBase.predict(mach, rows=test); + @test StatisticalMeasures.cross_entropy(yhat, y[test]) < 0.95*loss_baseline + +end + true From 7eae840ad0eebb1116a5f62517a51826df30ff7e Mon Sep 17 00:00:00 2001 From: "Anthony D. Blaom" Date: Tue, 11 Jun 2024 10:34:12 +1200 Subject: [PATCH 4/4] tweak NNBinaryClassifier to address breakages of #251 --- src/types.jl | 49 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/src/types.jl b/src/types.jl index 89744c24..e20d152b 100644 --- a/src/types.jl +++ b/src/types.jl @@ -295,7 +295,7 @@ fit!(mach, verbosity=2) # trains 5 more epochs We can inspect the mean training loss using the `cross_entropy` function: ```julia -training_loss = cross_entropy(predict(mach, X), y) |> mean +training_loss = cross_entropy(predict(mach, X), y) ``` And we can access the Flux chain (model) using `fitted_params`: @@ -451,6 +451,7 @@ examples in the MLJFlux.jl documentation. ```julia using MLJ, Flux +import Optimisers import RDatasets ``` @@ -458,7 +459,12 @@ First, we can load the data: ```julia mtcars = RDatasets.dataset("datasets", "mtcars"); -y, X = unpack(mtcars, ==(:VS), in([:MPG, :Cyl, :Disp, :HP, :WT, :QSec])); # a vector and a table +y, X = unpack(mtcars, ==(:VS), in([:MPG, :Cyl, :Disp, :HP, :WT, :QSec])); +``` + +Note that `y` is a vector and `X` a table. + +```julia y = categorical(y) # classifier takes catogorical input X_f32 = Float32.(X) # To match floating point type of the neural network layers NeuralNetworkBinaryClassifier = @load NeuralNetworkBinaryClassifier pkg=MLJFlux @@ -476,8 +482,13 @@ We can train the model in an incremental fashion, altering the learning rate as provided `optimizer_changes_trigger_retraining` is `false` (the default). 
Here, we also change the number of (total) iterations: +```julia-repl +julia> bclf.optimiser +Adam(0.001, (0.9, 0.999), 1.0e-8) +``` + ```julia -bclf.optimiser.eta = bclf.optimiser.eta * 2 +bclf.optimiser = Optimisers.Adam(eta = bclf.optimiser.eta * 2) bclf.epochs = bclf.epochs + 5 fit!(mach, verbosity=2) # trains 5 more epochs @@ -486,7 +497,7 @@ fit!(mach, verbosity=2) # trains 5 more epochs We can inspect the mean training loss using the `cross_entropy` function: ```julia -training_loss = cross_entropy(predict(mach, X_f32), y) |> mean +training_loss = cross_entropy(predict(mach, X_f32), y) ``` And we can access the Flux chain (model) using `fitted_params`: @@ -500,16 +511,22 @@ Finally, we can see how the out-of-sample performance changes over time, using M ```julia r = range(bclf, :epochs, lower=1, upper=200, scale=:log10) -curve = learning_curve(bclf, X_f32, y, - range=r, - resampling=Holdout(fraction_train=0.7), - measure=cross_entropy) +curve = learning_curve( + bclf, + X_f32, + y, + range=r, + resampling=Holdout(fraction_train=0.7), + measure=cross_entropy, +) using Plots -plot(curve.parameter_values, - curve.measurements, - xlab=curve.parameter_name, - xscale=curve.parameter_scale, - ylab = "Cross Entropy") +plot( + curve.parameter_values, + curve.measurements, + xlab=curve.parameter_name, + xscale=curve.parameter_scale, + ylab = "Cross Entropy", +) ``` @@ -745,7 +762,7 @@ measure (loss/score): ```julia predicted_labels = predict(mach, rows=501:1000); -cross_entropy(predicted_labels, labels[501:1000]) |> mean +cross_entropy(predicted_labels, labels[501:1000]) ``` The preceding `fit!`/`predict`/evaluate workflow can be alternatively executed as follows: @@ -975,7 +992,7 @@ evaluate!(mach, resampling=CV(nfolds=5), measure=l2) # loss for `(Xtest, test)`: fit!(mach) # train on `(X, y)` yhat = predict(mach, Xtest) -l2(yhat, ytest) |> mean +l2(yhat, ytest) ``` These losses, for the pipeline model, refer to the target on the original, unstandardized, @@ -1168,7 +1185,7 @@ all data bound to `mach`) and compare this with performance on the test set: ```julia # custom MLJ loss: -multi_loss(yhat, y) = l2(MLJ.matrix(yhat), MLJ.matrix(y)) |> mean +multi_loss(yhat, y) = l2(MLJ.matrix(yhat), MLJ.matrix(y)) # CV estimate, based on `(X, y)`: evaluate!(mach, resampling=CV(nfolds=5), measure=multi_loss)