diff --git a/test/classifier.jl b/test/classifier.jl
index ce167b3..4201d6c 100644
--- a/test/classifier.jl
+++ b/test/classifier.jl
@@ -2,14 +2,26 @@

 seed!(1234)
 N = 300
-X = MLJBase.table(rand(Float32, N, 4));
-ycont = 2*X.x1 - X.x3 + 0.1*rand(N)
+Xm = MLJBase.table(randn(Float32, N, 5)); # purely numeric
+X = (; Tables.columntable(Xm)...,
+    Column1 = repeat([1.0, 2.0, 3.0, 4.0, 5.0], Int(N / 5)),
+    Column2 = categorical(repeat(['a', 'b', 'c', 'd', 'e'], Int(N / 5))),
+    Column3 = categorical(repeat(["b", "c", "d", "f", "f"], Int(N / 5)), ordered = true),
+    Column4 = repeat([1.0, 2.0, 3.0, 4.0, 5.0], Int(N / 5)),
+    Column5 = randn(N),
+    Column6 = categorical(
+        repeat(["group1", "group1", "group2", "group2", "group3"], Int(N / 5)),
+    ),
+)
+
+
+ycont = 2 * X.x1 - X.x3 + 0.1 * rand(N)
 m, M = minimum(ycont), maximum(ycont)
-_, a, b, _ = range(m, stop=M, length=4) |> collect
+_, a, b, _ = range(m, stop = M, length = 4) |> collect
 y = map(ycont) do η
-    if η < 0.9*a
+    if η < 0.9 * a
         'a'
-    elseif η < 1.1*b
+    elseif η < 1.1 * b
         'b'
     else
         'c'
@@ -20,7 +32,7 @@ end |> categorical;
 # builder instead of the default `Short()` because `Dropout()` in `Short()` does not appear
 # to behave the same on GPU as on a CPU, even when we use `default_rng()` for both.
-builder = MLJFlux.MLP(hidden=(8,))
+builder = MLJFlux.MLP(hidden = (8,))
 optimiser = Optimisers.Adam(0.03)

 losses = []
@@ -30,32 +42,42 @@ losses = []
     # Table input:
     @testset "Table input" begin
         basictest(MLJFlux.NeuralNetworkClassifier,
-                  X,
-                  y,
-                  builder,
-                  optimiser,
-                  0.85,
-                  accel)
+            X,
+            y,
+            builder,
+            optimiser,
+            0.85,
+            accel)
+    end
+
+    @testset "Table input numerical" begin
+        basictest(MLJFlux.NeuralNetworkClassifier,
+            Xm,
+            y,
+            builder,
+            optimiser,
+            0.85,
+            accel)
     end

     # Matrix input:
     @testset "Matrix input" begin
         basictest(MLJFlux.NeuralNetworkClassifier,
-                  matrix(X),
-                  y,
-                  builder,
-                  optimiser,
-                  0.85,
-                  accel)
+            matrix(Xm),
+            y,
+            builder,
+            optimiser,
+            0.85,
+            accel)
     end

     train, test = MLJBase.partition(1:N, 0.7)

     # baseline loss (predict constant probability distribution):
     dict = StatsBase.countmap(y[train])
-    prob_given_class = Dict{CategoricalArrays.CategoricalValue,Float64}()
+    prob_given_class = Dict{CategoricalArrays.CategoricalValue, Float64}()
     for (k, v) in dict
-        prob_given_class[k] = dict[k]/length(train)
+        prob_given_class[k] = dict[k] / length(train)
     end
     dist = MLJBase.UnivariateFinite(prob_given_class)
     loss_baseline =
@@ -66,36 +88,36 @@ losses = []
     # (GPUs only support `default_rng`):
     rng = Random.default_rng()
     seed!(rng, 123)
-    model = MLJFlux.NeuralNetworkClassifier(epochs=50,
-                                            builder=builder,
-                                            optimiser=optimiser,
-                                            acceleration=accel,
-                                            batch_size=10,
-                                            rng=rng)
-    @time mach = fit!(machine(model, X, y), rows=train, verbosity=0)
+    model = MLJFlux.NeuralNetworkClassifier(epochs = 50,
+        builder = builder,
+        optimiser = optimiser,
+        acceleration = accel,
+        batch_size = 10,
+        rng = rng)
+    @time mach = fit!(machine(model, X, y), rows = train, verbosity = 0)
     first_last_training_loss = MLJBase.report(mach)[1][[1, end]]
     push!(losses, first_last_training_loss[2])
-    yhat = MLJBase.predict(mach, rows=test);
-    @test StatisticalMeasures.cross_entropy(yhat, y[test]) < 0.95*loss_baseline
+    yhat = MLJBase.predict(mach, rows = test)
+    @test StatisticalMeasures.cross_entropy(yhat, y[test]) < 0.95 * loss_baseline

     optimisertest(MLJFlux.NeuralNetworkClassifier,
-                  X,
-                  y,
-                  builder,
-                  optimiser,
-                  accel)
+        X,
+        y,
+        builder,
+        optimiser,
+        accel)

 end

 # check different resources (CPU1, CUDALibs, etc.) give about the same loss:
 reference = losses[1]
-@test all(x->abs(x - reference)/reference < 1e-5, losses[2:end])
+@test all(x -> abs(x - reference) / reference < 1e-5, losses[2:end])


 # # NEURAL NETWORK BINARY CLASSIFIER

 @testset "NeuralNetworkBinaryClassifier constructor" begin
-    model = NeuralNetworkBinaryClassifier()
+    model = MLJFlux.NeuralNetworkBinaryClassifier()
     @test model.loss == Flux.binarycrossentropy
     @test model.builder isa MLJFlux.Short
     @test model.finaliser == Flux.σ
@@ -104,18 +126,18 @@ end
 seed!(1234)
 N = 300
 X = MLJBase.table(rand(Float32, N, 4));
-ycont = 2*X.x1 - X.x3 + 0.1*rand(N)
+ycont = 2 * X.x1 - X.x3 + 0.1 * rand(N)
 m, M = minimum(ycont), maximum(ycont)
-_, a, _ = range(m, stop=M, length=3) |> collect
+_, a, _ = range(m, stop = M, length = 3) |> collect
 y = map(ycont) do η
-    if η < 0.9*a
+    if η < 0.9 * a
         'a'
     else
         'b'
     end
 end |> categorical;

-builder = MLJFlux.MLP(hidden=(8,))
+builder = MLJFlux.MLP(hidden = (8,))
 optimiser = Optimisers.Adam(0.03)

 @testset_accelerated "NeuralNetworkBinaryClassifier" accel begin
@@ -150,9 +172,9 @@ optimiser = Optimisers.Adam(0.03)

     # baseline loss (predict constant probability distribution):
     dict = StatsBase.countmap(y[train])
-    prob_given_class = Dict{CategoricalArrays.CategoricalValue,Float64}()
+    prob_given_class = Dict{CategoricalArrays.CategoricalValue, Float64}()
     for (k, v) in dict
-        prob_given_class[k] = dict[k]/length(train)
+        prob_given_class[k] = dict[k] / length(train)
     end
     dist = MLJBase.UnivariateFinite(prob_given_class)
     loss_baseline =
@@ -164,17 +186,17 @@ optimiser = Optimisers.Adam(0.03)
     rng = Random.default_rng()
     seed!(rng, 123)
     model = MLJFlux.NeuralNetworkBinaryClassifier(
-        epochs=50,
-        builder=builder,
-        optimiser=optimiser,
-        acceleration=accel,
-        batch_size=10,
-        rng=rng,
+        epochs = 50,
+        builder = builder,
+        optimiser = optimiser,
+        acceleration = accel,
+        batch_size = 10,
+        rng = rng,
     )
-    @time mach = fit!(machine(model, X, y), rows=train, verbosity=0)
+    @time mach = fit!(machine(model, X, y), rows = train, verbosity = 0)
     first_last_training_loss = MLJBase.report(mach)[1][[1, end]]

-    yhat = MLJBase.predict(mach, rows=test);
-    @test StatisticalMeasures.cross_entropy(yhat, y[test]) < 0.95*loss_baseline
+    yhat = MLJBase.predict(mach, rows = test)
+    @test StatisticalMeasures.cross_entropy(yhat, y[test]) < 0.95 * loss_baseline

 end
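Aside (not part of the patch): both test files now exercise feature tables that mix Continuous, Multiclass, and OrderedFactor columns rather than purely numeric input. The following is a minimal, self-contained sketch of the kind of table being constructed; `N = 10` and the `schema` call are chosen here purely for illustration:

    using MLJBase, Tables, CategoricalArrays

    N = 10   # any multiple of 5, so the `repeat` calls below tile evenly
    Xm = MLJBase.table(randn(Float32, N, 5))   # purely numeric columns x1, ..., x5

    # splice extra columns of mixed scientific types onto the numeric table
    X = (; Tables.columntable(Xm)...,
        Column1 = repeat([1.0, 2.0, 3.0, 4.0, 5.0], Int(N / 5)),                              # Continuous
        Column2 = categorical(repeat(['a', 'b', 'c', 'd', 'e'], Int(N / 5))),                 # Multiclass
        Column3 = categorical(repeat(["b", "c", "d", "f", "f"], Int(N / 5)), ordered = true), # OrderedFactor
    )

    schema(X)   # inspect the scientific types MLJ infers for each column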
diff --git a/test/regressor.jl b/test/regressor.jl
index d17a376..74c33a8 100644
--- a/test/regressor.jl
+++ b/test/regressor.jl
@@ -1,9 +1,19 @@
 Random.seed!(123)
 N = 200
-X = MLJBase.table(randn(Float32, N, 5));
+Xm = MLJBase.table(randn(Float32, N, 5)); # purely numeric
+X = (; Tables.columntable(Xm)...,
+    Column1 = repeat([1.0, 2.0, 3.0, 4.0, 5.0], Int(N / 5)),
+    Column2 = categorical(repeat(['a', 'b', 'c', 'd', 'e'], Int(N / 5))),
+    Column3 = categorical(repeat(["b", "c", "d", "f", "f"], Int(N / 5)), ordered = true),
+    Column4 = repeat([1.0, 2.0, 3.0, 4.0, 5.0], Int(N / 5)),
+    Column5 = randn(N),
+    Column6 = categorical(
+        repeat(["group1", "group1", "group2", "group2", "group3"], Int(N / 5)),
+    ),
+)


-builder = MLJFlux.Short(σ=identity)
+builder = MLJFlux.Short(σ = identity)
 optimiser = Optimisers.Adam()

 Random.seed!(123)
@@ -25,11 +35,12 @@ train, test = MLJBase.partition(1:N, 0.7)
         )
     end

+
     # Matrix input:
     @testset "Matrix input" begin
         @test basictest(
             MLJFlux.NeuralNetworkRegressor,
-            matrix(X),
+            matrix(Xm),
             y,
             builder,
             optimiser,
@@ -42,16 +53,16 @@ train, test = MLJBase.partition(1:N, 0.7)
     # (GPUs only support `default_rng` when there's `Dropout`):
     rng = Random.default_rng()
     seed!(rng, 123)
-    model = MLJFlux.NeuralNetworkRegressor(builder=builder,
-                                           acceleration=accel,
-                                           rng=rng)
+    model = MLJFlux.NeuralNetworkRegressor(builder = builder,
+        acceleration = accel,
+        rng = rng)
     @time fitresult, _, rpt =
         fit(model, 0, MLJBase.selectrows(X, train), y[train])
     first_last_training_loss = rpt[1][[1, end]]
-# @show first_last_training_loss
+    # @show first_last_training_loss
     yhat = predict(model, fitresult, selectrows(X, test))
     truth = y[test]
-    goal = 0.9*model.loss(truth .- mean(truth), 0)
+    goal = 0.9 * model.loss(truth .- mean(truth), 0)
     @test model.loss(yhat, truth) < goal

 end
@@ -73,11 +84,23 @@ y = MLJBase.table(ymatrix);
             accel,
         )
     end
+
+    @testset "Table input numerical" begin
+        @test basictest(
+            MLJFlux.MultitargetNeuralNetworkRegressor,
+            Xm,
+            y,
+            builder,
+            optimiser,
+            1.0,
+            accel,
+        )
+    end
     # Matrix input:
     @testset "Matrix input" begin
         @test basictest(
             MLJFlux.MultitargetNeuralNetworkRegressor,
-            matrix(X),
+            matrix(Xm),
             ymatrix,
             builder,
             optimiser,
@@ -91,16 +114,16 @@ y = MLJBase.table(ymatrix);
     rng = Random.default_rng()
     seed!(rng, 123)
     model = MLJFlux.MultitargetNeuralNetworkRegressor(
-        acceleration=accel,
-        builder=builder,
-        rng=rng,
+        acceleration = accel,
+        builder = builder,
+        rng = rng,
     )
     @time fitresult, _, rpt =
         fit(model, 0, MLJBase.selectrows(X, train), selectrows(y, train))
     first_last_training_loss = rpt[1][[1, end]]
     yhat = predict(model, fitresult, selectrows(X, test))
-    truth = ymatrix[test,:]
-    goal = 0.85*model.loss(truth .- mean(truth), 0)
+    truth = ymatrix[test, :]
+    goal = 0.85 * model.loss(truth .- mean(truth), 0)
     @test model.loss(Tables.matrix(yhat), truth) < goal

 end
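Aside (not part of the patch): the `goal` values in the regressor tests compare the trained network against a trivial baseline. Assuming the default `Flux.mse` loss, `model.loss(truth .- mean(truth), 0)` is the loss of a constant predictor that always outputs the mean of `truth`, so the single-target test requires the network to do at least 10% better than that (15% in the multitarget case). A minimal sketch, where `truth`, `constant_loss`, and `goal` are stand-in names for illustration only:

    using Flux, Statistics

    truth = randn(Float32, 60)                         # stand-in for y[test]
    constant_loss = Flux.mse(truth .- mean(truth), 0)  # loss of the always-predict-the-mean baseline
    goal = 0.9 * constant_loss                         # threshold used in the single-target test

    constant_loss ≈ var(truth, corrected = false)      # equals the (uncorrected) variance of `truth`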