🌟 Introduce integration tests
EssamWisam committed Sep 1, 2024
1 parent f60f789 commit 5fd85a3
Showing 2 changed files with 110 additions and 65 deletions.
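The diffs below extend the classifier and regressor test suites so that, alongside the existing purely numeric tables, the models are also exercised on tables mixing Continuous, Multiclass and OrderedFactor columns. For orientation, here is a minimal sketch (not part of the commit) of the pattern the new tests follow; the toy table, hyperparameters and epoch count are illustrative only, and it assumes a version of MLJFlux that accepts categorical input features, which is exactly what these integration tests target:

using MLJBase, MLJFlux, CategoricalArrays, Optimisers
import Random

rng = Random.default_rng()
Random.seed!(rng, 123)

N = 100
X = (  # any Tables.jl table works; a named tuple is simplest
    x1 = randn(Float32, N),                                               # Continuous
    x2 = categorical(rand(rng, ['a', 'b', 'c'], N)),                      # Multiclass
    x3 = categorical(rand(rng, ["lo", "mid", "hi"], N), ordered = true),  # OrderedFactor
)
y = categorical(rand(rng, ['p', 'q'], N))  # two-class target

model = MLJFlux.NeuralNetworkClassifier(
    builder = MLJFlux.MLP(hidden = (8,)),  # same builder family as in the tests below
    optimiser = Optimisers.Adam(0.03),
    epochs = 10,
    rng = rng,
)
mach = machine(model, X, y)
fit!(mach, verbosity = 0)
yhat = predict(mach, X)  # probabilistic (UnivariateFinite) predictions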
124 changes: 73 additions & 51 deletions test/classifier.jl
@@ -2,14 +2,26 @@

seed!(1234)
N = 300
X = MLJBase.table(rand(Float32, N, 4));
ycont = 2*X.x1 - X.x3 + 0.1*rand(N)
Xm = MLJBase.table(randn(Float32, N, 5)); # purely numeric
X = (; Tables.columntable(Xm)...,
Column1 = repeat([1.0, 2.0, 3.0, 4.0, 5.0], Int(N / 5)),
Column2 = categorical(repeat(['a', 'b', 'c', 'd', 'e'], Int(N / 5))),
Column3 = categorical(repeat(["b", "c", "d", "f", "f"], Int(N / 5)), ordered = true),
Column4 = repeat([1.0, 2.0, 3.0, 4.0, 5.0], Int(N / 5)),
Column5 = randn(N),
Column6 = categorical(
repeat(["group1", "group1", "group2", "group2", "group3"], Int(N / 5)),
),
)


ycont = 2 * X.x1 - X.x3 + 0.1 * rand(N)
m, M = minimum(ycont), maximum(ycont)
_, a, b, _ = range(m, stop=M, length=4) |> collect
_, a, b, _ = range(m, stop = M, length = 4) |> collect
y = map(ycont) do η
if η < 0.9*a
if η < 0.9 * a
'a'
elseif η < 1.1*b
elseif η < 1.1 * b
'b'
else
'c'
@@ -20,7 +32,7 @@ end |> categorical;
# builder instead of the default `Short()` because `Dropout()` in `Short()` does not appear
# to behave the same on GPU as on a CPU, even when we use `default_rng()` for both.

builder = MLJFlux.MLP(hidden=(8,))
builder = MLJFlux.MLP(hidden = (8,))
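# Not part of the diff: a quick contrast of the two builders mentioned in the comment above.
# The `Short` field names and defaults shown here are assumptions; check the MLJFlux docs.
builder_short = MLJFlux.Short(n_hidden = 8, dropout = 0.5)  # its chain includes a Dropout layer
builder_mlp   = MLJFlux.MLP(hidden = (8,))                  # plain dense chain, no Dropout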
optimiser = Optimisers.Adam(0.03)

losses = []
@@ -30,32 +42,42 @@ losses = []
# Table input:
@testset "Table input" begin
basictest(MLJFlux.NeuralNetworkClassifier,
X,
y,
builder,
optimiser,
0.85,
accel)
X,
y,
builder,
optimiser,
0.85,
accel)
end

@testset "Table input numerical" begin
basictest(MLJFlux.NeuralNetworkClassifier,
Xm,
y,
builder,
optimiser,
0.85,
accel)
end

# Matrix input:
@testset "Matrix input" begin
basictest(MLJFlux.NeuralNetworkClassifier,
matrix(X),
y,
builder,
optimiser,
0.85,
accel)
matrix(Xm),
y,
builder,
optimiser,
0.85,
accel)
end

train, test = MLJBase.partition(1:N, 0.7)

# baseline loss (predict constant probability distribution):
dict = StatsBase.countmap(y[train])
prob_given_class = Dict{CategoricalArrays.CategoricalValue,Float64}()
prob_given_class = Dict{CategoricalArrays.CategoricalValue, Float64}()
for (k, v) in dict
prob_given_class[k] = dict[k]/length(train)
prob_given_class[k] = dict[k] / length(train)
end
dist = MLJBase.UnivariateFinite(prob_given_class)
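# (Not part of the diff) `dist` assigns every observation the training-set class frequencies;
# the cross-entropy of this constant predictor is the baseline the fitted model must beat
# by at least 5% in the `@test` further below.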
loss_baseline =
@@ -66,36 +88,36 @@ losses = []
# (GPUs only support `default_rng`):
rng = Random.default_rng()
seed!(rng, 123)
model = MLJFlux.NeuralNetworkClassifier(epochs=50,
builder=builder,
optimiser=optimiser,
acceleration=accel,
batch_size=10,
rng=rng)
@time mach = fit!(machine(model, X, y), rows=train, verbosity=0)
model = MLJFlux.NeuralNetworkClassifier(epochs = 50,
builder = builder,
optimiser = optimiser,
acceleration = accel,
batch_size = 10,
rng = rng)
@time mach = fit!(machine(model, X, y), rows = train, verbosity = 0)
first_last_training_loss = MLJBase.report(mach)[1][[1, end]]
push!(losses, first_last_training_loss[2])
yhat = MLJBase.predict(mach, rows=test);
@test StatisticalMeasures.cross_entropy(yhat, y[test]) < 0.95*loss_baseline
yhat = MLJBase.predict(mach, rows = test)
@test StatisticalMeasures.cross_entropy(yhat, y[test]) < 0.95 * loss_baseline

optimisertest(MLJFlux.NeuralNetworkClassifier,
X,
y,
builder,
optimiser,
accel)
X,
y,
builder,
optimiser,
accel)

end

# check different resources (CPU1, CUDALibs, etc.) give about the same loss:
reference = losses[1]
@test all(x->abs(x - reference)/reference < 1e-5, losses[2:end])
@test all(x -> abs(x - reference) / reference < 1e-5, losses[2:end])


# # NEURAL NETWORK BINARY CLASSIFIER

@testset "NeuralNetworkBinaryClassifier constructor" begin
model = NeuralNetworkBinaryClassifier()
model = MLJFlux.NeuralNetworkBinaryClassifier()
@test model.loss == Flux.binarycrossentropy
@test model.builder isa MLJFlux.Short
@test model.finaliser == Flux.σ
@@ -104,18 +126,18 @@ end
seed!(1234)
N = 300
X = MLJBase.table(rand(Float32, N, 4));
ycont = 2*X.x1 - X.x3 + 0.1*rand(N)
ycont = 2 * X.x1 - X.x3 + 0.1 * rand(N)
m, M = minimum(ycont), maximum(ycont)
_, a, _ = range(m, stop=M, length=3) |> collect
_, a, _ = range(m, stop = M, length = 3) |> collect
y = map(ycont) do η
if η < 0.9*a
if η < 0.9 * a
'a'
else
'b'
end
end |> categorical;

builder = MLJFlux.MLP(hidden=(8,))
builder = MLJFlux.MLP(hidden = (8,))
optimiser = Optimisers.Adam(0.03)

@testset_accelerated "NeuralNetworkBinaryClassifier" accel begin
@@ -150,9 +172,9 @@ optimiser = Optimisers.Adam(0.03)

# baseline loss (predict constant probability distribution):
dict = StatsBase.countmap(y[train])
prob_given_class = Dict{CategoricalArrays.CategoricalValue,Float64}()
prob_given_class = Dict{CategoricalArrays.CategoricalValue, Float64}()
for (k, v) in dict
prob_given_class[k] = dict[k]/length(train)
prob_given_class[k] = dict[k] / length(train)
end
dist = MLJBase.UnivariateFinite(prob_given_class)
loss_baseline =
Expand All @@ -164,17 +186,17 @@ optimiser = Optimisers.Adam(0.03)
rng = Random.default_rng()
seed!(rng, 123)
model = MLJFlux.NeuralNetworkBinaryClassifier(
epochs=50,
builder=builder,
optimiser=optimiser,
acceleration=accel,
batch_size=10,
rng=rng,
epochs = 50,
builder = builder,
optimiser = optimiser,
acceleration = accel,
batch_size = 10,
rng = rng,
)
@time mach = fit!(machine(model, X, y), rows=train, verbosity=0)
@time mach = fit!(machine(model, X, y), rows = train, verbosity = 0)
first_last_training_loss = MLJBase.report(mach)[1][[1, end]]
yhat = MLJBase.predict(mach, rows=test);
@test StatisticalMeasures.cross_entropy(yhat, y[test]) < 0.95*loss_baseline
yhat = MLJBase.predict(mach, rows = test)
@test StatisticalMeasures.cross_entropy(yhat, y[test]) < 0.95 * loss_baseline

end

51 changes: 37 additions & 14 deletions test/regressor.jl
@@ -1,9 +1,19 @@
Random.seed!(123)

N = 200
X = MLJBase.table(randn(Float32, N, 5));
Xm = MLJBase.table(randn(Float32, N, 5)); # purely numeric
X = (; Tables.columntable(Xm)...,
Column1 = repeat([1.0, 2.0, 3.0, 4.0, 5.0], Int(N / 5)),
Column2 = categorical(repeat(['a', 'b', 'c', 'd', 'e'], Int(N / 5))),
Column3 = categorical(repeat(["b", "c", "d", "f", "f"], Int(N / 5)), ordered = true),
Column4 = repeat([1.0, 2.0, 3.0, 4.0, 5.0], Int(N / 5)),
Column5 = randn(N),
Column6 = categorical(
repeat(["group1", "group1", "group2", "group2", "group3"], Int(N / 5)),
),
)

builder = MLJFlux.Short(σ=identity)
builder = MLJFlux.Short(σ = identity)
optimiser = Optimisers.Adam()

Random.seed!(123)
@@ -25,11 +35,12 @@ train, test = MLJBase.partition(1:N, 0.7)
)
end


# Matrix input:
@testset "Matrix input" begin
@test basictest(
MLJFlux.NeuralNetworkRegressor,
matrix(X),
matrix(Xm),
y,
builder,
optimiser,
@@ -42,16 +53,16 @@ train, test = MLJBase.partition(1:N, 0.7)
# (GPUs only support `default_rng` when there's `Dropout`):
rng = Random.default_rng()
seed!(rng, 123)
model = MLJFlux.NeuralNetworkRegressor(builder=builder,
acceleration=accel,
rng=rng)
model = MLJFlux.NeuralNetworkRegressor(builder = builder,
acceleration = accel,
rng = rng)
@time fitresult, _, rpt =
fit(model, 0, MLJBase.selectrows(X, train), y[train])
first_last_training_loss = rpt[1][[1, end]]
# @show first_last_training_loss
# @show first_last_training_loss
yhat = predict(model, fitresult, selectrows(X, test))
truth = y[test]
goal = 0.9*model.loss(truth .- mean(truth), 0)
goal = 0.9 * model.loss(truth .- mean(truth), 0)
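# (Not part of the diff) `model.loss(truth .- mean(truth), 0)` is the loss of always predicting
# the mean of the test targets, so the fitted model must beat 90% of that constant-predictor
# baseline here (85% in the multitarget case further down).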
@test model.loss(yhat, truth) < goal
end

@@ -73,11 +84,23 @@ y = MLJBase.table(ymatrix);
accel,
)
end

@testset "Table input numerical" begin
@test basictest(
MLJFlux.MultitargetNeuralNetworkRegressor,
Xm,
y,
builder,
optimiser,
1.0,
accel,
)
end
# Matrix input:
@testset "Matrix input" begin
@test basictest(
MLJFlux.MultitargetNeuralNetworkRegressor,
matrix(X),
matrix(Xm),
ymatrix,
builder,
optimiser,
@@ -91,16 +114,16 @@ y = MLJBase.table(ymatrix);
rng = Random.default_rng()
seed!(rng, 123)
model = MLJFlux.MultitargetNeuralNetworkRegressor(
acceleration=accel,
builder=builder,
rng=rng,
acceleration = accel,
builder = builder,
rng = rng,
)
@time fitresult, _, rpt =
fit(model, 0, MLJBase.selectrows(X, train), selectrows(y, train))
first_last_training_loss = rpt[1][[1, end]]
yhat = predict(model, fitresult, selectrows(X, test))
truth = ymatrix[test,:]
goal = 0.85*model.loss(truth .- mean(truth), 0)
truth = ymatrix[test, :]
goal = 0.85 * model.loss(truth .- mean(truth), 0)
@test model.loss(Tables.matrix(yhat), truth) < goal
end
