From 917318c40b1e122def483d9f8e8e8f22f40cbac5 Mon Sep 17 00:00:00 2001 From: salbert83 Date: Sat, 18 Jun 2022 07:04:20 -0400 Subject: [PATCH 1/7] Multithread forest application to a matrix --- src/classification/main.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/classification/main.jl b/src/classification/main.jl index ce1ed4e1..f59f77b7 100644 --- a/src/classification/main.jl +++ b/src/classification/main.jl @@ -452,7 +452,7 @@ end function apply_forest(forest::Ensemble{S, T}, features::AbstractMatrix{S}) where {S, T} N = size(features,1) predictions = Array{T}(undef, N) - for i in 1:N + Threads.@threads for i in 1:N predictions[i] = apply_forest(forest, features[i, :]) end return predictions From f2b45296795ec75d6ca9c6ae662182727f484d7f Mon Sep 17 00:00:00 2001 From: salbert83 Date: Mon, 20 Jun 2022 21:07:23 -0400 Subject: [PATCH 2/7] Multithreaded support for tree application Perhaps it is best to let the client decide whether they wish to use multithreading? --- src/classification/main.jl | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/classification/main.jl b/src/classification/main.jl index f59f77b7..199cdc0c 100644 --- a/src/classification/main.jl +++ b/src/classification/main.jl @@ -449,11 +449,18 @@ function apply_forest(forest::Ensemble{S, T}, features::AbstractVector{S}) where end end -function apply_forest(forest::Ensemble{S, T}, features::AbstractMatrix{S}) where {S, T} +function apply_forest(forest::Ensemble{S, T}, features::AbstractMatrix{S} + ; use_multithreading = false) where {S, T} N = size(features,1) predictions = Array{T}(undef, N) - Threads.@threads for i in 1:N - predictions[i] = apply_forest(forest, features[i, :]) + if use_multithreading + Threads.@threads for i in 1:N + predictions[i] = apply_forest(forest, features[i, :]) + end + else + for i in 1:N + predictions[i] = apply_forest(forest, features[i, :]) + end end return predictions end From 57be976a9a2637140f2647600fe1c7a0fbfa603f Mon Sep 17 00:00:00 2001 From: salbert83 Date: Tue, 21 Jun 2022 06:45:19 -0400 Subject: [PATCH 3/7] Style change --- src/classification/main.jl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/classification/main.jl b/src/classification/main.jl index 199cdc0c..20fb59e5 100644 --- a/src/classification/main.jl +++ b/src/classification/main.jl @@ -449,8 +449,11 @@ function apply_forest(forest::Ensemble{S, T}, features::AbstractVector{S}) where end end -function apply_forest(forest::Ensemble{S, T}, features::AbstractMatrix{S} - ; use_multithreading = false) where {S, T} +function apply_forest( + forest::Ensemble{S, T}, + features::AbstractMatrix{S}; + use_multithreading = false + ) where {S, T} N = size(features,1) predictions = Array{T}(undef, N) if use_multithreading From 7b874d5e57fae04dcf444d5d76ce58385d032011 Mon Sep 17 00:00:00 2001 From: salbert83 Date: Mon, 27 Jun 2022 22:53:08 -0400 Subject: [PATCH 4/7] Update src/classification/main.jl Good idea! Co-authored-by: Okon Samuel <39421418+OkonSamuel@users.noreply.github.com> --- src/classification/main.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/classification/main.jl b/src/classification/main.jl index 20fb59e5..640c170b 100644 --- a/src/classification/main.jl +++ b/src/classification/main.jl @@ -458,7 +458,7 @@ function apply_forest( predictions = Array{T}(undef, N) if use_multithreading Threads.@threads for i in 1:N - predictions[i] = apply_forest(forest, features[i, :]) + predictions[i] = apply_forest(forest, @view(features[i, :])) end else for i in 1:N From 4f1988464aceb9e2e3754d3b59b8b44f01a05d0b Mon Sep 17 00:00:00 2001 From: salbert83 Date: Mon, 27 Jun 2022 22:53:30 -0400 Subject: [PATCH 5/7] Update src/classification/main.jl Co-authored-by: Okon Samuel <39421418+OkonSamuel@users.noreply.github.com> --- src/classification/main.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/classification/main.jl b/src/classification/main.jl index 640c170b..146f3d06 100644 --- a/src/classification/main.jl +++ b/src/classification/main.jl @@ -462,7 +462,7 @@ function apply_forest( end else for i in 1:N - predictions[i] = apply_forest(forest, features[i, :]) + predictions[i] = apply_forest(forest, @view(features[i, :])) end end return predictions From f358671afa46dc3d3d234e9701c5e27749f3f64c Mon Sep 17 00:00:00 2001 From: salbert83 Date: Fri, 8 Jul 2022 20:49:39 -0400 Subject: [PATCH 6/7] Unit tests for multithreading --- test/regression/digits.jl | 4 ++++ test/regression/low_precision.jl | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/test/regression/digits.jl b/test/regression/digits.jl index fdf6105d..43c307f3 100644 --- a/test/regression/digits.jl +++ b/test/regression/digits.jl @@ -80,6 +80,10 @@ model = build_forest( preds = apply_forest(model, X) @test R2(Y, preds) > 0.8 +preds_MT = apply_forest(model, X, use_multithreading = true) +@test R2(Y, preds_MT) > 0.8 +@test sum(abs.(preds .- preds_MT)) < 1e-8 + println("\n##### 3 foldCV Regression Tree #####") n_folds = 5 r2 = nfoldCV_tree(Y, X, n_folds; rng=StableRNG(1), verbose=false); diff --git a/test/regression/low_precision.jl b/test/regression/low_precision.jl index 2a5ec11c..9b404ea0 100644 --- a/test/regression/low_precision.jl +++ b/test/regression/low_precision.jl @@ -47,6 +47,11 @@ preds = apply_forest(model, features) @test R2(labels, preds) > 0.9 @test typeof(preds) <: Vector{Float64} +preds_MT = apply_forest(model, features, use_multithreading=true) +@test R2(labels, preds_MT) > 0.9 +@test typeof(preds_MT) <: Vector{Float64} +@test sum(abs.(preds .- preds_MT)) < 1.0e-8 + println("\n##### nfoldCV Regression Tree #####") n_folds = Int32(3) pruning_purity = 1.0 @@ -102,6 +107,10 @@ model = build_forest(labels, features) preds = apply_forest(model, features) @test typeof(preds) == Vector{Float16} +preds_MT = apply_forest(model, features, use_multithreading = true) +@test typeof(preds_MT) == Vector{Float16} +@test sum(abs.(preds .- preds_MT)) < 1.0e-8 + model = build_tree(labels, features) preds = apply_tree(model, features) @test typeof(preds) == Vector{Float16} From 58110200faeae703cca4d89fcb1fb55cd6e607fc Mon Sep 17 00:00:00 2001 From: salbert83 Date: Fri, 8 Jul 2022 20:51:19 -0400 Subject: [PATCH 7/7] Unit tests for multithreading --- test/classification/adult.jl | 4 ++++ test/classification/digits.jl | 4 ++++ test/classification/heterogeneous.jl | 4 ++++ test/classification/iris.jl | 6 ++++++ test/classification/low_precision.jl | 9 +++++++++ test/classification/random.jl | 10 ++++++++++ 6 files changed, 37 insertions(+) diff --git a/test/classification/adult.jl b/test/classification/adult.jl index 00ee7878..6c61d1b3 100644 --- a/test/classification/adult.jl +++ b/test/classification/adult.jl @@ -22,6 +22,10 @@ cm = confusion_matrix(labels, preds) f1 = impurity_importance(model) p1 = permutation_importance(model, labels, features, (model, y, X)->accuracy(y, apply_forest(model, X)), rng=StableRNG(1)).mean +preds_MT = apply_forest(model, features, use_multithreading = true) +cm_MT = confusion_matrix(labels, preds_MT) +@test cm_MT.accuracy > 0.9 + n_iterations = 15 model, coeffs = build_adaboost_stumps(labels, features, n_iterations; rng=StableRNG(1)); preds = apply_adaboost_stumps(model, coeffs, features); diff --git a/test/classification/digits.jl b/test/classification/digits.jl index d8c6bc06..096f1947 100644 --- a/test/classification/digits.jl +++ b/test/classification/digits.jl @@ -86,6 +86,10 @@ preds = apply_forest(model, X) cm = confusion_matrix(Y, preds) @test cm.accuracy > 0.95 +preds_MT = apply_forest(model, X, use_multithreading = true) +cm_MT = confusion_matrix(Y, preds_MT) +@test cm_MT.accuracy > 0.95 + n_iterations = 100 model, coeffs = DecisionTree.build_adaboost_stumps( Y, X, diff --git a/test/classification/heterogeneous.jl b/test/classification/heterogeneous.jl index 004d89b2..f3afbfa9 100644 --- a/test/classification/heterogeneous.jl +++ b/test/classification/heterogeneous.jl @@ -26,6 +26,10 @@ preds = apply_forest(model, features) cm = confusion_matrix(labels, preds) @test cm.accuracy > 0.9 +preds_MT = apply_forest(model, features, use_multithreading = true) +cm_MT = confusion_matrix(labels, preds_MT) +@test cm_MT.accuracy > 0.9 + n_subfeatures = 7 model, coeffs = build_adaboost_stumps(labels, features, n_subfeatures; rng=StableRNG(1)) preds = apply_adaboost_stumps(model, coeffs, features) diff --git a/test/classification/iris.jl b/test/classification/iris.jl index a8ffb3b8..a4eb0b05 100644 --- a/test/classification/iris.jl +++ b/test/classification/iris.jl @@ -79,6 +79,12 @@ cm = confusion_matrix(labels, preds) probs = apply_forest_proba(model, features, classes) @test reshape(sum(probs, dims=2), n) ≈ ones(n) +preds_MT = apply_forest(model, features, use_multithreading = true) +cm_MT = confusion_matrix(labels, preds_MT) +@test cm_MT.accuracy > 0.95 +@test typeof(preds_MT) == Vector{String} +@test sum(preds .!= preds_MT) == 0 + # run n-fold cross validation for forests println("\n##### nfoldCV Classification Forest #####") n_subfeatures = 2 diff --git a/test/classification/low_precision.jl b/test/classification/low_precision.jl index 4bdcfbf8..4ee31de6 100644 --- a/test/classification/low_precision.jl +++ b/test/classification/low_precision.jl @@ -48,6 +48,11 @@ cm = confusion_matrix(labels, preds) @test typeof(preds) == Vector{Int32} @test cm.accuracy > 0.9 +preds_MT = apply_forest(model, features, use_multithreading = true) +cm_MT = confusion_matrix(labels, preds_MT) +@test typeof(preds_MT) == Vector{Int32} +@test cm_MT.accuracy > 0.9 + n_iterations = Int32(25) model, coeffs = build_adaboost_stumps(labels, features, n_iterations; rng=StableRNG(1)); preds = apply_adaboost_stumps(model, coeffs, features); @@ -116,6 +121,10 @@ model = build_forest(labels, features) preds = apply_forest(model, features) @test typeof(preds) == Vector{Int8} +preds_MT = apply_forest(model, features, use_multithreading = true) +@test typeof(preds_MT) == Vector{Int8} +@test sum(abs.(preds .- preds_MT)) == zero(Int8) + model = build_tree(labels, features) preds = apply_tree(model, features) @test typeof(preds) == Vector{Int8} diff --git a/test/classification/random.jl b/test/classification/random.jl index a91ebac6..8b6502ed 100644 --- a/test/classification/random.jl +++ b/test/classification/random.jl @@ -55,6 +55,12 @@ cm = confusion_matrix(labels, preds) @test cm.accuracy > 0.9 @test typeof(preds) == Vector{Int} +preds_MT = apply_forest(model, features, use_multithreading = true) +cm_MT = confusion_matrix(labels, preds_MT) +@test cm_MT.accuracy > 0.9 +@test typeof(preds_MT) == Vector{Int} +@test sum(abs.(preds .- preds_MT)) == zero(Int) + n_subfeatures = 3 n_trees = 9 partial_sampling = 0.7 @@ -77,6 +83,10 @@ cm = confusion_matrix(labels, preds) @test cm.accuracy > 0.6 @test length(model) == n_trees +preds_MT = apply_forest(model, features, use_multithreading = true) +cm_MT = confusion_matrix(labels, preds_MT) +@test cm_MT.accuracy > 0.9 + # test n_subfeatures n_subfeatures = 0 m_partial = build_forest(labels, features; rng=StableRNG(1)) # default sqrt(n_features)