Skip to content

Commit

Permalink
Merge pull request #8 from JuliaAI/mljbase-compat-bump
Browse files Browse the repository at this point in the history
Bump [compat] for MLJBase="1"
  • Loading branch information
ablaom authored Oct 6, 2023
2 parents 6ef3526 + c5b17d6 commit e6b1aaa
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.8'
- '1.7'
- '1'

os: [ubuntu-latest, windows-latest, macOS-latest]
Expand Down
6 changes: 3 additions & 3 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MLJBalancing"
uuid = "45f359ea-796d-4f51-95a5-deb1a414c586"
authors = ["Essam Wisam <[email protected]>", "Anthony Blaom <[email protected]> and contributors"]
version = "0.1.0"
version = "0.1.1"

[deps]
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
Expand All @@ -12,12 +12,12 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

[compat]
MLJBase = "0.21"
MLJBase = "1"
OrderedCollections = "1.6"
julia = "1.6"
MLJModelInterface = "1.9"
MLUtils = "0.4"
StatsBase = "0.34"
julia = "1.7"

[extras]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Expand Down
40 changes: 21 additions & 19 deletions src/balanced_bagging.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

"""
Return a dictionary `result` mapping each unique value in a given abstract vector `y`
to the vector of indices where that value occurs.
Expand All @@ -16,18 +15,21 @@ function group_inds(y::AbstractVector{T}) where {T}
return freeze(result)
end

const ERR_MULTICLASS_UNSUPP(num_classes) =
"Only binary classification supported by BalancedBaggingClassifier. Got $num_classes classes"
const ERR_MULTICLASS_UNSUPP(num_classes) = ArgumentError(
"Only binary classification supported by BalancedBaggingClassifier. "*
"Got $num_classes classes"
)

"""
Given an abstract vector `y` where any element takes one of two values, return the indices of the
most frequent of them, the indices of the least frequent of them, and the counts of each.
Given an abstract vector `y` where any element takes one of two values, return the
indices of the most frequent of them, the indices of the least frequent of them, and the
counts of each.
"""
function get_majority_minority_inds_counts(y)
# a tuple mapping each class to its indices
labels_inds = collect(group_inds(y))
num_classes = length(labels_inds)
num_classes == 2 || throw(ArgumentError(ERR_MULTICLASS_UNSUPP(num_classes)))
num_classes == 2 || throw(ERR_MULTICLASS_UNSUPP(num_classes))
# get the length of each class
first_class_count = length(labels_inds[1][2])
second_class_count = length(labels_inds[2][2])
Expand All @@ -42,9 +44,9 @@ function get_majority_minority_inds_counts(y)
end

"""
Given data `X`, `y` where `X` is a table and `y` is an abstract vector (which may be wrapped in nodes),
Given data `X`, `y` where `X` is a table and `y` is an abstract vector (which may be wrapped in nodes),
the indices and counts of the majority and minority classes and abstract rng,
return `X_sub`, `y_sub`, in the form of nodes, which are the result of randomly undersampling
return `X_sub`, `y_sub`, in the form of nodes, which are the result of randomly undersampling
the majority class data in `X`, `y` so that both classes occur equally frequently.
"""
function get_some_balanced_subset(
Expand Down Expand Up @@ -89,8 +91,8 @@ function BalancedBaggingClassifier(;
rng = Random.default_rng(),
)
model === nothing && error(ERR_MISSING_CLF)
T < 0 && error(ERR_BAD_T)
rng = rng_handler(rng)
T < 0 && error(ERR_BAD_T)
rng = rng_handler(rng)
return BalancedBaggingClassifier(model, T, rng)
end

Expand Down Expand Up @@ -178,8 +180,8 @@ Construct an instance with default hyper-parameters using the syntax `bagging_mo
Given a probabilistic classifier, `BalancedBaggingClassifier` performs bagging by undersampling
only majority data in each bag so that it includes as many samples as the minority data.
This approach was proposed, with an AdaBoost classifier whose output scores are averaged, in the paper
Xu-Ying Liu, Jianxin Wu, & Zhi-Hua Zhou. (2009). Exploratory Undersampling for Class-Imbalance Learning.
IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics), 39 (2), 539–5501
Xu-Ying Liu, Jianxin Wu, & Zhi-Hua Zhou. (2009). Exploratory Undersampling for Class-Imbalance Learning.
IEEE Transactions on Systems, Man, and Cybernetics, Part B (Cybernetics), 39 (2), 539–550
# Training data
Expand All @@ -206,7 +208,7 @@ Train the machine with `fit!(mach, rows=...)`.
- `T::Integer=0`: The number of bags to be used in the ensemble. If not given, will be set as
the ratio between the frequency of the majority and minority classes. Can be later found in `report(mach)`.
- `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer`
- `rng::Union{AbstractRNG, Integer}=default_rng()`: Either an `AbstractRNG` object or an `Integer`
seed to be used with `Xoshiro`
# Operations
Expand Down Expand Up @@ -234,13 +236,13 @@ logistic_model = LogisticClassifier()
model = BalancedBaggingClassifier(model=logistic_model, T=5)
# Load the data and train the BalancedBaggingClassifier
X, y = Imbalance.generate_imbalanced_data(100, 5; num_vals_per_category = [3, 2],
class_probs = [0.9, 0.1],
type = "ColTable",
X, y = Imbalance.generate_imbalanced_data(100, 5; num_vals_per_category = [3, 2],
class_probs = [0.9, 0.1],
type = "ColTable",
rng=42)
julia> Imbalance.checkbalance(y)
1: ▇▇▇▇▇▇▇▇▇▇ 16 (19.0%)
0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 84 (100.0%)
1: ▇▇▇▇▇▇▇▇▇▇ 16 (19.0%)
0: ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 84 (100.0%)
mach = machine(model, X, y) |> fit!
Expand All @@ -250,4 +252,4 @@ yhat = predict(mach, X) # probabilistic predictions
predict_mode(mach, X) # point predictions
```
"""
BalancedBaggingClassifier
BalancedBaggingClassifier
6 changes: 3 additions & 3 deletions src/balanced_model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ for model_type in SUPPORTED_MODEL_TYPES
eval(ex)
end

const ERR_NO_PROP = "trying to access property $name which does not exist"
const ERR_NO_PROP = ArgumentError("trying to access property $name which does not exist")
# overload set property to set the property from the vector in the struct
for model_type in SUPPORTED_MODEL_TYPES
struct_name = MODELTYPE_TO_COMPOSITETYPE[model_type]
Expand All @@ -128,7 +128,7 @@ for model_type in SUPPORTED_MODEL_TYPES
!isnothing(idx) && return getfield(b, :balancers)[idx] = val
# the other only option is model
name === :model && return setfield(b, :model, val)
error(ERR_NO_PROP)
throw(ERR_NO_PROP)
end
end
eval(ex)
Expand Down Expand Up @@ -198,4 +198,4 @@ for composite_type in COMPOSITE_TYPES
MMI.$trait(::Type{<:$composite_type{balancernames, M}}) where {balancernames, M} = MMI.$trait(M)
end |> eval
end
end
end
5 changes: 3 additions & 2 deletions test/balanced_bagging.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
@test MLJBalancing.get_majority_minority_inds_counts(y) ==
([1, 2, 3, 4, 8], [5, 6, 7], 5, 3)
y = [0, 0, 0, 0, 1, 1, 1, 0, 2, 2, 2]
@test_throws MLJBalancing.ERR_MULTICLASS_UNSUPP(3) MLJBalancing.get_majority_minority_inds_counts(
y,
@test_throws(
MLJBalancing.ERR_MULTICLASS_UNSUPP(3),
MLJBalancing.get_majority_minority_inds_counts(y),
)
end

Expand Down
14 changes: 8 additions & 6 deletions test/balanced_model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,10 @@
@test_throws MLJBalancing.ERR_MODEL_UNSPECIFIED begin
BalancedModel(b1 = balancer1, b2 = balancer2, b3 = balancer3)
end
@test_throws "ArgumentError: Only these model supertypes support wrapping: `Probabilistic`, `Deterministic`, and `Interval`.\nModel provided has type `Int64`." begin
BalancedModel(model = 1, b1 = balancer1, b2 = balancer2, b3 = balancer3)
end
@test_throws(
MLJBalancing.ERR_UNSUPPORTED_MODEL(1),
BalancedModel(model = 1, b1 = balancer1, b2 = balancer2, b3 = balancer3),
)
@test_logs (:warn, MLJBalancing.WRN_BALANCER_UNSPECIFIED) begin
BalancedModel(model = model_prob)
end
Expand Down Expand Up @@ -80,7 +81,8 @@
Base.getproperty(balanced_model, :b1) == balancer1
Base.setproperty!(balanced_model, :b1, balancer2)
Base.getproperty(balanced_model, :b1) == balancer2
@test_throws MLJBalancing.ERR_NO_PROP begin
Base.setproperty!(balanced_model, :name11, balancer2)
end
@test_throws(
MLJBalancing.ERR_NO_PROP,
Base.setproperty!(balanced_model, :name11, balancer2),
)
end

0 comments on commit e6b1aaa

Please sign in to comment.