Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added hdbscan and optics learners #56

Merged
merged 2 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
^LICENSE$
.ignore
.editorconfig
.gitignore
Expand Down
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,14 @@ Collate:
'LearnerClustFanny.R'
'LearnerClustFarthestFirst.R'
'LearnerClustFeatureless.R'
'LearnerClustHDBSCAN.R'
'LearnerClustHclust.R'
'LearnerClustKKMeans.R'
'LearnerClustKMeans.R'
'LearnerClustMclust.R'
'LearnerClustMeanShift.R'
'LearnerClustMiniBatchKMeans.R'
'LearnerClustOPTICS.R'
'LearnerClustPAM.R'
'LearnerClustSimpleKMeans.R'
'LearnerClustXMeans.R'
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@ export(LearnerClustEM)
export(LearnerClustFanny)
export(LearnerClustFarthestFirst)
export(LearnerClustFeatureless)
export(LearnerClustHDBSCAN)
export(LearnerClustHclust)
export(LearnerClustKKMeans)
export(LearnerClustKMeans)
export(LearnerClustMclust)
export(LearnerClustMeanShift)
export(LearnerClustMiniBatchKMeans)
export(LearnerClustOPTICS)
export(LearnerClustPAM)
export(LearnerClustSimpleKMeans)
export(LearnerClustXMeans)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# mlr3cluster (development version)

* Add DBSCAN learner from 'fpc' package
* Add HDBSCAN learner from 'dbscan' package
* Add OPTICS learner from 'dbscan' package

# mlr3cluster 0.1.8

Expand Down
5 changes: 2 additions & 3 deletions R/LearnerClustAffinityPropagation.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Affinity Propagation Clustering Learner
#'
#' @name mlr_learners_clust.ap
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for Affinity Propagation clustering implemented in [apcluster::apcluster()].
Expand All @@ -26,7 +24,7 @@ LearnerClustAP = R6Class("LearnerClustAP",
initialize = function() {
param_set = ps(
s = p_uty(tags = c("required", "train")),
p = p_uty(default = NA, tags = "train", custom_check = crate(function(x) check_numeric(x))),
p = p_uty(default = NA, tags = "train", custom_check = check_numeric),
q = p_dbl(0, 1, tags = "train"),
maxits = p_int(1L, default = 1000L, tags = "train"),
convits = p_int(1L, default = 100L, tags = "train"),
Expand Down Expand Up @@ -79,4 +77,5 @@ LearnerClustAP = R6Class("LearnerClustAP",
)
)

#' @include aaa.R
learners[["clust.ap"]] = LearnerClustAP
3 changes: 1 addition & 2 deletions R/LearnerClustAgnes.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Agglomerative Hierarchical Clustering Learner
#'
#' @name mlr_learners_clust.agnes
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for agglomerative hierarchical clustering implemented in [cluster::agnes()].
Expand Down Expand Up @@ -83,4 +81,5 @@ LearnerClustAgnes = R6Class("LearnerClustAgnes",
)
)

#' @include aaa.R
learners[["clust.agnes"]] = LearnerClustAgnes
5 changes: 2 additions & 3 deletions R/LearnerClustCMeans.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Fuzzy C-Means Clustering Learner
#'
#' @name mlr_learners_clust.cmeans
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for fuzzy clustering implemented in [e1071::cmeans()].
Expand All @@ -24,7 +22,7 @@ LearnerClustCMeans = R6Class("LearnerClustCMeans",
initialize = function() {
param_set = ps(
centers = p_uty(
tags = c("required", "train"), default = 2L, custom_check = crate(check_centers)
tags = c("required", "train"), default = 2L, custom_check = check_centers
),
iter.max = p_int(1L, default = 100L, tags = "train"),
verbose = p_lgl(default = FALSE, tags = "train"),
Expand Down Expand Up @@ -81,4 +79,5 @@ LearnerClustCMeans = R6Class("LearnerClustCMeans",
)
)

#' @include aaa.R
learners[["clust.cmeans"]] = LearnerClustCMeans
3 changes: 1 addition & 2 deletions R/LearnerClustCobweb.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Cobweb Clustering Learner
#'
#' @name mlr_learners_clust.cobweb
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for Cobweb clustering implemented in [RWeka::Cobweb()].
Expand Down Expand Up @@ -57,4 +55,5 @@ LearnerClustCobweb = R6Class("LearnerClustCobweb",
)
)

#' @include aaa.R
learners[["clust.cobweb"]] = LearnerClustCobweb
25 changes: 12 additions & 13 deletions R/LearnerClustDBSCAN.R
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
#' @title Density-Based Clustering Learner
#' @title Density-based Spatial Clustering of Applications with Noise (DBSCAN) Clustering Learner
#'
#' @name mlr_learners_clust.dbscan
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for density-based clustering implemented in [dbscan::dbscan()].
#' The predict method uses [dbscan::predict.dbscan_fast()] to compute the
#' cluster memberships for new data.
#' DBSCAN (Density-based spatial clustering of applications with noise) clustering.
#' Calls [dbscan::dbscan()] from \CRANpkg{dbscan}.
#'
#' @templateVar id clust.dbscan
#' @template learner
#' @template example
#'
#' @references
#' `r format_bib("ester1996density")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN",
inherit = LearnerClust,
public = list(
Expand All @@ -24,7 +25,7 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN",
eps = p_dbl(0, tags = c("required", "train")),
minPts = p_int(0L, default = 5L, tags = "train"),
borderPoints = p_lgl(default = TRUE, tags = "train"),
weights = p_uty(tags = "train", custom_check = crate(function(x) check_numeric(x))),
weights = p_uty(tags = "train", custom_check = check_numeric),
search = p_fct(levels = c("kdtree", "linear", "dist"), default = "kdtree", tags = "train"),
bucketSize = p_int(1L, default = 10L, tags = "train"),
splitRule = p_fct(
Expand Down Expand Up @@ -52,10 +53,7 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN",
.train = function(task) {
pv = self$param_set$get_values(tags = "train")
m = invoke(dbscan::dbscan, x = task$data(), .args = pv)
m = set_class(
list(cluster = m$cluster, eps = m$eps, minPts = m$minPts, data = task$data(), dist = m$dist),
c("dbscan_fast", "dbscan")
)
m = insert_named(m, list(data = task$data()))
if (self$save_assignments) {
self$assignments = m$cluster
}
Expand All @@ -64,10 +62,11 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN",
},

.predict = function(task) {
partition = predict(self$model, newdata = task$data(), self$model$data)
partition = invoke(predict, self$model, newdata = task$data(), data = self$model$data)
PredictionClust$new(task = task, partition = partition)
}
)
)

#' @include aaa.R
learners[["clust.dbscan"]] = LearnerClustDBSCAN
21 changes: 11 additions & 10 deletions R/LearnerClustDBSCANfpc.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
#' @title Density-Based Clustering Learner with fpc
#' @title Density-based Spatial Clustering of Applications with Noise (DBSCAN) Clustering Learner
#'
#' @name mlr_learners_clust.dbscan_fpc
#'
#' @description
#' A [LearnerClust] for density-based clustering implemented in [fpc::dbscan()].
#' The predict method uses [fpc::predict.dbscan()] to compute the
#' cluster memberships for new data.
#' DBSCAN (Density-based spatial clustering of applications with noise) clustering.
#' Calls [fpc::dbscan()] from \CRANpkg{fpc}.
#'
#' @templateVar id clust.dbscan_fpc
#' @template learner
#' @template example
#'
#' @references
#' `r format_bib("ester1996density")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc",
inherit = LearnerClust,
public = list(
Expand Down Expand Up @@ -60,10 +63,7 @@ LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc",
.train = function(task) {
pars = self$param_set$get_values(tags = "train")
m = invoke(fpc::dbscan, data = task$data(), .args = pars)
m = set_class(
list(cluster = m$cluster, eps = m$eps, MinPts = m$MinPts, isseed = m$isseed, data = task$data()),
"dbscan"
)
m = insert_named(m, list(data = task$data()))
if (self$save_assignments) {
self$assignments = m$cluster
}
Expand All @@ -72,10 +72,11 @@ LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc",
},

.predict = function(task) {
  # Assign the observations of `task` to clusters using the stored model.
  # The model carries the training data in `$data` (added in `.train()`),
  # which is handed to predict() as `data`.
  #
  # `newdata` must be an argument of the predict() call itself. Passed to
  # as.integer() instead, it never reaches predict(), so the observations
  # of `task` would not be used for the prediction.
  partition = as.integer(
    invoke(predict, self$model, data = self$model$data, newdata = task$data())
  )
  PredictionClust$new(task = task, partition = partition)
}
)
)

#' @include aaa.R
learners[["clust.dbscan_fpc"]] = LearnerClustDBSCANfpc
3 changes: 1 addition & 2 deletions R/LearnerClustDiana.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Divisive Hierarchical Clustering Learner
#'
#' @name mlr_learners_clust.diana
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for divisive hierarchical clustering implemented in [cluster::diana()].
Expand Down Expand Up @@ -64,4 +62,5 @@ LearnerClustDiana = R6Class("LearnerClustDiana",
)
)

#' @include aaa.R
learners[["clust.diana"]] = LearnerClustDiana
3 changes: 1 addition & 2 deletions R/LearnerClustEM.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Expectation-Maximization Clustering Learner
#'
#' @name mlr_learners_clust.em
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for Expectation-Maximization clustering implemented in
Expand Down Expand Up @@ -68,4 +66,5 @@ LearnerClustEM = R6Class("LearnerClustEM",
)
)

#' @include aaa.R
learners[["clust.em"]] = LearnerClustEM
3 changes: 1 addition & 2 deletions R/LearnerClustFanny.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Fuzzy Analysis Clustering Learner
#'
#' @name mlr_learners_clust.fanny
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for fuzzy clustering implemented in [cluster::fanny()].
Expand Down Expand Up @@ -71,4 +69,5 @@ LearnerClustFanny = R6Class("LearnerClustFanny",
)
)

#' @include aaa.R
learners[["clust.fanny"]] = LearnerClustFanny
3 changes: 1 addition & 2 deletions R/LearnerClustFarthestFirst.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Farthest First Clustering Learner
#'
#' @name mlr_learners_clust.ff
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for Farthest First clustering implemented in [RWeka::FarthestFirst()].
Expand Down Expand Up @@ -67,4 +65,5 @@ LearnerClustFarthestFirst = R6Class("LearnerClustFF",
)
)

#' @include aaa.R
learners[["clust.ff"]] = LearnerClustFarthestFirst
3 changes: 1 addition & 2 deletions R/LearnerClustFeatureless.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Featureless Clustering Learner
#'
#' @name mlr_learners_clust.featureless
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A simple [LearnerClust] which randomly (but evenly) assigns observations to
Expand Down Expand Up @@ -80,4 +78,5 @@ LearnerClustFeatureless = R6Class("LearnerClustFeatureless",
)
)

#' @include aaa.R
learners[["clust.featureless"]] = LearnerClustFeatureless
63 changes: 63 additions & 0 deletions R/LearnerClustHDBSCAN.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#' @title Hierarchical DBSCAN (HDBSCAN) Clustering Learner
#'
#' @name mlr_learners_clust.hdbscan
#'
#' @description
#' A [LearnerClust] for HDBSCAN (Hierarchical DBSCAN) clustering.
#' Training calls [dbscan::hdbscan()] from \CRANpkg{dbscan}.
#' The training data is stored alongside the fitted model and is passed to
#' `predict()` together with the new observations at prediction time.
#'
#' @templateVar id clust.hdbscan
#' @template learner
#'
#' @references
#' `r format_bib("campello2013density")`
#'
#' @export
#' @template seealso_learner
#' @template example
LearnerClustHDBSCAN = R6Class("LearnerClustHDBSCAN",
  inherit = LearnerClust,
  public = list(
    #' @description
    #' Creates a new instance of this [R6][R6::R6Class] class.
    initialize = function() {
      super$initialize(
        id = "clust.hdbscan",
        feature_types = c("logical", "integer", "numeric"),
        predict_types = "partition",
        # all hyperparameters are forwarded to dbscan::hdbscan() during training
        param_set = ps(
          minPts = p_int(0L, tags = c("required", "train")),
          gen_hdbscan_tree = p_lgl(default = FALSE, tags = "train"),
          gen_simplified_tree = p_lgl(default = FALSE, tags = "train")
        ),
        properties = c("partitional", "exclusive", "complete"),
        packages = "dbscan",
        man = "mlr3cluster::mlr_learners_clust.hdbscan",
        label = "HDBSCAN Clustering"
      )
    }
  ),
  private = list(
    # Fits HDBSCAN on the task data and returns the fitted object with the
    # training data attached as element `data`.
    .train = function(task) {
      train_args = self$param_set$get_values(tags = "train")
      fit = invoke(dbscan::hdbscan, x = task$data(), .args = train_args)
      # keep the training data with the model; `.predict()` passes it on as `data`
      fit = insert_named(fit, list(data = task$data()))

      if (self$save_assignments) {
        self$assignments = fit$cluster
      }

      fit
    },

    # Calls predict() on the stored model with the task data as `newdata` and
    # the stored training data as `data`; the result is coerced to integer.
    .predict = function(task) {
      model = self$model
      labels = invoke(predict, model, newdata = task$data(), data = model$data)
      PredictionClust$new(task = task, partition = as.integer(labels))
    }
  )
)

#' @include aaa.R
learners[["clust.hdbscan"]] = LearnerClustHDBSCAN
3 changes: 1 addition & 2 deletions R/LearnerClustHclust.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
#' @title Agglomerative Hierarchical Clustering Learner
#'
#' @name mlr_learners_clust.hclust
#' @include LearnerClust.R
#' @include aaa.R
#'
#' @description
#' A [LearnerClust] for agglomerative hierarchical clustering implemented in [stats::hclust()].
Expand Down Expand Up @@ -82,4 +80,5 @@ LearnerClustHclust = R6Class("LearnerClustHclust",
)
)

#' @include aaa.R
learners[["clust.hclust"]] = LearnerClustHclust
Loading