diff --git a/.Rbuildignore b/.Rbuildignore index 7874b7e9..444dec87 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,4 @@ +^LICENSE$ .ignore .editorconfig .gitignore diff --git a/DESCRIPTION b/DESCRIPTION index bcf78f8a..7ed5ff0e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -58,12 +58,14 @@ Collate: 'LearnerClustFanny.R' 'LearnerClustFarthestFirst.R' 'LearnerClustFeatureless.R' + 'LearnerClustHDBSCAN.R' 'LearnerClustHclust.R' 'LearnerClustKKMeans.R' 'LearnerClustKMeans.R' 'LearnerClustMclust.R' 'LearnerClustMeanShift.R' 'LearnerClustMiniBatchKMeans.R' + 'LearnerClustOPTICS.R' 'LearnerClustPAM.R' 'LearnerClustSimpleKMeans.R' 'LearnerClustXMeans.R' diff --git a/NAMESPACE b/NAMESPACE index 832ca558..d1f5562a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,12 +24,14 @@ export(LearnerClustEM) export(LearnerClustFanny) export(LearnerClustFarthestFirst) export(LearnerClustFeatureless) +export(LearnerClustHDBSCAN) export(LearnerClustHclust) export(LearnerClustKKMeans) export(LearnerClustKMeans) export(LearnerClustMclust) export(LearnerClustMeanShift) export(LearnerClustMiniBatchKMeans) +export(LearnerClustOPTICS) export(LearnerClustPAM) export(LearnerClustSimpleKMeans) export(LearnerClustXMeans) diff --git a/NEWS.md b/NEWS.md index a6af2ffd..f2f9004b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,8 @@ # mlr3cluster (development version) * Add DBSCAN learner from 'fpc' package +* Add HDBSCAN learner from 'dbscan' package +* Add OPTICS learner from 'dbscan' package # mlr3cluster 0.1.8 diff --git a/R/LearnerClustAffinityPropagation.R b/R/LearnerClustAffinityPropagation.R index fd817131..e3f2332c 100644 --- a/R/LearnerClustAffinityPropagation.R +++ b/R/LearnerClustAffinityPropagation.R @@ -1,8 +1,6 @@ #' @title Affinity Propagation Clustering Learner #' #' @name mlr_learners_clust.ap -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for Affinity Propagation clustering implemented in [apcluster::apcluster()]. 
@@ -26,7 +24,7 @@ LearnerClustAP = R6Class("LearnerClustAP", initialize = function() { param_set = ps( s = p_uty(tags = c("required", "train")), - p = p_uty(default = NA, tags = "train", custom_check = crate(function(x) check_numeric(x))), + p = p_uty(default = NA, tags = "train", custom_check = check_numeric), q = p_dbl(0, 1, tags = "train"), maxits = p_int(1L, default = 1000L, tags = "train"), convits = p_int(1L, default = 100L, tags = "train"), @@ -79,4 +77,5 @@ LearnerClustAP = R6Class("LearnerClustAP", ) ) +#' @include aaa.R learners[["clust.ap"]] = LearnerClustAP diff --git a/R/LearnerClustAgnes.R b/R/LearnerClustAgnes.R index a12f0534..51a99ae0 100644 --- a/R/LearnerClustAgnes.R +++ b/R/LearnerClustAgnes.R @@ -1,8 +1,6 @@ #' @title Agglomerative Hierarchical Clustering Learner #' #' @name mlr_learners_clust.agnes -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for agglomerative hierarchical clustering implemented in [cluster::agnes()]. @@ -83,4 +81,5 @@ LearnerClustAgnes = R6Class("LearnerClustAgnes", ) ) +#' @include aaa.R learners[["clust.agnes"]] = LearnerClustAgnes diff --git a/R/LearnerClustCMeans.R b/R/LearnerClustCMeans.R index 2070a3a2..c9e8bd3b 100644 --- a/R/LearnerClustCMeans.R +++ b/R/LearnerClustCMeans.R @@ -1,8 +1,6 @@ #' @title Fuzzy C-Means Clustering Learner #' #' @name mlr_learners_clust.cmeans -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for fuzzy clustering implemented in [e1071::cmeans()]. 
@@ -24,7 +22,7 @@ LearnerClustCMeans = R6Class("LearnerClustCMeans", initialize = function() { param_set = ps( centers = p_uty( - tags = c("required", "train"), default = 2L, custom_check = crate(check_centers) + tags = c("required", "train"), default = 2L, custom_check = check_centers ), iter.max = p_int(1L, default = 100L, tags = "train"), verbose = p_lgl(default = FALSE, tags = "train"), @@ -81,4 +79,5 @@ LearnerClustCMeans = R6Class("LearnerClustCMeans", ) ) +#' @include aaa.R learners[["clust.cmeans"]] = LearnerClustCMeans diff --git a/R/LearnerClustCobweb.R b/R/LearnerClustCobweb.R index 11c6bf80..3e8a682f 100644 --- a/R/LearnerClustCobweb.R +++ b/R/LearnerClustCobweb.R @@ -1,8 +1,6 @@ #' @title Cobweb Clustering Learner #' #' @name mlr_learners_clust.cobweb -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for Cobweb clustering implemented in [RWeka::Cobweb()]. @@ -57,4 +55,5 @@ LearnerClustCobweb = R6Class("LearnerClustCobweb", ) ) +#' @include aaa.R learners[["clust.cobweb"]] = LearnerClustCobweb diff --git a/R/LearnerClustDBSCAN.R b/R/LearnerClustDBSCAN.R index 4a491561..8d08b1bf 100644 --- a/R/LearnerClustDBSCAN.R +++ b/R/LearnerClustDBSCAN.R @@ -1,19 +1,20 @@ -#' @title Density-Based Clustering Learner +#' @title Density-based Spatial Clustering of Applications with Noise (DBSCAN) Clustering Learner #' #' @name mlr_learners_clust.dbscan -#' @include LearnerClust.R -#' @include aaa.R #' #' @description -#' A [LearnerClust] for density-based clustering implemented in [dbscan::dbscan()]. -#' The predict method uses [dbscan::predict.dbscan_fast()] to compute the -#' cluster memberships for new data. +#' DBSCAN (Density-based spatial clustering of applications with noise) clustering. +#' Calls [dbscan::dbscan()] from \CRANpkg{dbscan}. 
#' #' @templateVar id clust.dbscan #' @template learner -#' @template example +#' +#' @references +#' `r format_bib("ester1996density")` #' #' @export +#' @template seealso_learner +#' @template example LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN", inherit = LearnerClust, public = list( @@ -24,7 +25,7 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN", eps = p_dbl(0, tags = c("required", "train")), minPts = p_int(0L, default = 5L, tags = "train"), borderPoints = p_lgl(default = TRUE, tags = "train"), - weights = p_uty(tags = "train", custom_check = crate(function(x) check_numeric(x))), + weights = p_uty(tags = "train", custom_check = check_numeric), search = p_fct(levels = c("kdtree", "linear", "dist"), default = "kdtree", tags = "train"), bucketSize = p_int(1L, default = 10L, tags = "train"), splitRule = p_fct( @@ -52,10 +53,7 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN", .train = function(task) { pv = self$param_set$get_values(tags = "train") m = invoke(dbscan::dbscan, x = task$data(), .args = pv) - m = set_class( - list(cluster = m$cluster, eps = m$eps, minPts = m$minPts, data = task$data(), dist = m$dist), - c("dbscan_fast", "dbscan") - ) + m = insert_named(m, list(data = task$data())) if (self$save_assignments) { self$assignments = m$cluster } @@ -64,10 +62,11 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN", }, .predict = function(task) { - partition = predict(self$model, newdata = task$data(), self$model$data) + partition = invoke(predict, self$model, newdata = task$data(), data = self$model$data) PredictionClust$new(task = task, partition = partition) } ) ) +#' @include aaa.R learners[["clust.dbscan"]] = LearnerClustDBSCAN diff --git a/R/LearnerClustDBSCANfpc.R b/R/LearnerClustDBSCANfpc.R index 95787117..acc7da4f 100644 --- a/R/LearnerClustDBSCANfpc.R +++ b/R/LearnerClustDBSCANfpc.R @@ -1,17 +1,20 @@ -#' @title Density-Based Clustering Learner with fpc +#' @title Density-based Spatial Clustering of Applications with Noise (DBSCAN) 
Clustering Learner #' #' @name mlr_learners_clust.dbscan_fpc #' #' @description -#' A [LearnerClust] for density-based clustering implemented in [fpc::dbscan()]. -#' The predict method uses [fpc::predict.dbscan()] to compute the -#' cluster memberships for new data. +#' DBSCAN (Density-based spatial clustering of applications with noise) clustering. +#' Calls [fpc::dbscan()] from \CRANpkg{fpc}. #' #' @templateVar id clust.dbscan_fpc #' @template learner -#' @template example +#' +#' @references +#' `r format_bib("ester1996density")` #' #' @export +#' @template seealso_learner +#' @template example LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc", inherit = LearnerClust, public = list( @@ -60,10 +63,7 @@ LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc", .train = function(task) { pars = self$param_set$get_values(tags = "train") m = invoke(fpc::dbscan, data = task$data(), .args = pars) - m = set_class( - list(cluster = m$cluster, eps = m$eps, MinPts = m$MinPts, isseed = m$isseed, data = task$data()), - "dbscan" - ) + m = insert_named(m, list(data = task$data())) if (self$save_assignments) { self$assignments = m$cluster } @@ -72,10 +72,11 @@ LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc", }, .predict = function(task) { - partition = as.integer(predict(self$model, data = self$model$data, newdata = task$data())) + partition = as.integer(invoke(predict, self$model, data = self$model$data, newdata = task$data())) PredictionClust$new(task = task, partition = partition) } ) ) +#' @include aaa.R learners[["clust.dbscan_fpc"]] = LearnerClustDBSCANfpc diff --git a/R/LearnerClustDiana.R b/R/LearnerClustDiana.R index ef682755..de7a7711 100644 --- a/R/LearnerClustDiana.R +++ b/R/LearnerClustDiana.R @@ -1,8 +1,6 @@ #' @title Divisive Hierarchical Clustering Learner #' #' @name mlr_learners_clust.diana -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for divisive hierarchical clustering implemented in [cluster::diana()]. 
@@ -64,4 +62,5 @@ LearnerClustDiana = R6Class("LearnerClustDiana", ) ) +#' @include aaa.R learners[["clust.diana"]] = LearnerClustDiana diff --git a/R/LearnerClustEM.R b/R/LearnerClustEM.R index 668a8d7c..25ca9f3a 100644 --- a/R/LearnerClustEM.R +++ b/R/LearnerClustEM.R @@ -1,8 +1,6 @@ #' @title Expectation-Maximization Clustering Learner #' #' @name mlr_learners_clust.em -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for Expectation-Maximization clustering implemented in @@ -68,4 +66,5 @@ LearnerClustEM = R6Class("LearnerClustEM", ) ) +#' @include aaa.R learners[["clust.em"]] = LearnerClustEM diff --git a/R/LearnerClustFanny.R b/R/LearnerClustFanny.R index c409f0af..df4623de 100644 --- a/R/LearnerClustFanny.R +++ b/R/LearnerClustFanny.R @@ -1,8 +1,6 @@ #' @title Fuzzy Analysis Clustering Learner #' #' @name mlr_learners_clust.fanny -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for fuzzy clustering implemented in [cluster::fanny()]. @@ -71,4 +69,5 @@ LearnerClustFanny = R6Class("LearnerClustFanny", ) ) +#' @include aaa.R learners[["clust.fanny"]] = LearnerClustFanny diff --git a/R/LearnerClustFarthestFirst.R b/R/LearnerClustFarthestFirst.R index efe7bddc..b8dc7150 100644 --- a/R/LearnerClustFarthestFirst.R +++ b/R/LearnerClustFarthestFirst.R @@ -1,8 +1,6 @@ #' @title Farthest First Clustering Learner #' #' @name mlr_learners_clust.ff -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for Farthest First clustering implemented in [RWeka::FarthestFirst()]. 
@@ -67,4 +65,5 @@ LearnerClustFarthestFirst = R6Class("LearnerClustFF", ) ) +#' @include aaa.R learners[["clust.ff"]] = LearnerClustFarthestFirst diff --git a/R/LearnerClustFeatureless.R b/R/LearnerClustFeatureless.R index ebf52d5a..121be147 100644 --- a/R/LearnerClustFeatureless.R +++ b/R/LearnerClustFeatureless.R @@ -1,8 +1,6 @@ #' @title Featureless Clustering Learner #' #' @name mlr_learners_clust.featureless -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A simple [LearnerClust] which randomly (but evenly) assigns observations to @@ -80,4 +78,5 @@ LearnerClustFeatureless = R6Class("LearnerClustFeatureless", ) ) +#' @include aaa.R learners[["clust.featureless"]] = LearnerClustFeatureless diff --git a/R/LearnerClustHDBSCAN.R b/R/LearnerClustHDBSCAN.R new file mode 100644 index 00000000..5e7184bd --- /dev/null +++ b/R/LearnerClustHDBSCAN.R @@ -0,0 +1,63 @@ +#' @title Hierarchical DBSCAN (HDBSCAN) Clustering Learner +#' +#' @name mlr_learners_clust.hdbscan +#' +#' @description +#' HDBSCAN (Hierarchical DBSCAN) clustering. +#' Calls [dbscan::hdbscan()] from \CRANpkg{dbscan}. +#' +#' @templateVar id clust.hdbscan +#' @template learner +#' +#' @references +#' `r format_bib("campello2013density")` +#' +#' @export +#' @template seealso_learner +#' @template example +LearnerClustHDBSCAN = R6Class("LearnerClustHDBSCAN", + inherit = LearnerClust, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. 
+ initialize = function() { + param_set = ps( + minPts = p_int(0L, tags = c("required", "train")), + gen_hdbscan_tree = p_lgl(default = FALSE, tags = "train"), + gen_simplified_tree = p_lgl(default = FALSE, tags = "train") + ) + + super$initialize( + id = "clust.hdbscan", + feature_types = c("logical", "integer", "numeric"), + predict_types = "partition", + param_set = param_set, + properties = c("partitional", "exclusive", "complete"), + packages = "dbscan", + man = "mlr3cluster::mlr_learners_clust.hdbscan", + label = "HDBSCAN Clustering" + ) + } + ), + private = list( + .train = function(task) { + pv = self$param_set$get_values(tags = "train") + m = invoke(dbscan::hdbscan, x = task$data(), .args = pv) + m = insert_named(m, list(data = task$data())) + + if (self$save_assignments) { + self$assignments = m$cluster + } + + return(m) + }, + + .predict = function(task) { + partition = as.integer(invoke(predict, self$model, newdata = task$data(), data = self$model$data)) + PredictionClust$new(task = task, partition = partition) + } + ) +) + +#' @include aaa.R +learners[["clust.hdbscan"]] = LearnerClustHDBSCAN diff --git a/R/LearnerClustHclust.R b/R/LearnerClustHclust.R index 4f9f2986..542f6d95 100644 --- a/R/LearnerClustHclust.R +++ b/R/LearnerClustHclust.R @@ -1,8 +1,6 @@ #' @title Agglomerative Hierarchical Clustering Learner #' #' @name mlr_learners_clust.hclust -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for agglomerative hierarchical clustering implemented in [stats::hclust()]. 
@@ -82,4 +80,5 @@ LearnerClustHclust = R6Class("LearnerClustHclust", ) ) +#' @include aaa.R learners[["clust.hclust"]] = LearnerClustHclust diff --git a/R/LearnerClustKKMeans.R b/R/LearnerClustKKMeans.R index a90631b6..acaf58b5 100644 --- a/R/LearnerClustKKMeans.R +++ b/R/LearnerClustKKMeans.R @@ -1,8 +1,6 @@ #' @title Kernel K-Means Clustering Learner #' #' @name mlr_learners_clust.kkmeans -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for kernel k-means clustering implemented in [kernlab::kkmeans()]. @@ -25,7 +23,7 @@ LearnerClustKKMeans = R6Class("LearnerClustKKMeans", initialize = function() { param_set = ps( centers = p_uty( - tags = c("required", "train"), default = 2L, custom_check = crate(check_centers) + tags = c("required", "train"), default = 2L, custom_check = check_centers ), kernel = p_fct( default = "rbfdot", @@ -100,4 +98,5 @@ LearnerClustKKMeans = R6Class("LearnerClustKKMeans", ) ) +#' @include aaa.R learners[["clust.kkmeans"]] = LearnerClustKKMeans diff --git a/R/LearnerClustKMeans.R b/R/LearnerClustKMeans.R index b9e23156..e511b595 100644 --- a/R/LearnerClustKMeans.R +++ b/R/LearnerClustKMeans.R @@ -1,8 +1,6 @@ #' @title K-Means Clustering Learner #' #' @name mlr_learners_clust.kmeans -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for k-means clustering implemented in [stats::kmeans()]. 
@@ -24,7 +22,7 @@ LearnerClustKMeans = R6Class("LearnerClustKMeans", initialize = function() { param_set = ps( centers = p_uty( - tags = c("required", "train"), default = 2L, custom_check = crate(check_centers) + tags = c("required", "train"), default = 2L, custom_check = check_centers ), iter.max = p_int(1L, default = 10L, tags = "train"), algorithm = p_fct( @@ -72,4 +70,5 @@ LearnerClustKMeans = R6Class("LearnerClustKMeans", ) ) +#' @include aaa.R learners[["clust.kmeans"]] = LearnerClustKMeans diff --git a/R/LearnerClustMclust.R b/R/LearnerClustMclust.R index 93c2dbcb..8b09437a 100644 --- a/R/LearnerClustMclust.R +++ b/R/LearnerClustMclust.R @@ -1,8 +1,6 @@ #' @title Gaussian Mixture Models-Based Clustering Learner #' #' @name mlr_learners_clust.mclust -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for model-based clustering implemented in [mclust::Mclust()]. @@ -21,11 +19,11 @@ LearnerClustMclust = R6Class("LearnerClustMclust", #' Creates a new instance of this [R6][R6::R6Class] class. 
initialize = function() { parma_set = ps( - G = p_uty(default = 1:9, tags = "train", custom_check = crate(function(x) check_numeric(x))), - modelNames = p_uty(tags = "train", custom_check = crate(function(x) check_character(x))), - prior = p_uty(tags = "train", custom_check = crate(function(x) check_list(x))), - control = p_uty(default = mclust::emControl(), tags = "train", custom_check = crate(function(x) check_list(x))), - initialization = p_uty(tags = "train", custom_check = crate(function(x) check_list(x))), + G = p_uty(default = 1:9, tags = "train", custom_check = check_numeric), + modelNames = p_uty(tags = "train", custom_check = check_character), + prior = p_uty(tags = "train", custom_check = check_list), + control = p_uty(default = mclust::emControl(), tags = "train", custom_check = check_list), + initialization = p_uty(tags = "train", custom_check = check_list), x = p_uty(tags = "train", custom_check = crate(function(x) check_class(x, "mclustBIC"))) ) @@ -63,4 +61,5 @@ LearnerClustMclust = R6Class("LearnerClustMclust", ) ) +#' @include aaa.R learners[["clust.mclust"]] = LearnerClustMclust diff --git a/R/LearnerClustMeanShift.R b/R/LearnerClustMeanShift.R index 0b667476..5a4d2d2d 100644 --- a/R/LearnerClustMeanShift.R +++ b/R/LearnerClustMeanShift.R @@ -1,8 +1,6 @@ #' @title Mean Shift Clustering Learner #' #' @name mlr_learners_clust.meanshift -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for Mean Shift clustering implemented in [LPCM::ms()]. 
@@ -28,7 +26,7 @@ LearnerClustMeanShift = R6Class("LearnerClustMeanShift", "`h` must be either integer or numeric vector" } })), - subset = p_uty(tags = "train", custom_check = crate(function(x) check_numeric(x))), + subset = p_uty(tags = "train", custom_check = check_numeric), scaled = p_int(0L, default = 1, tags = "train"), iter = p_int(1L, default = 200L, tags = "train"), thr = p_dbl(default = 0.01, tags = "train") @@ -69,4 +67,5 @@ LearnerClustMeanShift = R6Class("LearnerClustMeanShift", ) ) +#' @include aaa.R learners[["clust.meanshift"]] = LearnerClustMeanShift diff --git a/R/LearnerClustMiniBatchKMeans.R b/R/LearnerClustMiniBatchKMeans.R index 82461877..d98315b6 100644 --- a/R/LearnerClustMiniBatchKMeans.R +++ b/R/LearnerClustMiniBatchKMeans.R @@ -1,8 +1,6 @@ #' @title Mini Batch K-Means Clustering Learner #' #' @name mlr_learners_clust.MBatchKMeans -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for mini batch k-means clustering implemented in [ClusterR::MiniBatchKmeans()]. @@ -106,4 +104,5 @@ LearnerClustMiniBatchKMeans = R6Class("LearnerClustMiniBatchKMeans", ) ) +#' @include aaa.R learners[["clust.MBatchKMeans"]] = LearnerClustMiniBatchKMeans diff --git a/R/LearnerClustOPTICS.R b/R/LearnerClustOPTICS.R new file mode 100644 index 00000000..91949f19 --- /dev/null +++ b/R/LearnerClustOPTICS.R @@ -0,0 +1,73 @@ +#' @title Ordering Points to Identify the Clustering Structure (OPTICS) Clustering Learner +#' +#' @name mlr_learners_clust.optics +#' +#' @description +#' OPTICS (Ordering points to identify the clustering structure) point ordering clustering. +#' Calls [dbscan::optics()] from \CRANpkg{dbscan}. 
+#' +#' @templateVar id clust.optics +#' @template learner +#' +#' @references +#' `r format_bib("ankerst1999optics")` +#' +#' @export +#' @template seealso_learner +#' @template example +LearnerClustOPTICS = R6Class("LearnerClustOPTICS", + inherit = LearnerClust, + public = list( + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + initialize = function() { + param_set = ps( + eps = p_dbl(0, special_vals = list(NULL), default = NULL, tags = "train"), + minPts = p_int(0L, default = 5L, tags = "train"), + search = p_fct(levels = c("kdtree", "linear", "dist"), default = "kdtree", tags = "train"), + bucketSize = p_int(1L, default = 10L, tags = "train"), + splitRule = p_fct( + levels = c("STD", "MIDPT", "FAIR", "SL_MIDPT", "SL_FAIR", "SUGGEST"), default = "SUGGEST", tags = "train" + ), + approx = p_dbl(default = 0, tags = "train"), + eps_cl = p_dbl(0, tags = c("required", "train")) + ) + # add deps + param_set$add_dep("bucketSize", "search", CondEqual$new("kdtree")) + param_set$add_dep("splitRule", "search", CondEqual$new("kdtree")) + + super$initialize( + id = "clust.optics", + feature_types = c("logical", "integer", "numeric"), + predict_types = "partition", + param_set = param_set, + properties = c("partitional", "exclusive", "complete"), + packages = "dbscan", + man = "mlr3cluster::mlr_learners_clust.optics", + label = "OPTICS Clustering" + ) + } + ), + private = list( + .train = function(task) { + pv = self$param_set$get_values(tags = "train") + m = invoke(dbscan::optics, x = task$data(), .args = remove_named(pv, "eps_cl")) + m = insert_named(m, list(data = task$data())) + m = invoke(dbscan::extractDBSCAN, object = m, eps_cl = pv$eps_cl) + + if (self$save_assignments) { + self$assignments = m$cluster + } + + return(m) + }, + + .predict = function(task) { + partition = invoke(predict, self$model, newdata = task$data(), data = self$model$data) + PredictionClust$new(task = task, partition = partition) + } + ) +) + +#' @include aaa.R 
+learners[["clust.optics"]] = LearnerClustOPTICS diff --git a/R/LearnerClustPAM.R b/R/LearnerClustPAM.R index 0003ab97..56ae612b 100644 --- a/R/LearnerClustPAM.R +++ b/R/LearnerClustPAM.R @@ -1,8 +1,6 @@ #' @title Partitioning Around Medoids Clustering Learner #' #' @name mlr_learners_clust.pam -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for PAM clustering implemented in [cluster::pam()]. @@ -80,4 +78,5 @@ LearnerClustPAM = R6Class("LearnerClustPAM", ) ) +#' @include aaa.R learners[["clust.pam"]] = LearnerClustPAM diff --git a/R/LearnerClustSimpleKMeans.R b/R/LearnerClustSimpleKMeans.R index b96f956f..c16ca881 100644 --- a/R/LearnerClustSimpleKMeans.R +++ b/R/LearnerClustSimpleKMeans.R @@ -1,8 +1,6 @@ #' @title K-Means Clustering Learner from Weka #' #' @name mlr_learners_clust.SimpleKMeans -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for Simple K Means clustering implemented in [RWeka::SimpleKMeans()]. @@ -81,4 +79,5 @@ LearnerClustSimpleKMeans = R6Class("LearnerClustSimpleKMeans", ) ) +#' @include aaa.R learners[["clust.SimpleKMeans"]] = LearnerClustSimpleKMeans diff --git a/R/LearnerClustXMeans.R b/R/LearnerClustXMeans.R index 01151da4..255c3c7d 100644 --- a/R/LearnerClustXMeans.R +++ b/R/LearnerClustXMeans.R @@ -1,8 +1,6 @@ #' @title X-means Clustering Learner #' #' @name mlr_learners_clust.xmeans -#' @include LearnerClust.R -#' @include aaa.R #' #' @description #' A [LearnerClust] for X-means clustering implemented in [RWeka::XMeans()]. 
@@ -71,4 +69,5 @@ LearnerClustXMeans = R6Class("LearnerClustXMeans", ) ) +#' @include aaa.R learners[["clust.xmeans"]] = LearnerClustXMeans diff --git a/R/bibentries.R b/R/bibentries.R index f976171f..691d8477 100644 --- a/R/bibentries.R +++ b/R/bibentries.R @@ -1,13 +1,40 @@ #' @importFrom utils bibentry bibentries = c( ruspini_1970 = bibentry("article", - doi = "10.1016/S0020-0255(70)80056-1", - year = "1970", - volume = "2", - number = "3", - pages = "319-350", - author = "Enrique H. Ruspini", - title = "Numerical methods for fuzzy clustering", - journal = "Information Sciences" + doi = "10.1016/S0020-0255(70)80056-1", + year = "1970", + volume = "2", + number = "3", + pages = "319-350", + author = "Enrique H. Ruspini", + title = "Numerical methods for fuzzy clustering", + journal = "Information Sciences" + ), + ester1996density = bibentry("inproceedings", + title = "A density-based algorithm for discovering clusters in large spatial databases with noise", + author = "Ester, Martin and Kriegel, Hans-Peter and Sander, J\u00F6rg and Xu, Xiaowei and others", + booktitle = "kdd", + volume = "96", + number = "34", + pages = "226--231", + year = "1996" + ), + campello2013density = bibentry("inproceedings", + title = "Density-based clustering based on hierarchical density estimates", + author = "Campello, Ricardo JGB and Moulavi, Davoud and Sander, J\u00F6rg", + booktitle = "Pacific-Asia conference on knowledge discovery and data mining", + pages = "160--172", + year = "2013", + organization = "Springer" + ), + ankerst1999optics = bibentry("article", + title = "OPTICS: Ordering points to identify the clustering structure", + author = "Ankerst, Mihael and Breunig, Markus M and Kriegel, Hans-Peter and Sander, J\u00F6rg", + journal = "ACM Sigmod record", + volume = "28", + number = "2", + pages = "49--60", + year = "1999", + publisher = "ACM New York, NY, USA" ) ) diff --git a/README.md b/README.md index 3bc8dc0d..bf5b4232 100644 --- a/README.md +++ b/README.md @@ -53,12 
+53,14 @@ Also, the package is integrated with **[mlr3viz](https://github.com/mlr-org/mlr3 | [clust.fanny](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.fanny.html) | Fuzzy Clustering | [cluster](https://CRAN.R-project.org/package=cluster) | | [clust.featureless](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.featureless.html) | Simple Featureless Clustering | [mlr3cluster](https://github.com/mlr-org/mlr3cluster) | | [clust.ff](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.FF.html) | FarthestFirst Clustering Algorithm | [RWeka](https://CRAN.R-project.org/package=RWeka) | +| [clust.hdbscan](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.hdbscan.html) | HDBSCAN Clustering | [dbscan](https://CRAN.R-project.org/package=dbscan) | | [clust.hclust](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.hclust.html) | Agglomerative Hierarchical Clustering | [stats](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/stats-package.html) | | [clust.kkmeans](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.kkmeans.html) | Kernel K-Means Clustering | [kernlab](https://CRAN.R-project.org/package=kernlab) | | [clust.kmeans](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.kmeans.html) | K-Means Clustering | [stats](https://stat.ethz.ch/R-manual/R-devel/library/stats/html/stats-package.html) | | [clust.mclust](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.mclust.html) | Gaussian Mixture Models-Based Clustering | [mclust](https://cran.r-project.org/package=mclust) | | [clust.MBatchKMeans](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.MiniBatchKMeans.html) | Mini Batch K-Means Clustering | [ClusterR](https://CRAN.R-project.org/package=ClusterR) | | [clust.meanshift](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.meanshift.html) | Mean Shift Clustering | [LPCM](https://CRAN.R-project.org/package=LPCM) | +| 
[clust.optics](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.optics.html) | OPTICS Clustering | [dbscan](https://CRAN.R-project.org/package=dbscan) | | [clust.pam](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.pam.html) | Clustering Around Medoids | [cluster](https://CRAN.R-project.org/package=cluster) | | [clust.SimpleKMeans](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.SimpleKMeans.html) | K-Means Clustering (WEKA) | [RWeka](https://CRAN.R-project.org/package=RWeka) | | [clust.xmeans](https://mlr3cluster.mlr-org.com/reference/mlr_learners_clust.xmeans.html) | K-Means with Automatic Determination of k | [RWeka](https://CRAN.R-project.org/package=RWeka) | diff --git a/man-roxygen/seealso_learner.R b/man-roxygen/seealso_learner.R new file mode 100644 index 00000000..39a5ab24 --- /dev/null +++ b/man-roxygen/seealso_learner.R @@ -0,0 +1,15 @@ +#' @seealso +#' +#' * Chapter in the [mlr3book](https://mlr3book.mlr-org.com/): +#' \url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-learners} +#' * Package [mlr3extralearners](https://github.com/mlr-org/mlr3extralearners) for more learners. +#' * [Dictionary][mlr3misc::Dictionary] of [Learners][Learner]: [mlr_learners] +#' * `as.data.table(mlr_learners)` for a table of available [Learners][Learner] in the running session (depending on the loaded packages). +#' * \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps. +#' * Extension packages for additional task types: +#' * \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. +#' * \CRANpkg{mlr3cluster} for unsupervised clustering. +#' * \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces} +#' for established default tuning spaces. 
+#' +#' @family Learner diff --git a/man/mlr_learners_clust.dbscan.Rd b/man/mlr_learners_clust.dbscan.Rd index 96b01d17..3794d093 100644 --- a/man/mlr_learners_clust.dbscan.Rd +++ b/man/mlr_learners_clust.dbscan.Rd @@ -3,11 +3,10 @@ \name{mlr_learners_clust.dbscan} \alias{mlr_learners_clust.dbscan} \alias{LearnerClustDBSCAN} -\title{Density-Based Clustering Learner} +\title{Density-based Spatial Clustering of Applications with Noise (DBSCAN) Clustering Learner} \description{ -A \link{LearnerClust} for density-based clustering implemented in \code{\link[dbscan:dbscan]{dbscan::dbscan()}}. -The predict method uses \code{\link[dbscan:dbscan]{dbscan::predict.dbscan_fast()}} to compute the -cluster memberships for new data. +DBSCAN (Density-based spatial clustering of applications with noise) clustering. +Calls \code{\link[dbscan:dbscan]{dbscan::dbscan()}} from \CRANpkg{dbscan}. } \section{Dictionary}{ @@ -51,6 +50,34 @@ if (requireNamespace("dbscan")) { learner$param_set$ids() } } +\references{ +Ester, Martin, Kriegel, Hans-Peter, Sander, Jörg, Xu, Xiaowei, others (1996). +\dQuote{A density-based algorithm for discovering clusters in large spatial databases with noise.} +In \emph{kdd}, volume 96 number 34, 226--231. +} +\seealso{ +\itemize{ +\item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}: +\url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-learners} +\item Package \href{https://github.com/mlr-org/mlr3extralearners}{mlr3extralearners} for more learners. +\item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Learner]{Learners}: \link{mlr_learners} +\item \code{as.data.table(mlr_learners)} for a table of available \link[=Learner]{Learners} in the running session (depending on the loaded packages). +\item \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps. 
+\item Extension packages for additional task types: +\itemize{ +\item \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. +\item \CRANpkg{mlr3cluster} for unsupervised clustering. +} +\item \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces} +for established default tuning spaces. +} + +Other Learner: +\code{\link{mlr_learners_clust.dbscan_fpc}}, +\code{\link{mlr_learners_clust.hdbscan}}, +\code{\link{mlr_learners_clust.optics}} +} +\concept{Learner} \section{Super classes}{ \code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{\link[mlr3cluster:LearnerClust]{mlr3cluster::LearnerClust}} -> \code{LearnerClustDBSCAN} } diff --git a/man/mlr_learners_clust.dbscan_fpc.Rd b/man/mlr_learners_clust.dbscan_fpc.Rd index 895d028c..c7cff906 100644 --- a/man/mlr_learners_clust.dbscan_fpc.Rd +++ b/man/mlr_learners_clust.dbscan_fpc.Rd @@ -3,11 +3,10 @@ \name{mlr_learners_clust.dbscan_fpc} \alias{mlr_learners_clust.dbscan_fpc} \alias{LearnerClustDBSCANfpc} -\title{Density-Based Clustering Learner with fpc} +\title{Density-based Spatial Clustering of Applications with Noise (DBSCAN) Clustering Learner} \description{ -A \link{LearnerClust} for density-based clustering implemented in \code{\link[fpc:dbscan]{fpc::dbscan()}}. -The predict method uses \code{\link[fpc:dbscan]{fpc::predict.dbscan()}} to compute the -cluster memberships for new data. +DBSCAN (Density-based spatial clustering of applications with noise) clustering. +Calls \code{\link[fpc:dbscan]{fpc::dbscan()}} from \CRANpkg{fpc}. } \section{Dictionary}{ @@ -50,6 +49,34 @@ if (requireNamespace("fpc")) { learner$param_set$ids() } } +\references{ +Ester, Martin, Kriegel, Hans-Peter, Sander, Jörg, Xu, Xiaowei, others (1996). +\dQuote{A density-based algorithm for discovering clusters in large spatial databases with noise.} +In \emph{kdd}, volume 96 number 34, 226--231. 
+} +\seealso{ +\itemize{ +\item Chapter in the \href{https://mlr3book.mlr-org.com/}{mlr3book}: +\url{https://mlr3book.mlr-org.com/chapters/chapter2/data_and_basic_modeling.html#sec-learners} +\item Package \href{https://github.com/mlr-org/mlr3extralearners}{mlr3extralearners} for more learners. +\item \link[mlr3misc:Dictionary]{Dictionary} of \link[=Learner]{Learners}: \link{mlr_learners} +\item \code{as.data.table(mlr_learners)} for a table of available \link[=Learner]{Learners} in the running session (depending on the loaded packages). +\item \CRANpkg{mlr3pipelines} to combine learners with pre- and postprocessing steps. +\item Extension packages for additional task types: +\itemize{ +\item \CRANpkg{mlr3proba} for probabilistic supervised regression and survival analysis. +\item \CRANpkg{mlr3cluster} for unsupervised clustering. +} +\item \CRANpkg{mlr3tuning} for tuning of hyperparameters, \CRANpkg{mlr3tuningspaces} +for established default tuning spaces. +} + +Other Learner: +\code{\link{mlr_learners_clust.dbscan}}, +\code{\link{mlr_learners_clust.hdbscan}}, +\code{\link{mlr_learners_clust.optics}} +} +\concept{Learner} \section{Super classes}{ \code{\link[mlr3:Learner]{mlr3::Learner}} -> \code{\link[mlr3cluster:LearnerClust]{mlr3cluster::LearnerClust}} -> \code{LearnerClustDBSCANfpc} } diff --git a/man/mlr_learners_clust.hdbscan.Rd b/man/mlr_learners_clust.hdbscan.Rd new file mode 100644 index 00000000..9b623c1f --- /dev/null +++ b/man/mlr_learners_clust.hdbscan.Rd @@ -0,0 +1,128 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/LearnerClustHDBSCAN.R +\name{mlr_learners_clust.hdbscan} +\alias{mlr_learners_clust.hdbscan} +\alias{LearnerClustHDBSCAN} +\title{Hierarchical DBSCAN (HDBSCAN) Clustering Learner} +\description{ +HDBSCAN (Hierarchical DBSCAN) clustering. +Calls \code{\link[dbscan:hdbscan]{dbscan::hdbscan()}} from \CRANpkg{dbscan}. 
+} +\section{Dictionary}{ + +This \link{Learner} can be instantiated via the \link[mlr3misc:Dictionary]{dictionary} \link{mlr_learners} or with the associated sugar function \code{\link[=lrn]{lrn()}}: + +\if{html}{\out{