From d897cadbf7e3dd9c949a52c846a749a4727213b5 Mon Sep 17 00:00:00 2001 From: Paul Lietar Date: Wed, 21 Feb 2024 16:12:34 +0000 Subject: [PATCH] Switch Bitset from an R6 class to a named list. R6 objects have significant overhead, especially when instantiating the objects. While this is acceptable for long-lived objects such as events or variables, bitsets are created and destroyed very regularly during simulations. We can replace our use of an R6 class for `Bitset` with named lists that are intended to look and feel just like the original API, but which offer a significant performance improvement. The reference semantics provided by R6 don't matter in our case, since all mutability happens behind the external pointer. On malariasimulation, I get a 30-35% performance improvement when using this new implementation of Bitset on population sizes under 10k, and about a 10% speedup at 100k. The object-oriented named list based interface still adds a bit of overhead compared to using the externalptr and Rcpp functions directly, but doing so requires intrusive changes at the use sites. --- R/bitset.R | 238 +++++++++++++++---------------- tests/performance/bench-bitset.R | 22 ++- 2 files changed, 140 insertions(+), 120 deletions(-) diff --git a/R/bitset.R b/R/bitset.R index e6006801..8a2d623c 100644 --- a/R/bitset.R +++ b/R/bitset.R @@ -5,126 +5,126 @@ #' difference, set difference). #' WARNING: All operations are in-place so please use \code{$copy} #' if you would like to perform an operation without destroying your current bitset. -#' @importFrom R6 R6Class +#' +#' This class is defined as a named list for performance reasons, but for most +#' intents and purposes it behaves just like an R6 class. +#' #' @export -Bitset <- R6Class( - 'Bitset', - public = list( - #' @field .bitset a pointer to the underlying IterableBitset. - .bitset = NULL, - - #' @field max_size the maximum size of the bitset. - max_size = 0, - - #' @description create a bitset. 
- #' @param size the size of the bitset. - #' @param from pointer to an existing IterableBitset to use; if \code{NULL} - #' make empty bitset, otherwise copy existing bitset. - initialize = function(size, from = NULL) { - if (is.null(from)) { - self$.bitset <- create_bitset(size) - } else { - stopifnot(inherits(from, "externalptr")) - self$.bitset <- from - } - self$max_size <- bitset_max_size(self$.bitset) - }, - - #' @description insert into the bitset. - #' @param v an integer vector of elements to insert. - insert = function(v) { - bitset_insert(self$.bitset, v) - self - }, - - #' @description remove from the bitset. - #' @param v an integer vector of elements (not indices) to remove. - remove = function(v) { - bitset_remove(self$.bitset, v) - self - }, - - #' @description clear the bitset. - clear = function() { - bitset_clear(self$.bitset) - self - }, - - #' @description get the number of elements in the set. - size = function() bitset_size(self$.bitset), - - #' @description to "bitwise or" or union two bitsets. - #' @param other the other bitset. - or = function(other) { - bitset_or(self$.bitset, other$.bitset) - self - }, - - #' @description to "bitwise and" or intersect two bitsets. - #' @param other the other bitset. - and = function(other) { - bitset_and(self$.bitset, other$.bitset) - self - }, - - #' @description to "bitwise not" or complement a bitset. - #' @param inplace whether to overwrite the current bitset, default = TRUE - not = function(inplace = TRUE) { - Bitset$new(from = bitset_not(self$.bitset, inplace)) - }, - - #' @description to "bitwise xor" or get the symmetric difference of two bitset - #' (keep elements in either bitset but not in their intersection). - #' @param other the other bitset. - xor = function(other){ - bitset_xor(self$.bitset, other$.bitset) - self - }, - - #' @description Take the set difference of this bitset with another - #' (keep elements of this bitset which are not in \code{other}). 
- #' @param other the other bitset. - set_difference = function(other){ - bitset_set_difference(self$.bitset, other$.bitset) - self - }, - - #' @description sample a bitset. - #' @param rate the success probability for keeping each element, can be - #' a single value for all elements or a vector of unique - #' probabilities for keeping each element. - sample = function(rate) { - stopifnot(is.finite(rate), !is.null(rate)) - if (length(rate) == 1) { - bitset_sample(self$.bitset, rate) - } else { - bitset_sample_vector(self$.bitset, rate) - } - self - }, - - #' @description choose k random items in the bitset - #' @param k the number of items in the bitset to keep. The selection of - #' these k items from N total items in the bitset is random, and - #' k should be chosen such that \eqn{0 \le k \le N}. - choose = function(k) { - stopifnot(is.finite(k)) - stopifnot(k <= bitset_size(self$.bitset)) - stopifnot(k >= 0) - if (k < self$max_size) { - bitset_choose(self$.bitset, as.integer(k)) - } - self - }, - - #' @description returns a copy the bitset. - copy = function() Bitset$new(from = bitset_copy(self$.bitset)), - - #' @description return an integer vector of the elements - #' stored in this bitset. - to_vector = function() bitset_to_vector(self$.bitset) - - ) +Bitset <- list( + #' @description create a bitset. + #' @param size the size of the bitset. + #' @param from pointer to an existing IterableBitset to use; if \code{NULL} + #' make empty bitset, otherwise copy existing bitset. + new = function(size, from = NULL) { + if (is.null(from)) { + bitset <- create_bitset(size) + } else { + stopifnot(inherits(from, "externalptr")) + bitset <- from + } + max_size <- bitset_max_size(bitset) + + self <- list( + .bitset = bitset, + max_size = max_size, + + #' @description insert into the bitset. + #' @param v an integer vector of elements to insert. + insert = function(v) { + bitset_insert(self$.bitset, v) + self + }, + + #' @description remove from the bitset. 
+ #' @param v an integer vector of elements (not indices) to remove. + remove = function(v) { + bitset_remove(self$.bitset, v) + self + }, + + #' @description clear the bitset. + clear = function() { + bitset_clear(self$.bitset) + self + }, + + #' @description get the number of elements in the set. + size = function() bitset_size(self$.bitset), + + #' @description to "bitwise or" or union two bitsets. + #' @param other the other bitset. + or = function(other) { + bitset_or(self$.bitset, other$.bitset) + self + }, + + #' @description to "bitwise and" or intersect two bitsets. + #' @param other the other bitset. + and = function(other) { + bitset_and(self$.bitset, other$.bitset) + self + }, + + #' @description to "bitwise not" or complement a bitset. + #' @param inplace whether to overwrite the current bitset, default = TRUE + not = function(inplace = TRUE) { + Bitset$new(from = bitset_not(self$.bitset, inplace)) + }, + + #' @description to "bitwise xor" or get the symmetric difference of two bitset + #' (keep elements in either bitset but not in their intersection). + #' @param other the other bitset. + xor = function(other){ + bitset_xor(self$.bitset, other$.bitset) + self + }, + + #' @description Take the set difference of this bitset with another + #' (keep elements of this bitset which are not in \code{other}). + #' @param other the other bitset. + set_difference = function(other){ + bitset_set_difference(self$.bitset, other$.bitset) + self + }, + + #' @description sample a bitset. + #' @param rate the success probability for keeping each element, can be + #' a single value for all elements or a vector of unique + #' probabilities for keeping each element. 
+ sample = function(rate) { + stopifnot(is.finite(rate), !is.null(rate)) + if (length(rate) == 1) { + bitset_sample(self$.bitset, rate) + } else { + bitset_sample_vector(self$.bitset, rate) + } + self + }, + + #' @description choose k random items in the bitset + #' @param k the number of items in the bitset to keep. The selection of + #' these k items from N total items in the bitset is random, and + #' k should be chosen such that \eqn{0 \le k \le N}. + choose = function(k) { + stopifnot(is.finite(k)) + stopifnot(k <= bitset_size(self$.bitset)) + stopifnot(k >= 0) + if (k < self$max_size) { + bitset_choose(self$.bitset, as.integer(k)) + } + self + }, + + #' @description returns a copy the bitset. + copy = function() Bitset$new(from = bitset_copy(self$.bitset)), + + #' @description return an integer vector of the elements + #' stored in this bitset. + to_vector = function() bitset_to_vector(self$.bitset) + ) + class(self) <- 'Bitset' + self + } ) #' @title Filter a bitset diff --git a/tests/performance/bench-bitset.R b/tests/performance/bench-bitset.R index 87899a11..1f2abff1 100644 --- a/tests/performance/bench-bitset.R +++ b/tests/performance/bench-bitset.R @@ -88,6 +88,20 @@ ggplot(data = core_ops_bset) + limit_args_grid <- data.frame(limit = 10^(3:8)) +create_bset <- bench::press( + { + bench::mark( + min_iterations = 100, + check = FALSE, + filter_gc = TRUE, + {Bitset$new(size = limit)} + ) + }, + .grid = limit_args_grid +) + +create_bset <- simplify_bench_output(create_bset) + # clear clear_bset <- bench::press( { @@ -119,6 +133,12 @@ not_bset <- bench::press( not_bset <- simplify_bench_output(out = not_bset) +ggplot(data = create_bset) + + geom_violin(aes(x = expression, y = time, color = expression, fill = expression)) + + facet_wrap(. ~ limit, scales = "free") + + coord_flip() + + ggtitle("Create benchmark") + ggplot(data = clear_bset) + geom_violin(aes(x = expression, y = time, color = expression, fill = expression)) + facet_wrap(. 
~ limit, scales = "free") + @@ -257,4 +277,4 @@ filter_bset <- simplify_bench_output(filter_bset) ggplot(data = filter_bset) + geom_violin(aes(x = as.factor(expression), y = time, color = expression, fill = expression)) + facet_wrap(size ~ limit, scales = "free") + - ggtitle("Sampling operations benchmark: filter") \ No newline at end of file + ggtitle("Sampling operations benchmark: filter")