Skip to content

Commit

Permalink
Switch Bitset from an R6 class to a named list.
Browse files Browse the repository at this point in the history
R6 objects have significant overhead, especially when instantiating the
objects. While this is acceptable for long-lived objects such as events
or variables, bitsets are created and destroyed very regularly during
simulations.

We can replace our use of an R6 class for `Bitset` with named lists that
are intended to look and feel just like the original API, but which
provide a significant performance improvement. The reference semantics provided by
R6 don't matter in our case, since all mutability happens behind the
external pointer.

On malariasimulation, I get a 30-35% performance improvement when using
this new implementation of Bitset on population sizes under 10k, and
about 10% speedup at 100k.

The object-oriented named list based interface still adds a bit of
overhead compared to using the externalptr and Rcpp functions directly,
but doing so requires intrusive changes in the use site.
  • Loading branch information
plietar committed Feb 21, 2024
1 parent 29dc47a commit d897cad
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 120 deletions.
238 changes: 119 additions & 119 deletions R/bitset.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,126 +5,126 @@
#' difference, set difference).
#' WARNING: All operations are in-place so please use \code{$copy}
#' if you would like to perform an operation without destroying your current bitset.
#' @importFrom R6 R6Class
#'
#' This class is defined as a named list for performance reasons, but for most
#' intents and purposes it behaves just like an R6 class.
#'
#' @export
Bitset <- R6Class(
'Bitset',
public = list(
#' @field .bitset a pointer to the underlying IterableBitset.
.bitset = NULL,

#' @field max_size the maximum size of the bitset.
max_size = 0,

#' @description create a bitset.
#' @param size the size of the bitset.
#' @param from pointer to an existing IterableBitset to use; if \code{NULL}
#' make empty bitset, otherwise copy existing bitset.
initialize = function(size, from = NULL) {
if (is.null(from)) {
self$.bitset <- create_bitset(size)
} else {
stopifnot(inherits(from, "externalptr"))
self$.bitset <- from
}
self$max_size <- bitset_max_size(self$.bitset)
},

#' @description insert into the bitset.
#' @param v an integer vector of elements to insert.
insert = function(v) {
bitset_insert(self$.bitset, v)
self
},

#' @description remove from the bitset.
#' @param v an integer vector of elements (not indices) to remove.
remove = function(v) {
bitset_remove(self$.bitset, v)
self
},

#' @description clear the bitset.
clear = function() {
bitset_clear(self$.bitset)
self
},

#' @description get the number of elements in the set.
size = function() bitset_size(self$.bitset),

#' @description to "bitwise or" or union two bitsets.
#' @param other the other bitset.
or = function(other) {
bitset_or(self$.bitset, other$.bitset)
self
},

#' @description to "bitwise and" or intersect two bitsets.
#' @param other the other bitset.
and = function(other) {
bitset_and(self$.bitset, other$.bitset)
self
},

#' @description to "bitwise not" or complement a bitset.
#' @param inplace whether to overwrite the current bitset, default = TRUE
not = function(inplace = TRUE) {
Bitset$new(from = bitset_not(self$.bitset, inplace))
},

#' @description to "bitwise xor" or get the symmetric difference of two bitset
#' (keep elements in either bitset but not in their intersection).
#' @param other the other bitset.
xor = function(other){
bitset_xor(self$.bitset, other$.bitset)
self
},

#' @description Take the set difference of this bitset with another
#' (keep elements of this bitset which are not in \code{other}).
#' @param other the other bitset.
set_difference = function(other){
bitset_set_difference(self$.bitset, other$.bitset)
self
},

#' @description sample a bitset.
#' @param rate the success probability for keeping each element, can be
#' a single value for all elements or a vector of unique
#' probabilities for keeping each element.
sample = function(rate) {
stopifnot(is.finite(rate), !is.null(rate))
if (length(rate) == 1) {
bitset_sample(self$.bitset, rate)
} else {
bitset_sample_vector(self$.bitset, rate)
}
self
},

#' @description choose k random items in the bitset
#' @param k the number of items in the bitset to keep. The selection of
#' these k items from N total items in the bitset is random, and
#' k should be chosen such that \eqn{0 \le k \le N}.
choose = function(k) {
stopifnot(is.finite(k))
stopifnot(k <= bitset_size(self$.bitset))
stopifnot(k >= 0)
if (k < self$max_size) {
bitset_choose(self$.bitset, as.integer(k))
}
self
},

#' @description returns a copy the bitset.
copy = function() Bitset$new(from = bitset_copy(self$.bitset)),

#' @description return an integer vector of the elements
#' stored in this bitset.
to_vector = function() bitset_to_vector(self$.bitset)

)
Bitset <- list(
  #' @description construct a bitset object.
  #' @param size the size of the bitset; only used when \code{from} is
  #' \code{NULL}.
  #' @param from an external pointer to an existing IterableBitset to wrap;
  #' when \code{NULL} a fresh bitset is created with \code{create_bitset(size)}.
  new = function(size, from = NULL) {
    ptr <- if (is.null(from)) {
      create_bitset(size)
    } else {
      stopifnot(inherits(from, "externalptr"))
      from
    }
    capacity <- bitset_max_size(ptr)

    # Every method below is a closure over `ptr`, `capacity` and `self` in this
    # call's environment, which is what lets a plain list expose an
    # object-like `$method()` interface.
    self <- structure(
      list(
        .bitset = ptr,
        max_size = capacity,

        #' @description add elements to the bitset.
        #' @param v an integer vector of elements to insert.
        insert = function(v) {
          bitset_insert(ptr, v)
          self
        },

        #' @description drop elements from the bitset.
        #' @param v an integer vector of elements (not indices) to remove.
        remove = function(v) {
          bitset_remove(ptr, v)
          self
        },

        #' @description clear the bitset.
        clear = function() {
          bitset_clear(ptr)
          self
        },

        #' @description number of elements currently in the set.
        size = function() bitset_size(ptr),

        #' @description "bitwise or" (union) with another bitset.
        #' @param other the other bitset.
        or = function(other) {
          bitset_or(ptr, other$.bitset)
          self
        },

        #' @description "bitwise and" (intersection) with another bitset.
        #' @param other the other bitset.
        and = function(other) {
          bitset_and(ptr, other$.bitset)
          self
        },

        #' @description "bitwise not" (complement) of the bitset.
        #' @param inplace whether to overwrite the current bitset, default = TRUE
        not = function(inplace = TRUE) {
          Bitset$new(from = bitset_not(ptr, inplace))
        },

        #' @description "bitwise xor" (symmetric difference) with another
        #' bitset: keep elements in either bitset but not in both.
        #' @param other the other bitset.
        xor = function(other) {
          bitset_xor(ptr, other$.bitset)
          self
        },

        #' @description set difference with another bitset: keep the elements
        #' of this bitset which are not in \code{other}.
        #' @param other the other bitset.
        set_difference = function(other) {
          bitset_set_difference(ptr, other$.bitset)
          self
        },

        #' @description sample a bitset.
        #' @param rate the success probability for keeping each element;
        #' either a single value applied to all elements or a vector with one
        #' probability per element.
        sample = function(rate) {
          stopifnot(is.finite(rate), !is.null(rate))
          if (length(rate) != 1) {
            bitset_sample_vector(ptr, rate)
          } else {
            bitset_sample(ptr, rate)
          }
          self
        },

        #' @description keep k randomly chosen items of the bitset.
        #' @param k the number of items to keep out of the N items currently
        #' in the bitset; must satisfy \eqn{0 \le k \le N}.
        choose = function(k) {
          stopifnot(
            is.finite(k),
            k <= bitset_size(ptr),
            k >= 0
          )
          if (k < capacity) {
            bitset_choose(ptr, as.integer(k))
          }
          self
        },

        #' @description returns a copy of the bitset.
        copy = function() Bitset$new(from = bitset_copy(ptr)),

        #' @description return an integer vector of the elements
        #' stored in this bitset.
        to_vector = function() bitset_to_vector(ptr)
      ),
      class = "Bitset"
    )
    self
  }
)

#' @title Filter a bitset
Expand Down
22 changes: 21 additions & 1 deletion tests/performance/bench-bitset.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,20 @@ ggplot(data = core_ops_bset) +

limit_args_grid <- data.frame(limit = 10^(3:8))

# create
# Benchmark the cost of constructing a Bitset, once per `limit` value in
# `limit_args_grid`. Only a single expression is timed, so there is nothing
# for bench::mark to compare results against (check = FALSE).
create_bset <- bench::press(
{
bench::mark(
min_iterations = 100,
check = FALSE,
filter_gc = TRUE,
{Bitset$new(size = limit)}
)
},
.grid = limit_args_grid
)

# Post-process with this file's `simplify_bench_output` helper (defined
# outside this excerpt) before plotting.
create_bset <- simplify_bench_output(create_bset)

# clear
clear_bset <- bench::press(
{
Expand Down Expand Up @@ -119,6 +133,12 @@ not_bset <- bench::press(

not_bset <- simplify_bench_output(out = not_bset)

# Violin plot of the construction timings: one facet per `limit`, each with
# its own axis scales, drawn with flipped coordinates.
ggplot(data = create_bset) +
geom_violin(aes(x = expression, y = time, color = expression, fill = expression)) +
facet_wrap(. ~ limit, scales = "free") +
coord_flip() +
ggtitle("Create benchmark")

ggplot(data = clear_bset) +
geom_violin(aes(x = expression, y = time, color = expression, fill = expression)) +
facet_wrap(. ~ limit, scales = "free") +
Expand Down Expand Up @@ -257,4 +277,4 @@ filter_bset <- simplify_bench_output(filter_bset)
ggplot(data = filter_bset) +
geom_violin(aes(x = as.factor(expression), y = time, color = expression, fill = expression)) +
facet_wrap(size ~ limit, scales = "free") +
ggtitle("Sampling operations benchmark: filter")
ggtitle("Sampling operations benchmark: filter")

0 comments on commit d897cad

Please sign in to comment.