From ae48afc3c329f15521afead60bc1e65e32fd8114 Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 09:18:53 +0100 Subject: [PATCH] FEAT/PERF: Implement mapping_method support for ScaleContinuous ScaleContinuous maps values to palette colours as follows: - unique values are found - unique values are mapped to colors - colors are matched to the original vector If most values are unique, we can be faster by simply mapping all values to colors, without finding and matching unique values first. In some scenarios the geom can guess or know if that is going to be the case. The goal of this commit is to let the geom tell the ScaleContinuous scale how the mapping from values to colours should be done. By default the existing "unique" approach is used. The geom may now specify `scale_params = list(fill=list(mapping_method = "raw"))` to tell the scale corresponding to the fill aesthetic to use a "raw" approach of mapping values to colours without finding unique values first. Besides the default "unique" and the new "raw" mapping methods, we also allow the geom to ask to use the "binned" approach, where the geom specifies a number of intervals to use and the mapping process is as follows: - values are binned in N intervals - intervals are mapped to colors This approach is "lossy" (we have a maximum of N different colours), but this can be much faster and have almost no difference with respect to the other mapping methods. 
--- R/scale-.r | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/R/scale-.r b/R/scale-.r index 19cc0ca809..f6708a0bf7 100644 --- a/R/scale-.r +++ b/R/scale-.r @@ -615,12 +615,49 @@ ScaleContinuous <- ggproto("ScaleContinuous", Scale, transform = default_transform, - map = function(self, x, limits = self$get_limits()) { + map = function(self, x, limits = self$get_limits(), scale_params = NULL) { x <- self$rescale(self$oob(x, range = limits), limits) - - uniq <- unique0(x) - pal <- self$palette(uniq) - scaled <- pal[match(x, uniq)] + if (is.null(scale_params)) { + scale_params <- list() + } + # A mapping method maps the x values in [0-1] range to a continuous aesthetic. + # "unique": The default. Find unique values in x, map the unique values to colours, + # match the mapped colours to original x values. + # "raw": just map all x values to colours. More efficient if we know there are not + # many repeated values. Usually less efficient if there are some repeated + # values. + # "binned": Bin `x` into `mapping_method_bins` levels. Map those levels to colours. + # Assign the colour to each value corresponding to their bin. + # This approach is faster with large vectors, but is lossy. + mapping_method <- scale_params[["mapping_method"]] + if (is.null(mapping_method)) { + mapping_method <- "unique" + } + if (is.character(mapping_method) && !mapping_method %in% c("unique", "raw", "binned")) { + cli::cli_warn(c( + "ScaleContinous does not support the mapping method {mapping_method}", + "i" = "Using 'unique' instead." 
+ )) + mapping_method <- "unique" + } + if (mapping_method == "unique") { + uniq <- unique0(x) + pal <- self$palette(uniq) + scaled <- pal[match(x, uniq)] + } else if (mapping_method == "raw") { + scaled <- self$palette(x) + } else if (mapping_method == "binned") { + mapping_method_bins <- scale_params[["mapping_method_bins"]] + if (is.null(mapping_method_bins)) { + mapping_method_bins <- 1024L + } + mapping_method_bins <- as.integer(mapping_method_bins[1L]) + breaks <- seq(from = 0, to = 1, length.out = mapping_method_bins + 1L) + colormap <- c(self$na.value, self$palette(breaks), self$na.value) + # values below 0 belong to the first bucket, but zero belongs to the second bucket: + breaks[1] <- -.Machine$double.eps + scaled <- colormap[findInterval(x, breaks, rightmost.closed = TRUE) + 1L] + } # A specific palette can have as attribute "may_return_NA = FALSE" # If it has such attribute, we will skip the ifelse(!is.na(scaled), ...)