From bba153e7866b169c11b720f45009c7fd1c105d09 Mon Sep 17 00:00:00 2001
From: Sergio Oller
Date: Sun, 6 Nov 2022 09:18:53 +0100
Subject: [PATCH] FEAT/PERF: Implement mapping_method support for ScaleContinuous

ScaleContinuous maps values to palette colours as follows:

- unique values are found
- unique values are mapped to colors
- colors are matched to the original vector

If most values are unique, we can be faster by simply mapping all values
to colors, without finding and matching unique values first.

In some scenarios the geom can guess or know if that is going to be the
case.

The goal of this commit is to let the geom tell the ScaleContinuous scale
how the mapping from values to colours should be done.

By default the existing "unique" approach is used. The geom may now
specify `scale_params = list(fill=list(mapping_method = "raw"))` to tell
the scale corresponding to the fill aesthetic to use a "raw" approach of
mapping values to colours without finding unique values first.

Besides the default "unique" and the new "raw" mapping methods, we also
allow the geom to ask to use the "binned" approach, where the geom
specifies a number of intervals to use and the mapping process is as
follows:

- values are binned in N intervals
- intervals are mapped to colors

This approach is "lossy" (we have a maximum of N different colours), but
this can be much faster and have almost no difference with respect to the
other mapping methods.
---
 R/scale-.r | 64 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 59 insertions(+), 5 deletions(-)

diff --git a/R/scale-.r b/R/scale-.r
index db4db8eec5..c38e1108f5 100644
--- a/R/scale-.r
+++ b/R/scale-.r
@@ -615,12 +615,66 @@ ScaleContinuous <- ggproto("ScaleContinuous", Scale,
 
   transform = default_transform,
 
-  map = function(self, x, limits = self$get_limits()) {
+  map = function(self, x, limits = self$get_limits(), scale_params = NULL) {
     x <- self$rescale(self$oob(x, range = limits), limits)
-
-    uniq <- unique0(x)
-    pal <- self$palette(uniq)
-    scaled <- pal[match(x, uniq)]
+    if (is.null(scale_params)) {
+      scale_params <- list()
+    }
+    # A mapping method maps the x values in [0-1] range to a continuous aesthetic.
+    # "unique": The default. Find unique values in x, map the unique values to colours,
+    #           match the mapped colours to original x values.
+    # "raw": just map all x values to colours. More efficient if we know there are not
+    #        many repeated values. Usually less efficient if there are some repeated
+    #        values.
+    # "binned": Bin `x` into `mapping_method_bins` levels. Map those levels to colours.
+    #           Assign the colour to each value corresponding to their bin.
+    #           This approach is faster with large vectors, but is lossy.
+    mapping_method <- scale_params[["mapping_method"]]
+    if (is.null(mapping_method)) {
+      mapping_method <- "unique"
+    }
+    # Anything that is not exactly one of the known method names falls back to
+    # "unique"; otherwise a non-character or vector value would reach the
+    # comparisons below and leave `scaled` undefined.
+    valid_method <- is.character(mapping_method) &&
+      length(mapping_method) == 1L &&
+      mapping_method %in% c("unique", "raw", "binned")
+    if (!valid_method) {
+      cli::cli_warn(c(
+        "ScaleContinuous does not support the mapping method {mapping_method}",
+        "i" = "Using 'unique' instead."
+      ))
+      mapping_method <- "unique"
+    }
+    if (mapping_method == "unique") {
+      uniq <- unique0(x)
+      pal <- self$palette(uniq)
+      scaled <- pal[match(x, uniq)]
+    } else if (mapping_method == "raw") {
+      scaled <- self$palette(x)
+    } else if (mapping_method == "binned") {
+      mapping_method_bins <- scale_params[["mapping_method_bins"]]
+      if (is.null(mapping_method_bins)) {
+        mapping_method_bins <- 1024L
+      }
+      mapping_method_bins <- as.integer(mapping_method_bins[1L])
+      if (is.na(mapping_method_bins) || mapping_method_bins < 1L) {
+        cli::cli_warn(c(
+          "{.code mapping_method_bins} must be a positive integer",
+          "i" = "Using 1024 bins instead."
+        ))
+        mapping_method_bins <- 1024L
+      }
+      breaks <- seq(from = 0, to = 1, length.out = mapping_method_bins + 1L)
+      # One colour per bin, taken at the lower edge of the bin.
+      colormap <- self$palette(breaks[-length(breaks)])
+      # Tolerate rounding error just below zero in the first bin:
+      breaks[1] <- -.Machine$double.eps
+      bin <- findInterval(x, breaks, rightmost.closed = TRUE)
+      # Values outside [0, 1] have no bin; they become `na.value` below.
+      bin[which(bin < 1L | bin > mapping_method_bins)] <- NA_integer_
+      scaled <- colormap[bin]
+    }
 
     ifelse(!is.na(scaled), scaled, self$na.value)
   },