From f995ea14b3251f1d2cd1c3922826e88ebdac8ecd Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 08:38:54 +0100 Subject: [PATCH 1/4] FEAT: Let the scale mapping accept parameters --- R/scale-.r | 23 ++++++++++++++++++----- R/scales-.r | 17 +++++++++++++++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/R/scale-.r b/R/scale-.r index b0065ffd52..db4db8eec5 100644 --- a/R/scale-.r +++ b/R/scale-.r @@ -458,10 +458,13 @@ Scale <- ggproto("Scale", NULL, cli::cli_abort("Not implemented") }, - map_df = function(self, df, i = NULL) { + map_df = function(self, df, i = NULL, scale_params = NULL) { if (empty(df)) { return() } + if (is.null(scale_params)) { + scale_params <- list() + } aesthetics <- intersect(self$aesthetics, names(df)) names(aesthetics) <- aesthetics @@ -469,14 +472,24 @@ Scale <- ggproto("Scale", NULL, return() } - if (is.null(i)) { - lapply(aesthetics, function(j) self$map(df[[j]])) + if ("scale_params" %in% names(ggproto_formals(self$map))) { + if (is.null(i)) { + lapply(aesthetics, function(j) self$map(df[[j]], scale_params = scale_params[[j]])) + } else { + lapply(aesthetics, function(j) self$map(df[[j]][i], scale_params = scale_params[[j]])) + } } else { - lapply(aesthetics, function(j) self$map(df[[j]][i])) + # Eventually warn if self$map() does not accept scale_params + if (is.null(i)) { + lapply(aesthetics, function(j) self$map(df[[j]])) + } else { + lapply(aesthetics, function(j) self$map(df[[j]][i])) + } + } }, - map = function(self, x, limits = self$get_limits()) { + map = function(self, x, limits = self$get_limits(), scale_params = NULL) { cli::cli_abort("Not implemented") }, diff --git a/R/scales-.r b/R/scales-.r index cb1e784670..3d20adb34a 100644 --- a/R/scales-.r +++ b/R/scales-.r @@ -70,10 +70,23 @@ scales_train_df <- function(scales, df, drop = FALSE) { } # Map values from a data.frame. Returns data.frame -scales_map_df <- function(scales, df) { +scales_map_df <- function(scales, df, scale_params = NULL) { if (empty(df) || length(scales$scales) == 0) return(df) - mapped <- unlist(lapply(scales$scales, function(scale) scale$map_df(df = df)), recursive = FALSE) + mapped <- unlist( + lapply( + scales$scales, + function(scale) { + if ("scale_params" %in% names(ggproto_formals(self$map_df))) { + scale$map_df(df = df, scale_params = scale_params) + } else { + # Eventually warn if scale$map_df() does not accept scale_params + scale$map_df(df = df) + } + } + ), + recursive = FALSE + ) data_frame0(!!!mapped, df[setdiff(names(df), names(mapped))]) } From 7d1dc95db0fd7271900c14d31ffae03b36ecd48f Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 08:47:21 +0100 Subject: [PATCH 2/4] FEAT: Let geoms define scale parameters and pass those to the scales --- R/geom-.r | 16 ++++++++++++++-- R/layer.r | 8 ++++++++ R/plot-build.r | 3 ++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/R/geom-.r b/R/geom-.r index defac7a55a..1a4b9c0cf4 100644 --- a/R/geom-.r +++ b/R/geom-.r @@ -202,8 +202,20 @@ Geom <- ggproto("Geom", }, # Should the geom rename size to linewidth? - rename_size = FALSE - + rename_size = FALSE, + # Parameters to pass to scale$map() for each aesthetic: + # This can be either a named list (names are aesthetics, values are lists of parameters) + # or a function that takes a list of geom parameters as input and returns the list. + # The scale_params can be used to tell the scale map function a mapping method. + # e.g. scale_params = list(fill = list(mapping_method = "raw")) + # See the map method for ScaleContinuous in R/scale-.r for further details. + # + # The scale_params will be used to tell the scale map function the expected colour + # format, in case the geom prefers native colours (because it uses nativeRaster objects) + # instead of the default character vector: + # e.g. scale_params = list(fill = list("color_fmt" = "character")) # "#00FF00" + # e.g. scale_params = list(fill = list("color_fmt" = "native")) # from nativeRaster + scale_params = list() ) diff --git a/R/layer.r b/R/layer.r index 89ccb58028..a23a6688fe 100644 --- a/R/layer.r +++ b/R/layer.r @@ -391,6 +391,14 @@ Layer <- ggproto("Layer", NULL, self$geom$setup_data(data, self$computed_geom_params) }, + get_scale_params = function(self) { + if (is.function(self$geom$scale_params)) { + self$geom$scale_params(params = self$computed_geom_params) + } else { + self$geom$scale_params + } + }, + compute_position = function(self, data, layout) { if (empty(data)) return(data_frame0()) diff --git a/R/plot-build.r b/R/plot-build.r index b6102eed6e..cdf9930aec 100644 --- a/R/plot-build.r +++ b/R/plot-build.r @@ -88,7 +88,8 @@ ggplot_build.ggplot <- function(plot) { npscales <- scales$non_position_scales() if (npscales$n() > 0) { lapply(data, scales_train_df, scales = npscales) - data <- lapply(data, scales_map_df, scales = npscales) + scale_params <- by_layer(function(l, d) l$get_scale_params(), layers, data, "getting scale_params") + data <- mapply(scales_map_df, df = data, scale_params = scale_params, MoreArgs = list(scales = npscales), SIMPLIFY = FALSE) } # Fill in defaults etc. From bba153e7866b169c11b720f45009c7fd1c105d09 Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 09:18:53 +0100 Subject: [PATCH 3/4] FEAT/PERF: Implement mapping_method support for ScaleContinuous ScaleContinous maps values to palette colours as follows: - unique values are found - unique values are mapped to colors - colors are matched to the original vector If most values are unique, we can be faster by simply maping all values to colors, without finding and matching unique values first. In some scenarios the geom can guess or know if that is going to be the case. The goal of this commit is to let the geom tell the ScaleContinuous scale how the mapping from values to colours should be done. By default the existing "unique" approach is used. The geom may now specify `scale_params = list(fill=list(mapping_method = "raw"))` to tell the scale corresponding to the fill aesthetic to use a "raw" approach of mapping values to colours without finding unique values first. Besides the default "unique" and the new "raw" mapping methods, we also allow the geom to ask to use the "binned" approach, where the geom specifies a number of intervals to use and the mapping process is as follows: - values are binned in N intervals - intervals are mapped to colors This approach is "lossy" (we have a maximum of N different colours), but this can be much faster and have almost no difference with respect to the other mapping methods. --- R/scale-.r | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/R/scale-.r b/R/scale-.r index db4db8eec5..c38e1108f5 100644 --- a/R/scale-.r +++ b/R/scale-.r @@ -615,12 +615,49 @@ ScaleContinuous <- ggproto("ScaleContinuous", Scale, transform = default_transform, - map = function(self, x, limits = self$get_limits()) { + map = function(self, x, limits = self$get_limits(), scale_params = NULL) { x <- self$rescale(self$oob(x, range = limits), limits) - - uniq <- unique0(x) - pal <- self$palette(uniq) - scaled <- pal[match(x, uniq)] + if (is.null(scale_params)) { + scale_params <- list() + } + # A mapping method maps the x values in [0-1] range to a continuous aesthetic. + # "unique": The default. Find unique values in x, map the unique values to colours, + # match the mapped colours to original x values. + # "raw": just map all x values to colours. More efficient if we know there are not + # many repeated values. Usually less efficient if there are some repeated + # values. + # "binned": Bin `x` into `mapping_method_bins` levels. Map those levels to colours. + # Assign the colour to each value corresponding to their bin. + # This approach is faster with large vectors, but is lossy. + mapping_method <- scale_params[["mapping_method"]] + if (is.null(mapping_method)) { + mapping_method <- "unique" + } + if (is.character(mapping_method) && !mapping_method %in% c("unique", "raw", "binned")) { + cli::cli_warn(c( + "ScaleContinous does not support the mapping method {mapping_method}", + "i" = "Using 'unique' instead." + )) + mapping_method <- "unique" + } + if (mapping_method == "unique") { + uniq <- unique0(x) + pal <- self$palette(uniq) + scaled <- pal[match(x, uniq)] + } else if (mapping_method == "raw") { + scaled <- self$palette(x) + } else if (mapping_method == "binned") { + mapping_method_bins <- scale_params[["mapping_method_bins"]] + if (is.null(mapping_method_bins)) { + mapping_method_bins <- 1024L + } + mapping_method_bins <- as.integer(mapping_method_bins[1L]) + breaks <- seq(from = 0, to = 1, length.out = mapping_method_bins + 1L) + colormap <- c(self$na.value, self$palette(breaks), self$na.value) + # values below 0 belong to the first bucket, but zero belongs to the second bucket: + breaks[1] <- -.Machine$double.eps + scaled <- colormap[findInterval(x, breaks, rightmost.closed = TRUE) + 1L] + } ifelse(!is.na(scaled), scaled, self$na.value) }, From 3d394b00f0a9f45494ffa7336d141043f54dcfe8 Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 12:01:48 +0100 Subject: [PATCH 4/4] TEST: scale_params "binned" mapping method --- tests/testthat/test-scale-colour-continuous.R | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/testthat/test-scale-colour-continuous.R b/tests/testthat/test-scale-colour-continuous.R index 10e3ae4dd5..a3b4f78c60 100644 --- a/tests/testthat/test-scale-colour-continuous.R +++ b/tests/testthat/test-scale-colour-continuous.R @@ -18,3 +18,12 @@ test_that("type argument is checked for proper input", { scale_colour_continuous(type = "abc") ) }) + +test_that("scale_params mapping_method supports binned", { + sc <- scale_fill_continuous() + x <- seq(0, 1, length.out = 10) + only_two <- sc$map(x, limits = c(0, 1), scale_params = list(mapping_method = "binned", mapping_method_bins = 2)) + expect_equal(length(unique(only_two)), 2L) +}) + +