From 549fae9738494a7a3536998dd499ed6d54017c95 Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 09:03:42 +0100 Subject: [PATCH 01/11] Add helper to get attributes from ggproto method --- R/ggproto.r | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/ggproto.r b/R/ggproto.r index d6b0533f01..f93a6bc6f5 100644 --- a/R/ggproto.r +++ b/R/ggproto.r @@ -351,3 +351,7 @@ format.ggproto_method <- function(x, ...) { # proto2 TODO: better way of getting formals for self$draw ggproto_formals <- function(x) formals(environment(x)$f) + +ggproto_attr <- function(x, which, default = NULL) { + attr(environment(x)$f, which = which, exact = TRUE) %||% default +} From 99b7c60d753fd928b39b58fc788bbd9d7c15e4d5 Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 09:29:06 +0100 Subject: [PATCH 02/11] FEAT/PERF: ScaleContinuous accepts palette capability `may_return_na` We can extend palettes with attributes to improve the mapping efficiency. With this commit a palette may define an attribute `may_return_na` to `FALSE`. If it does, `ScaleContinuous` will assume the palette may not return missing values, and it will skip checking for those and replacing them. --- R/scale-.r | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/scale-.r b/R/scale-.r index b0065ffd52..b4d69aa5ed 100644 --- a/R/scale-.r +++ b/R/scale-.r @@ -609,7 +609,14 @@ ScaleContinuous <- ggproto("ScaleContinuous", Scale, pal <- self$palette(uniq) scaled <- pal[match(x, uniq)] - ifelse(!is.na(scaled), scaled, self$na.value) + # A specific palette can have as attribute "may_return_NA = FALSE" + # If it has such attribute, we will skip the ifelse(!is.na(scaled), ...) + pal_may_return_na <- ggproto_attr(self$palette, "may_return_NA", default = TRUE) + if (pal_may_return_na) { + scaled <- ifelse(!is.na(scaled), scaled, self$na.value) + } + + scaled }, rescale = function(self, x, limits = self$get_limits(), range = limits) { From d9e577dcb6386625ccf2cf81dcce16d4fa1d1709 Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 12:00:38 +0100 Subject: [PATCH 03/11] TEST: Palette capability "may_return_na" --- tests/testthat/test-scale-colour-continuous.R | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/testthat/test-scale-colour-continuous.R b/tests/testthat/test-scale-colour-continuous.R index 10e3ae4dd5..444b27f9db 100644 --- a/tests/testthat/test-scale-colour-continuous.R +++ b/tests/testthat/test-scale-colour-continuous.R @@ -18,3 +18,32 @@ test_that("type argument is checked for proper input", { scale_colour_continuous(type = "abc") ) }) + +test_that("palette with may_return_NA=FALSE works as expected", { + sc <- scale_fill_continuous() + # A palette that may return NAs, will have NAs replaced by the scale's na.value + # by the scale: + sc$palette <- structure( + function(x) { + rep(NA_character_, length(x)) + }, + may_return_NA = TRUE + ) + sc$na.value <- "red" + nat <- sc$map(0.5, limits = c(0, 1)) + expect_equal(nat, "red") + + # This palette is lying, because it returns NA even though it says it can't. + # The scale will not replace the NA values, leading to further errors. + # You should not do this in production, but it helps to test: + sc <- scale_fill_continuous() + sc$palette <- structure( + function(x) { + rep(NA_character_, length(x)) + }, + may_return_NA = FALSE + ) + sc$na.value <- "red" + nat <- sc$map(0.5, limits = c(0, 1)) + expect_equal(nat, NA_character_) +}) From e86c02a11dad02dd404d531a2188f7448e0e0bc4 Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 08:38:54 +0100 Subject: [PATCH 04/11] FEAT: Let the scale mapping accept parameters --- R/scale-.r | 23 ++++++++++++++++++----- R/scales-.r | 17 +++++++++++++++-- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/R/scale-.r b/R/scale-.r index b4d69aa5ed..19cc0ca809 100644 --- a/R/scale-.r +++ b/R/scale-.r @@ -458,10 +458,13 @@ Scale <- ggproto("Scale", NULL, cli::cli_abort("Not implemented") }, - map_df = function(self, df, i = NULL) { + map_df = function(self, df, i = NULL, scale_params = NULL) { if (empty(df)) { return() } + if (is.null(scale_params)) { + scale_params <- list() + } aesthetics <- intersect(self$aesthetics, names(df)) names(aesthetics) <- aesthetics @@ -469,14 +472,24 @@ Scale <- ggproto("Scale", NULL, return() } - if (is.null(i)) { - lapply(aesthetics, function(j) self$map(df[[j]])) + if ("scale_params" %in% names(ggproto_formals(self$map))) { + if (is.null(i)) { + lapply(aesthetics, function(j) self$map(df[[j]], scale_params = scale_params[[j]])) + } else { + lapply(aesthetics, function(j) self$map(df[[j]][i], scale_params = scale_params[[j]])) + } } else { - lapply(aesthetics, function(j) self$map(df[[j]][i])) + # Eventually warn if self$map() does not accept scale_params + if (is.null(i)) { + lapply(aesthetics, function(j) self$map(df[[j]])) + } else { + lapply(aesthetics, function(j) self$map(df[[j]][i])) + } + } }, - map = function(self, x, limits = self$get_limits()) { + map = function(self, x, limits = self$get_limits(), scale_params = NULL) { cli::cli_abort("Not implemented") }, diff --git a/R/scales-.r b/R/scales-.r index cb1e784670..3d20adb34a 100644 --- a/R/scales-.r +++ b/R/scales-.r @@ -70,10 +70,23 @@ scales_train_df <- function(scales, df, drop = FALSE) { } # Map values from a data.frame. Returns data.frame -scales_map_df <- function(scales, df) { +scales_map_df <- function(scales, df, scale_params = NULL) { if (empty(df) || length(scales$scales) == 0) return(df) - mapped <- unlist(lapply(scales$scales, function(scale) scale$map_df(df = df)), recursive = FALSE) + mapped <- unlist( + lapply( + scales$scales, + function(scale) { + if ("scale_params" %in% names(ggproto_formals(self$map_df))) { + scale$map_df(df = df, scale_params = scale_params) + } else { + # Eventually warn if scale$map_df() does not accept scale_params + scale$map_df(df = df) + } + } + ), + recursive = FALSE + ) data_frame0(!!!mapped, df[setdiff(names(df), names(mapped))]) } From 04d21e3d30429ba7fce193325d3c7a192279f3a4 Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 08:47:21 +0100 Subject: [PATCH 05/11] FEAT: Let geoms define scale parameters and pass those to the scales --- R/geom-.r | 16 ++++++++++++++-- R/layer.r | 8 ++++++++ R/plot-build.r | 3 ++- 3 files changed, 24 insertions(+), 3 deletions(-) diff --git a/R/geom-.r b/R/geom-.r index defac7a55a..1a4b9c0cf4 100644 --- a/R/geom-.r +++ b/R/geom-.r @@ -202,8 +202,20 @@ Geom <- ggproto("Geom", }, # Should the geom rename size to linewidth? - rename_size = FALSE - + rename_size = FALSE, + # Parameters to pass to scale$map() for each aesthetic: + # This can be either a named list (names are aesthetics, values are lists of parameters) + # or a function that takes a list of geom parameters as input and returns the list. + # The scale_params can be used to tell the scale map function a mapping method. + # e.g. scale_params = list(fill = list(mapping_method = "raw")) + # See the map method for ScaleContinuous in R/scale-.r for further details. + # + # The scale_params will be used to tell the scale map function the expected colour + # format, in case the geom prefers native colours (because it uses nativeRaster objects) + # instead of the default character vector: + # e.g. scale_params = list(fill = list("color_fmt" = "character")) # "#00FF00" + # e.g. scale_params = list(fill = list("color_fmt" = "native")) # from nativeRaster + scale_params = list() ) diff --git a/R/layer.r b/R/layer.r index 89ccb58028..a23a6688fe 100644 --- a/R/layer.r +++ b/R/layer.r @@ -391,6 +391,14 @@ Layer <- ggproto("Layer", NULL, self$geom$setup_data(data, self$computed_geom_params) }, + get_scale_params = function(self) { + if (is.function(self$geom$scale_params)) { + self$geom$scale_params(params = self$computed_geom_params) + } else { + self$geom$scale_params + } + }, + compute_position = function(self, data, layout) { if (empty(data)) return(data_frame0()) diff --git a/R/plot-build.r b/R/plot-build.r index b6102eed6e..cdf9930aec 100644 --- a/R/plot-build.r +++ b/R/plot-build.r @@ -88,7 +88,8 @@ ggplot_build.ggplot <- function(plot) { npscales <- scales$non_position_scales() if (npscales$n() > 0) { lapply(data, scales_train_df, scales = npscales) - data <- lapply(data, scales_map_df, scales = npscales) + scale_params <- by_layer(function(l, d) l$get_scale_params(), layers, data, "getting scale_params") + data <- mapply(scales_map_df, df = data, scale_params = scale_params, MoreArgs = list(scales = npscales), SIMPLIFY = FALSE) } # Fill in defaults etc. From 8e365c018c44e68bac337e4faa0f09d7f757b82e Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 09:18:53 +0100 Subject: [PATCH 06/11] FEAT/PERF: Implement mapping_method support for ScaleContinuous ScaleContinous maps values to palette colours as follows: - unique values are found - unique values are mapped to colors - colors are matched to the original vector If most values are unique, we can be faster by simply maping all values to colors, without finding and matching unique values first. In some scenarios the geom can guess or know if that is going to be the case. The goal of this commit is to let the geom tell the ScaleContinuous scale how the mapping from values to colours should be done. By default the existing "unique" approach is used. The geom may now specify `scale_params = list(fill=list(mapping_method = "raw"))` to tell the scale corresponding to the fill aesthetic to use a "raw" approach of mapping values to colours without finding unique values first. Besides the default "unique" and the new "raw" mapping methods, we also allow the geom to ask to use the "binned" approach, where the geom specifies a number of intervals to use and the mapping process is as follows: - values are binned in N intervals - intervals are mapped to colors This approach is "lossy" (we have a maximum of N different colours), but this can be much faster and have almost no difference with respect to the other mapping methods. --- R/scale-.r | 47 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/R/scale-.r b/R/scale-.r index 19cc0ca809..f6708a0bf7 100644 --- a/R/scale-.r +++ b/R/scale-.r @@ -615,12 +615,49 @@ ScaleContinuous <- ggproto("ScaleContinuous", Scale, transform = default_transform, - map = function(self, x, limits = self$get_limits()) { + map = function(self, x, limits = self$get_limits(), scale_params = NULL) { x <- self$rescale(self$oob(x, range = limits), limits) - - uniq <- unique0(x) - pal <- self$palette(uniq) - scaled <- pal[match(x, uniq)] + if (is.null(scale_params)) { + scale_params <- list() + } + # A mapping method maps the x values in [0-1] range to a continuous aesthetic. + # "unique": The default. Find unique values in x, map the unique values to colours, + # match the mapped colours to original x values. + # "raw": just map all x values to colours. More efficient if we know there are not + # many repeated values. Usually less efficient if there are some repeated + # values. + # "binned": Bin `x` into `mapping_method_bins` levels. Map those levels to colours. + # Assign the colour to each value corresponding to their bin. + # This approach is faster with large vectors, but is lossy. + mapping_method <- scale_params[["mapping_method"]] + if (is.null(mapping_method)) { + mapping_method <- "unique" + } + if (is.character(mapping_method) && !mapping_method %in% c("unique", "raw", "binned")) { + cli::cli_warn(c( + "ScaleContinous does not support the mapping method {mapping_method}", + "i" = "Using 'unique' instead." + )) + mapping_method <- "unique" + } + if (mapping_method == "unique") { + uniq <- unique0(x) + pal <- self$palette(uniq) + scaled <- pal[match(x, uniq)] + } else if (mapping_method == "raw") { + scaled <- self$palette(x) + } else if (mapping_method == "binned") { + mapping_method_bins <- scale_params[["mapping_method_bins"]] + if (is.null(mapping_method_bins)) { + mapping_method_bins <- 1024L + } + mapping_method_bins <- as.integer(mapping_method_bins[1L]) + breaks <- seq(from = 0, to = 1, length.out = mapping_method_bins + 1L) + colormap <- c(self$na.value, self$palette(breaks), self$na.value) + # values below 0 belong to the first bucket, but zero belongs to the second bucket: + breaks[1] <- -.Machine$double.eps + scaled <- colormap[findInterval(x, breaks, rightmost.closed = TRUE) + 1L] + } # A specific palette can have as attribute "may_return_NA = FALSE" # If it has such attribute, we will skip the ifelse(!is.na(scaled), ...) From afa0d441221294d5c2e093f41f02079156f54e83 Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 12:01:48 +0100 Subject: [PATCH 07/11] TEST: scale_params "binned" mapping method --- tests/testthat/test-scale-colour-continuous.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/testthat/test-scale-colour-continuous.R b/tests/testthat/test-scale-colour-continuous.R index 444b27f9db..9c6f737351 100644 --- a/tests/testthat/test-scale-colour-continuous.R +++ b/tests/testthat/test-scale-colour-continuous.R @@ -47,3 +47,11 @@ test_that("palette with may_return_NA=FALSE works as expected", { nat <- sc$map(0.5, limits = c(0, 1)) expect_equal(nat, NA_character_) }) + +test_that("scale_params mapping_method supports binned", { + sc <- scale_fill_continuous() + x <- seq(0, 1, length.out = 10) + only_two <- sc$map(x, limits = c(0, 1), scale_params = list(mapping_method = "binned", mapping_method_bins = 2)) + expect_equal(length(unique(only_two)), 2L) +}) + From 6513a17b25ee7241e8bda1c356fccf2e76c85b38 Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 12:00:38 +0100 Subject: [PATCH 08/11] TEST: Palette capability "may_return_na" --- tests/testthat/test-scale-colour-continuous.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test-scale-colour-continuous.R b/tests/testthat/test-scale-colour-continuous.R index 9c6f737351..cdeddfafef 100644 --- a/tests/testthat/test-scale-colour-continuous.R +++ b/tests/testthat/test-scale-colour-continuous.R @@ -54,4 +54,3 @@ test_that("scale_params mapping_method supports binned", { only_two <- sc$map(x, limits = c(0, 1), scale_params = list(mapping_method = "binned", mapping_method_bins = 2)) expect_equal(length(unique(only_two)), 2L) }) - From 44a4d08764736150901967aeb84e290c87a05044 Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 09:32:05 +0100 Subject: [PATCH 09/11] FEAT: ScaleContinuous can return native colour format Some geoms benefit from using native colour format instead of the character based colour format. This commit lets the geoms specify that they prefer native format for colours, and gives the responsibility of converting into that format to ScaleContinuous. Since today it is not mandatory for all scales to honor scale_params, the geom that requests this will have to verify that the color is given in native format anyway, and do the conversion if it has not been done here. However by optionally shifting the responsibility of the conversion to the scale we have potential to further optimizations --- DESCRIPTION | 1 + R/scale-.r | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cbf8db6cd5..d3ca49180d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,6 +31,7 @@ Depends: R (>= 3.3) Imports: cli, + farver, glue, grDevices, grid, diff --git a/R/scale-.r b/R/scale-.r index f6708a0bf7..4e5ef336b0 100644 --- a/R/scale-.r +++ b/R/scale-.r @@ -640,12 +640,22 @@ ScaleContinuous <- ggproto("ScaleContinuous", Scale, )) mapping_method <- "unique" } + # Geom prefers native color format: + geom_prefers_native <- identical(scale_params[["color_fmt"]], "native") + if (geom_prefers_native) { + palette <- function(x) farver::encode_native(self$palette(x)) + na.value <- farver::encode_native(self$na.value) + } else { + palette <- self$palette + na.value <- self$na.value + } + if (mapping_method == "unique") { uniq <- unique0(x) - pal <- self$palette(uniq) + pal <- palette(uniq) scaled <- pal[match(x, uniq)] } else if (mapping_method == "raw") { - scaled <- self$palette(x) + scaled <- palette(x) } else if (mapping_method == "binned") { mapping_method_bins <- scale_params[["mapping_method_bins"]] if (is.null(mapping_method_bins)) { @@ -653,7 +663,7 @@ ScaleContinuous <- ggproto("ScaleContinuous", Scale, } mapping_method_bins <- as.integer(mapping_method_bins[1L]) breaks <- seq(from = 0, to = 1, length.out = mapping_method_bins + 1L) - colormap <- c(self$na.value, self$palette(breaks), self$na.value) + colormap <- c(na.value, palette(breaks), na.value) # values below 0 belong to the first bucket, but zero belongs to the second bucket: breaks[1] <- -.Machine$double.eps scaled <- colormap[findInterval(x, breaks, rightmost.closed = TRUE) + 1L] @@ -663,7 +673,7 @@ ScaleContinuous <- ggproto("ScaleContinuous", Scale, # If it has such attribute, we will skip the ifelse(!is.na(scaled), ...) pal_may_return_na <- ggproto_attr(self$palette, "may_return_NA", default = TRUE) if (pal_may_return_na) { - scaled <- ifelse(!is.na(scaled), scaled, self$na.value) + scaled <- ifelse(!is.na(scaled), scaled, na.value) } scaled From 8a18be64622ce748070ccc6531a96bd8bed8e6be Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 10:02:25 +0100 Subject: [PATCH 10/11] PERF: Palette capability: accepts_native_output=TRUE If a palette has accepts_native_output=TRUE set as an attribute, ScaleContinuous assumes the palette has an optional argument named `color_fmt` which can be set to either "character" or "native". If the geom prefers a native output format and the palette supports it, we let the palette take care of it. The conversion goes from value -> native colour, which is much faster than going through an intermediate character representation of the colours. --- R/scale-.r | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/R/scale-.r b/R/scale-.r index 4e5ef336b0..b7b58b903f 100644 --- a/R/scale-.r +++ b/R/scale-.r @@ -642,7 +642,17 @@ ScaleContinuous <- ggproto("ScaleContinuous", Scale, } # Geom prefers native color format: geom_prefers_native <- identical(scale_params[["color_fmt"]], "native") - if (geom_prefers_native) { + # Palette capability: Accepts native output + # A specific palette can have as attribute "accepts_native_output = TRUE". + # Then, self$palette must have an additional argument (besides x) named + # `color_fmt = "character"`. If we pass `color_fmt = "native"`, it will + # return colors in native format. + pal_accepts_native <- ggproto_attr(self$palette, "accepts_native_output", default = FALSE) + + if (geom_prefers_native && pal_accepts_native) { + palette <- function(x) self$palette(x, color_fmt = "native") + na.value <- farver::encode_native(self$na.value) + } else if (geom_prefers_native) { palette <- function(x) farver::encode_native(self$palette(x)) na.value <- farver::encode_native(self$na.value) } else { From fdfd6d908edb5dff2ca9064e23f7bfca220f4b6f Mon Sep 17 00:00:00 2001 From: Sergio Oller Date: Sun, 6 Nov 2022 12:02:36 +0100 Subject: [PATCH 11/11] TEST: Palette capability: "accepts_native_output" --- tests/testthat/test-scale-colour-continuous.R | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/testthat/test-scale-colour-continuous.R b/tests/testthat/test-scale-colour-continuous.R index cdeddfafef..0df43ba67e 100644 --- a/tests/testthat/test-scale-colour-continuous.R +++ b/tests/testthat/test-scale-colour-continuous.R @@ -19,6 +19,43 @@ test_that("type argument is checked for proper input", { ) }) +test_that("palette with accepts_native_output returns native colours", { + sc <- scale_fill_continuous() + sc$palette <- structure( + function(x, color_fmt = "character") { + if (color_fmt == "character") { + rep("red", length(x)) + } else { + rep(-16776961L, length(x)) + } + }, + accepts_native_output = TRUE + ) + x <- 0.5 + nat <- sc$map(x, limits = c(0, 1), scale_params = list(color_fmt = "native")) + expect_equal(nat, -16776961L) + chr <- sc$map(x, limits = c(0, 1), scale_params = list(color_fmt = "character")) + expect_equal(chr, "red") + chr2 <- sc$map(x, limits = c(0, 1)) + expect_equal(chr, chr2) +}) + +test_that("palette without accepts_native_output returns native colours as well", { + sc <- scale_fill_continuous() + sc$palette <- function(x) { + rep("red", length(x)) + } + x <- 0.5 + nat <- sc$map(x, limits = c(0, 1), scale_params = list(color_fmt = "native")) + expect_equal(nat, -16776961L) + chr <- sc$map(x, limits = c(0, 1), scale_params = list(color_fmt = "character")) + expect_equal(chr, "red") + chr2 <- sc$map(x, limits = c(0, 1)) + expect_equal(chr, chr2) +}) + + + test_that("palette with may_return_NA=FALSE works as expected", { sc <- scale_fill_continuous() # A palette that may return NAs, will have NAs replaced by the scale's na.value