From 9c5a00d66d3536e17f130abf50af9c3a61e1059d Mon Sep 17 00:00:00 2001
From: David C Hall <davidchall@users.noreply.github.com>
Date: Fri, 26 Mar 2021 09:42:16 -0700
Subject: [PATCH] Fix label_number_si() to use SI prefixes (#235)

* Edit label_number_si() to use SI prefixes

* Update test to send another argument to number()

* Fix unicode error emitted by R CMD check

* Remove non-ASCII character from docs

* Fix unicode mismatch on Windows

* Another attempt to resolve Windows unicode

* Share SI prefixes with label_bytes

* Restore whitespace

* Add billion_scale argument to label_dollar()

* Remove wikipedia hyperlink

* Rename argument as rescale_large

* Work with accuracy and scale arguments

* Clarify short scale used internationally for finance

* Refactor common code in rescale_by_suffix()

* Set default accuracy to NULL

* Fix conflicting factor levels on R 3.4

* Rename short/long scale functions

* Move SI prefixes into SI file

* label_bytes() uses rescale_by_suffix()

* label_number_si() supports scale argument

* Remove sep argument from label_number_si()
This wasn't actually doing anything, because user inputs were overwritten.

* First argument of label_number_si() is unit

* Require unit argument

* Update NEWS

* NEWS update

* Document when `scale` argument is useful

* Remove headings from NEWS

* Fix docs typo
---
 DESCRIPTION                           |  2 +-
 NAMESPACE                             |  2 +
 NEWS.md                               | 22 ++++++++++
 R/label-bytes.R                       | 29 ++++++-------
 R/label-dollar.R                      | 56 ++++++++++++++++++++++--
 R/label-number-si.R                   | 61 +++++++++++++++------------
 R/label-number.r                      |  2 +-
 R/rescale_by_suffix.R                 | 20 +++++++++
 man/brewer_pal.Rd                     |  2 +-
 man/label_bytes.Rd                    | 10 +++--
 man/label_dollar.Rd                   | 34 ++++++++++++++-
 man/label_number.Rd                   |  2 +-
 man/label_number_si.Rd                | 32 ++++++++------
 man/label_percent.Rd                  |  2 +-
 man/label_scientific.Rd               |  2 +-
 man/number.Rd                         |  2 +-
 man/unit_format.Rd                    |  2 +-
 tests/testthat/test-label-bytes.R     |  7 ++-
 tests/testthat/test-label-dollar.R    | 30 +++++++++++++
 tests/testthat/test-label-number-si.R | 42 +++++++++++++-----
 20 files changed, 278 insertions(+), 83 deletions(-)
 create mode 100644 R/rescale_by_suffix.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 58e0947c..81d445aa 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -37,4 +37,4 @@ Suggests:
 Encoding: UTF-8
 LazyLoad: yes
 Roxygen: list(markdown = TRUE, r6 = FALSE)
-RoxygenNote: 7.1.0
+RoxygenNote: 7.1.1
diff --git a/NAMESPACE b/NAMESPACE
index 263f5545..787c535e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -132,10 +132,12 @@ export(pvalue_format)
 export(reciprocal_trans)
 export(regular_minor_breaks)
 export(rescale)
+export(rescale_long_scale)
 export(rescale_max)
 export(rescale_mid)
 export(rescale_none)
 export(rescale_pal)
+export(rescale_short_scale)
 export(reverse_trans)
 export(scientific)
 export(scientific_format)
diff --git a/NEWS.md b/NEWS.md
index 0993d7e8..d67198c8 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,28 @@
 * `manual_pal()` now always returns an unnamed colour vector, which is easy to
   use with `ggplot2::discrete_scale()` (@yutannihilation, #284).
 
+* `label_number_si()` now correctly uses [SI prefixes](https://en.wikipedia.org/wiki/Metric_prefix)
+  (e.g. abbreviations "k" for "kilo-" and "m" for "milli-"). It previously used
+  [short scale abbreviations](https://en.wikipedia.org/wiki/Long_and_short_scales)
+  (e.g. "M" for million, "B" for billion). The short scale is most commonly used
+  in finance, so it is now supported via the new `rescale_large` argument of
+  `label_dollar()` (@davidchall, #235).
+
+* `label_number_si()` now requires the `unit` argument to be specified. The default
+  value of the `accuracy` argument is now `NULL`, which automatically chooses
+  the precision. The `sep` argument, which had no effect, is removed (@davidchall, #235).
+
+* `label_dollar()` gains a `rescale_large` argument to support scaling of large
+  numbers by suffix (e.g. "M" for million, "B" for billion). In finance, the
+  short scale is most prevalent (i.e. 1 billion = 1 thousand million). In other
+  contexts, the long scale might be desired (i.e. 1 billion = 1 million million).
+  These two common scales are supported by setting `rescale_large = rescale_short_scale()`
+  or `rescale_large = rescale_long_scale()`, but custom scaling-by-suffix is also
+  supported (@davidchall, #235).
+  
+* `label_bytes()` now correctly accounts for the `scale` argument when choosing
+  auto units (@davidchall, #235).
+
 # scales 1.1.1
 
 * `breaks_width()` now handles `difftime`/`hms` objects (@bhogan-mitre, #244).
diff --git a/R/label-bytes.R b/R/label-bytes.R
index 22aeb089..ce08aeee 100644
--- a/R/label-bytes.R
+++ b/R/label-bytes.R
@@ -1,4 +1,4 @@
-#' Label bytes (1 kb, 2 MB, etc)
+#' Label bytes (1 kB, 2 MB, etc)
 #'
 #' Scale bytes into human friendly units. Can use either SI units (e.g.
 #' kB = 1000 bytes) or binary units (e.g. kiB = 1024 bytes). See
@@ -10,7 +10,7 @@
 #'     SI units (base 1000).
 #'   * "kiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", and "YiB" for
 #'     binary units (base 1024).
-#'   * `auto_si` or `auto_binary` to automatically pick the most approrpiate
+#'   * `auto_si` or `auto_binary` to automatically pick the most appropriate
 #'     unit for each value.
 #' @inheritParams number_format
 #' @param ... Other arguments passed on to [number()]
@@ -37,7 +37,7 @@
 #'   breaks = breaks_width(250 * 1024),
 #'   label = label_bytes("auto_binary")
 #' )
-label_bytes <- function(units = "auto_si", accuracy = 1, ...) {
+label_bytes <- function(units = "auto_si", accuracy = 1, scale = 1, ...) {
   stopifnot(is.character(units), length(units) == 1)
   force_all(accuracy, ...)
 
@@ -48,8 +48,10 @@ label_bytes <- function(units = "auto_si", accuracy = 1, ...) {
       base <- switch(units, auto_binary = 1024, auto_si = 1000)
       suffix <- switch(units, auto_binary = "iB", auto_si = "B")
 
-      power <- findInterval(abs(x), c(0, base^powers)) - 1L
-      units <- paste0(c("", names(powers))[power + 1L], suffix)
+      rescale <- rescale_by_suffix(x * scale, breaks = c(0, base^powers))
+
+      suffix <- paste0(" ", rescale$suffix, suffix)
+      scale <- scale * rescale$scale
     } else {
       si_units <- paste0(names(powers), "B")
       bin_units <- paste0(names(powers), "iB")
@@ -63,22 +65,17 @@ label_bytes <- function(units = "auto_si", accuracy = 1, ...) {
       } else {
         stop("'", units, "' is not a valid unit", call. = FALSE)
       }
+
+      suffix <- paste0(" ", units)
+      scale <- scale / base^power
     }
 
     number(
-      x / base^power,
+      x,
       accuracy = accuracy,
-      suffix = paste0(" ", units),
+      scale = scale,
+      suffix = suffix,
       ...
     )
   }
 }
-
-# Helpers -----------------------------------------------------------------
-
-si_powers <- (-8:8) * 3
-names(si_powers) <- c(
-  rev(c("m", "\u00b5", "n", "p", "f", "a", "z", "y")), "",
-        "k", "M",      "G", "T", "P", "E", "Z", "Y"
-)
-si_powers
diff --git a/R/label-dollar.R b/R/label-dollar.R
index 9c004b9a..511ff50e 100644
--- a/R/label-dollar.R
+++ b/R/label-dollar.R
@@ -14,6 +14,11 @@
 #'   value is less than `largest_with_cents` which by default is 100,000.
 #' @param prefix,suffix Symbols to display before and after value.
 #' @param negative_parens Display negative using parentheses?
+#' @param rescale_large Named integer vector giving the suffixes applied to
+#'   large values (e.g. thousands, millions, billions, trillions). Each name is
+#'   a suffix and each value is its positive power of ten (e.g. `c(K = 3L)`).
+#'   The two most common scales are provided (`rescale_short_scale()` and
+#'   `rescale_long_scale()`). If `NULL`, the default, these suffixes aren't used.
 #' @param ... Other arguments passed on to [base::format()].
 #' @export
 #' @family labels for continuous scales
@@ -23,7 +28,7 @@
 #'
 #' # Customise currency display with prefix and suffix
 #' demo_continuous(c(1, 100), labels = label_dollar(prefix = "USD "))
-#' euro <- dollar_format(
+#' euro <- label_dollar(
 #'   prefix = "",
 #'   suffix = "\u20ac",
 #'   big.mark = ".",
@@ -33,10 +38,26 @@
 #'
 #' # Use negative_parens = TRUE for finance style display
 #' demo_continuous(c(-100, 100), labels = label_dollar(negative_parens = TRUE))
+#'
+#' # In finance the short scale is most prevalent
+#' dollar <- label_dollar(rescale_large = rescale_short_scale())
+#' demo_log10(c(1, 1e18), breaks = log_breaks(7, 1e3), labels = dollar)
+#'
+#' # In other contexts the long scale might be used
+#' long <- label_dollar(prefix = "", rescale_large = rescale_long_scale())
+#' demo_log10(c(1, 1e18), breaks = log_breaks(7, 1e3), labels = long)
+#'
+#' # You can also define a custom naming scheme
+#' gbp <- label_dollar(
+#'   prefix = "\u00a3",
+#'   rescale_large = c(k = 3L, m = 6L, bn = 9L, tn = 12L)
+#' )
+#' demo_log10(c(1, 1e12), breaks = log_breaks(5, 1e3), labels = gbp)
 label_dollar <- function(accuracy = NULL, scale = 1, prefix = "$",
                           suffix = "", big.mark = ",", decimal.mark = ".",
                           trim = TRUE, largest_with_cents = 100000,
-                          negative_parens = FALSE, ...) {
+                          negative_parens = FALSE, rescale_large = NULL,
+                          ...) {
   force_all(
     accuracy,
     scale,
@@ -47,6 +68,7 @@ label_dollar <- function(accuracy = NULL, scale = 1, prefix = "$",
     trim,
     largest_with_cents,
     negative_parens,
+    rescale_large,
     ...
   )
   function(x) dollar(
@@ -60,6 +82,7 @@ label_dollar <- function(accuracy = NULL, scale = 1, prefix = "$",
       trim = trim,
       largest_with_cents = largest_with_cents,
       negative_parens,
+      rescale_large = rescale_large,
       ...
     )
 }
@@ -86,9 +109,10 @@ dollar_format <- label_dollar
 dollar <- function(x, accuracy = NULL, scale = 1, prefix = "$",
                    suffix = "", big.mark = ",", decimal.mark = ".",
                    trim = TRUE, largest_with_cents = 100000,
-                   negative_parens = FALSE, ...) {
+                   negative_parens = FALSE, rescale_large = NULL,
+                   ...) {
   if (length(x) == 0) return(character())
-  if (is.null(accuracy)) {
+  if (is.null(accuracy) && is.null(rescale_large)) {
     if (needs_cents(x * scale, largest_with_cents)) {
       accuracy <- .01
     } else {
@@ -102,6 +126,18 @@ dollar <- function(x, accuracy = NULL, scale = 1, prefix = "$",
   negative <- !is.na(x) & x < 0
   x <- abs(x)
 
+  if (!is.null(rescale_large)) {
+    if (!(is.integer(rescale_large) && all(rescale_large > 0))) {
+      stop("`rescale_large` must be positive integers.", call. = FALSE)
+    }
+
+    rescale <- rescale_by_suffix(x * scale, breaks = c(0, 10^rescale_large))
+
+    sep <- if (suffix == "") "" else " "
+    suffix <- paste0(rescale$suffix, sep, suffix)
+    scale <- scale * rescale$scale
+  }
+
   amount <- number(
     x,
     accuracy = accuracy,
@@ -126,3 +162,15 @@ dollar <- function(x, accuracy = NULL, scale = 1, prefix = "$",
 
   amount
 }
+
+#' @export
+#' @rdname label_dollar
+rescale_short_scale <- function() {
+  c(K = 3L, M = 6L, B = 9L, T = 12L)
+}
+
+#' @export
+#' @rdname label_dollar
+rescale_long_scale <- function() {
+  c(K = 3L, M = 6L, B = 12L, T = 18L)
+}
diff --git a/R/label-number-si.R b/R/label-number-si.R
index 56df9480..f50f0a17 100644
--- a/R/label-number-si.R
+++ b/R/label-number-si.R
@@ -1,46 +1,55 @@
-#' Label numbers with SI prefixes (2k, 1M, 5T etc)
+#' Label numbers with SI prefixes (2 kg, 5 mm, etc)
 #'
-#' `number_si()` automatically scales and labels with the best SI prefix,
-#' "K" for values \eqn{\ge} 10e3, "M" for \eqn{\ge} 10e6,
-#' "B" for \eqn{\ge} 10e9, and "T" for \eqn{\ge} 10e12.
+#' `label_number_si()` automatically adds the most suitable SI prefix and scales
+#' the values appropriately. For example, values greater than 1000 gain a "k"
+#' prefix (abbreviated from "kilo-") and are scaled by 1/1000.
+#' See [Metric Prefix](https://en.wikipedia.org/wiki/Metric_prefix) on Wikipedia
+#' for more details.
 #'
 #' @inherit number_format return params
-#' @param unit Optional units specifier.
-#' @param sep Separator between number and SI unit. Defaults to `" "` if
-#'   `units` is supplied, and `""` if not.
+#' @param unit Unit of measurement (e.g. `"m"` for meter, the SI unit of length).
+#' @param scale A scaling factor: `x` will be multiplied by `scale` before
+#'   formatting. This is useful if the underlying data is already using an SI
+#'   prefix.
 #' @export
 #' @family labels for continuous scales
 #' @family labels for log scales
 #' @examples
-#' demo_continuous(c(1, 1e9), label = label_number_si())
-#' demo_continuous(c(1, 5000), label = label_number_si(unit = "g"))
-#' demo_continuous(c(1, 1000), label = label_number_si(unit = "m"))
+#' demo_continuous(c(1, 1000), labels = label_number_si("m"))
 #'
-#' demo_log10(c(1, 1e9), breaks = log_breaks(10), labels = label_number_si())
-label_number_si <- function(accuracy = 1, unit = NULL, sep = NULL, ...) {
-  sep <- if (is.null(unit)) "" else " "
+#' demo_log10(c(1, 1e9), breaks = log_breaks(10), labels = label_number_si("m"))
+#' demo_log10(c(1e-9, 1), breaks = log_breaks(10), labels = label_number_si("g"))
+#'
+#' # use scale when data already uses SI prefix (e.g. stored in kg)
+#' kg <- label_number_si("g", scale = 1e3)
+#' demo_log10(c(1e-9, 1), breaks = log_breaks(10), labels = kg)
+label_number_si <- function(unit, accuracy = NULL, scale = 1, ...) {
+  sep <- if (is.null(unit) || !nzchar(unit)) "" else " "
   force_all(accuracy, ...)
 
   function(x) {
-    breaks <- c(0, 10^c(K = 3, M = 6, B = 9, T = 12))
-
-    n_suffix <- cut(abs(x),
-      breaks = c(unname(breaks), Inf),
-      labels = c(names(breaks)),
-      right = FALSE
-    )
-    n_suffix[is.na(n_suffix)] <- ""
-    suffix <- paste0(sep, n_suffix, unit)
+    rescale <- rescale_by_suffix(x * scale, breaks = 10^si_powers)
 
-    scale <- 1 / breaks[n_suffix]
-    # for handling Inf and 0-1 correctly
-    scale[which(scale %in% c(Inf, NA))] <- 1
+    suffix <- paste0(sep, rescale$suffix, unit)
+    scale <- scale * rescale$scale
 
     number(x,
       accuracy = accuracy,
-      scale = unname(scale),
+      scale = scale,
       suffix = suffix,
       ...
     )
   }
 }
+
+# power-of-ten prefixes used by the International System of Units (SI)
+# https://www.bipm.org/en/measurement-units/prefixes.html
+#
+# note: irregular prefixes (hecto, deca, deci, centi) are not stored
+# because they don't commonly appear in scientific usage anymore
+si_powers <- (-8:8) * 3
+names(si_powers) <- c(
+  rev(c("m", "\u00b5", "n", "p", "f", "a", "z", "y")), "",
+        "k", "M",      "G", "T", "P", "E", "Z", "Y"
+)
+si_powers
diff --git a/R/label-number.r b/R/label-number.r
index 3062c688..36658e9d 100644
--- a/R/label-number.r
+++ b/R/label-number.r
@@ -24,7 +24,7 @@
 #'
 #'   Applied to rescaled data.
 #' @param scale A scaling factor: `x` will be multiplied by `scale` before
-#'   formating. This is useful if the underlying data is very small or very
+#'   formatting. This is useful if the underlying data is very small or very
 #'   large.
 #' @param prefix,suffix Symbols to display before and after value.
 #' @param big.mark Character used between every 3 digits to separate thousands.
diff --git a/R/rescale_by_suffix.R b/R/rescale_by_suffix.R
new file mode 100644
index 00000000..ba1ebb1b
--- /dev/null
+++ b/R/rescale_by_suffix.R
@@ -0,0 +1,20 @@
+# each value of x is assigned a suffix and associated scaling factor
+rescale_by_suffix <- function(x, breaks) {
+  suffix <- as.character(cut(
+    abs(x),
+    breaks = c(unname(breaks), Inf),
+    labels = names(breaks),
+    right = FALSE
+  ))
+  suffix[is.na(suffix)] <- names(which.min(breaks))
+
+  scale <- unname(1 / breaks[suffix])
+  scale[which(scale %in% c(Inf, NA))] <- 1
+
+  # exact zero is not scaled
+  x_zero <- which(abs(x) == 0)
+  scale[x_zero] <- 1
+  suffix[x_zero] <- ""
+
+  list(scale = scale, suffix = suffix)
+}
diff --git a/man/brewer_pal.Rd b/man/brewer_pal.Rd
index 19c9d4d8..245ef968 100644
--- a/man/brewer_pal.Rd
+++ b/man/brewer_pal.Rd
@@ -13,7 +13,7 @@ brewer_pal(type = "seq", palette = 1, direction = 1)
 index into the list of palettes of appropriate \code{type}}
 
 \item{direction}{Sets the order of colours in the scale. If 1, the default,
-colours are as output by \code{\link[RColorBrewer:brewer.pal]{RColorBrewer::brewer.pal()}}. If -1, the
+colours are as output by \code{\link[RColorBrewer:ColorBrewer]{RColorBrewer::brewer.pal()}}. If -1, the
 order of colours is reversed.}
 }
 \description{
diff --git a/man/label_bytes.Rd b/man/label_bytes.Rd
index 2c2c80d9..cad8aaf9 100644
--- a/man/label_bytes.Rd
+++ b/man/label_bytes.Rd
@@ -2,9 +2,9 @@
 % Please edit documentation in R/label-bytes.R
 \name{label_bytes}
 \alias{label_bytes}
-\title{Label bytes (1 kb, 2 MB, etc)}
+\title{Label bytes (1 kB, 2 MB, etc)}
 \usage{
-label_bytes(units = "auto_si", accuracy = 1, ...)
+label_bytes(units = "auto_si", accuracy = 1, scale = 1, ...)
 }
 \arguments{
 \item{units}{Unit to use. Should either one of:
@@ -13,7 +13,7 @@ label_bytes(units = "auto_si", accuracy = 1, ...)
 SI units (base 1000).
 \item "kiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", and "YiB" for
 binary units (base 1024).
-\item \code{auto_si} or \code{auto_binary} to automatically pick the most approrpiate
+\item \code{auto_si} or \code{auto_binary} to automatically pick the most appropriate
 unit for each value.
 }}
 
@@ -24,6 +24,10 @@ difference between adjacent values.
 
 Applied to rescaled data.}
 
+\item{scale}{A scaling factor: \code{x} will be multiplied by \code{scale} before
+formatting. This is useful if the underlying data is very small or very
+large.}
+
 \item{...}{Other arguments passed on to \code{\link[=number]{number()}}}
 }
 \value{
diff --git a/man/label_dollar.Rd b/man/label_dollar.Rd
index 801e9541..d0c614da 100644
--- a/man/label_dollar.Rd
+++ b/man/label_dollar.Rd
@@ -4,6 +4,8 @@
 \alias{label_dollar}
 \alias{dollar_format}
 \alias{dollar}
+\alias{rescale_short_scale}
+\alias{rescale_long_scale}
 \title{Label currencies ($100, $2.50, etc)}
 \usage{
 label_dollar(
@@ -16,6 +18,7 @@ label_dollar(
   trim = TRUE,
   largest_with_cents = 1e+05,
   negative_parens = FALSE,
+  rescale_large = NULL,
   ...
 )
 
@@ -29,6 +32,7 @@ dollar_format(
   trim = TRUE,
   largest_with_cents = 1e+05,
   negative_parens = FALSE,
+  rescale_large = NULL,
   ...
 )
 
@@ -43,8 +47,13 @@ dollar(
   trim = TRUE,
   largest_with_cents = 1e+05,
   negative_parens = FALSE,
+  rescale_large = NULL,
   ...
 )
+
+rescale_short_scale()
+
+rescale_long_scale()
 }
 \arguments{
 \item{accuracy, largest_with_cents}{Number to round to. If \code{NULL}, the default,
@@ -53,7 +62,7 @@ values has non-zero fractional component (e.g. cents) and the largest
 value is less than \code{largest_with_cents} which by default is 100,000.}
 
 \item{scale}{A scaling factor: \code{x} will be multiplied by \code{scale} before
-formating. This is useful if the underlying data is very small or very
+formatting. This is useful if the underlying data is very small or very
 large.}
 
 \item{prefix, suffix}{Symbols to display before and after value.}
@@ -68,6 +77,12 @@ width (see \code{\link[base:format]{base::format()}}).}
 
 \item{negative_parens}{Display negative using parentheses?}
 
+\item{rescale_large}{Named integer vector giving the suffixes applied to
+large values (e.g. thousands, millions, billions, trillions). Each name is
+a suffix and each value is its positive power of ten (e.g. \code{c(K = 3L)}).
+The two most common scales are provided (\code{rescale_short_scale()} and
+\code{rescale_long_scale()}). If \code{NULL}, the default, these suffixes aren't used.}
+
 \item{...}{Other arguments passed on to \code{\link[base:format]{base::format()}}.}
 
 \item{x}{A numeric vector}
@@ -98,7 +113,7 @@ demo_continuous(c(1, 100), labels = label_dollar())
 
 # Customise currency display with prefix and suffix
 demo_continuous(c(1, 100), labels = label_dollar(prefix = "USD "))
-euro <- dollar_format(
+euro <- label_dollar(
   prefix = "",
   suffix = "\u20ac",
   big.mark = ".",
@@ -108,6 +123,21 @@ demo_continuous(c(1000, 1100), labels = euro)
 
 # Use negative_parens = TRUE for finance style display
 demo_continuous(c(-100, 100), labels = label_dollar(negative_parens = TRUE))
+
+# In finance the short scale is most prevalent
+dollar <- label_dollar(rescale_large = rescale_short_scale())
+demo_log10(c(1, 1e18), breaks = log_breaks(7, 1e3), labels = dollar)
+
+# In other contexts the long scale might be used
+long <- label_dollar(prefix = "", rescale_large = rescale_long_scale())
+demo_log10(c(1, 1e18), breaks = log_breaks(7, 1e3), labels = long)
+
+# You can also define a custom naming scheme
+gbp <- label_dollar(
+  prefix = "\u00a3",
+  rescale_large = c(k = 3L, m = 6L, bn = 9L, tn = 12L)
+)
+demo_log10(c(1, 1e12), breaks = log_breaks(5, 1e3), labels = gbp)
 }
 \seealso{
 Other labels for continuous scales: 
diff --git a/man/label_number.Rd b/man/label_number.Rd
index 044508b9..6d97d988 100644
--- a/man/label_number.Rd
+++ b/man/label_number.Rd
@@ -76,7 +76,7 @@ difference between adjacent values.
 Applied to rescaled data.}
 
 \item{scale}{A scaling factor: \code{x} will be multiplied by \code{scale} before
-formating. This is useful if the underlying data is very small or very
+formatting. This is useful if the underlying data is very small or very
 large.}
 
 \item{prefix, suffix}{Symbols to display before and after value.}
diff --git a/man/label_number_si.Rd b/man/label_number_si.Rd
index 3d60ea09..08380d6e 100644
--- a/man/label_number_si.Rd
+++ b/man/label_number_si.Rd
@@ -2,11 +2,13 @@
 % Please edit documentation in R/label-number-si.R
 \name{label_number_si}
 \alias{label_number_si}
-\title{Label numbers with SI prefixes (2k, 1M, 5T etc)}
+\title{Label numbers with SI prefixes (2 kg, 5 mm, etc)}
 \usage{
-label_number_si(accuracy = 1, unit = NULL, sep = NULL, ...)
+label_number_si(unit, accuracy = NULL, scale = 1, ...)
 }
 \arguments{
+\item{unit}{Unit of measurement (e.g. \code{"m"} for meter, the SI unit of length).}
+
 \item{accuracy}{A number to round to. Use (e.g.) \code{0.01} to show 2 decimal
 places of precision. If \code{NULL}, the default, uses a heuristic that should
 ensure breaks have the minimum number of digits needed to show the
@@ -14,10 +16,9 @@ difference between adjacent values.
 
 Applied to rescaled data.}
 
-\item{unit}{Optional units specifier.}
-
-\item{sep}{Separator between number and SI unit. Defaults to \code{" "} if
-\code{units} is supplied, and \code{""} if not.}
+\item{scale}{A scaling factor: \code{x} will be multiplied by \code{scale} before
+formatting. This is useful if the underlying data is already using an SI
+prefix.}
 
 \item{...}{Other arguments passed on to \code{\link[base:format]{base::format()}}.}
 }
@@ -32,16 +33,21 @@ they work similarly for all scales, including those that generate legends
 rather than axes.
 }
 \description{
-\code{number_si()} automatically scales and labels with the best SI prefix,
-"K" for values \eqn{\ge} 10e3, "M" for \eqn{\ge} 10e6,
-"B" for \eqn{\ge} 10e9, and "T" for \eqn{\ge} 10e12.
+\code{label_number_si()} automatically adds the most suitable SI prefix and scales
+the values appropriately. For example, values greater than 1000 gain a "k"
+prefix (abbreviated from "kilo-") and are scaled by 1/1000.
+See \href{https://en.wikipedia.org/wiki/Metric_prefix}{Metric Prefix} on Wikipedia
+for more details.
 }
 \examples{
-demo_continuous(c(1, 1e9), label = label_number_si())
-demo_continuous(c(1, 5000), label = label_number_si(unit = "g"))
-demo_continuous(c(1, 1000), label = label_number_si(unit = "m"))
+demo_continuous(c(1, 1000), labels = label_number_si("m"))
+
+demo_log10(c(1, 1e9), breaks = log_breaks(10), labels = label_number_si("m"))
+demo_log10(c(1e-9, 1), breaks = log_breaks(10), labels = label_number_si("g"))
 
-demo_log10(c(1, 1e9), breaks = log_breaks(10), labels = label_number_si())
+# use scale when data already uses SI prefix (e.g. stored in kg)
+kg <- label_number_si("g", scale = 1e3)
+demo_log10(c(1e-9, 1), breaks = log_breaks(10), labels = kg)
 }
 \seealso{
 Other labels for continuous scales: 
diff --git a/man/label_percent.Rd b/man/label_percent.Rd
index 1999b583..cf001bbb 100644
--- a/man/label_percent.Rd
+++ b/man/label_percent.Rd
@@ -49,7 +49,7 @@ difference between adjacent values.
 Applied to rescaled data.}
 
 \item{scale}{A scaling factor: \code{x} will be multiplied by \code{scale} before
-formating. This is useful if the underlying data is very small or very
+formatting. This is useful if the underlying data is very small or very
 large.}
 
 \item{prefix}{Symbols to display before and after value.}
diff --git a/man/label_scientific.Rd b/man/label_scientific.Rd
index 4e2d9c68..0e5a49dc 100644
--- a/man/label_scientific.Rd
+++ b/man/label_scientific.Rd
@@ -41,7 +41,7 @@ scientific(
 \item{digits}{Number of digits to show before exponent.}
 
 \item{scale}{A scaling factor: \code{x} will be multiplied by \code{scale} before
-formating. This is useful if the underlying data is very small or very
+formatting. This is useful if the underlying data is very small or very
 large.}
 
 \item{prefix, suffix}{Symbols to display before and after value.}
diff --git a/man/number.Rd b/man/number.Rd
index 2d024370..cefbabd8 100644
--- a/man/number.Rd
+++ b/man/number.Rd
@@ -27,7 +27,7 @@ difference between adjacent values.
 Applied to rescaled data.}
 
 \item{scale}{A scaling factor: \code{x} will be multiplied by \code{scale} before
-formating. This is useful if the underlying data is very small or very
+formatting. This is useful if the underlying data is very small or very
 large.}
 
 \item{prefix}{Symbols to display before and after value.}
diff --git a/man/unit_format.Rd b/man/unit_format.Rd
index 4b87bc63..c365415c 100644
--- a/man/unit_format.Rd
+++ b/man/unit_format.Rd
@@ -26,7 +26,7 @@ difference between adjacent values.
 Applied to rescaled data.}
 
 \item{scale}{A scaling factor: \code{x} will be multiplied by \code{scale} before
-formating. This is useful if the underlying data is very small or very
+formatting. This is useful if the underlying data is very small or very
 large.}
 
 \item{prefix}{Symbols to display before and after value.}
diff --git a/tests/testthat/test-label-bytes.R b/tests/testthat/test-label-bytes.R
index 0b60a4c2..635edb41 100644
--- a/tests/testthat/test-label-bytes.R
+++ b/tests/testthat/test-label-bytes.R
@@ -3,7 +3,7 @@ test_that("auto units always rounds down", {
 })
 
 test_that("auto units handles 0 and other special values", {
-  expect_equal(label_bytes()(NA), NA_character_)
+  expect_equal(label_bytes()(NA_real_), NA_character_)
   expect_equal(label_bytes()(0), "0 B")
   expect_equal(label_bytes()(-1), "-1 B")
   expect_equal(label_bytes()(Inf), "Inf")
@@ -14,6 +14,11 @@ test_that("can use either binary or si units", {
   expect_equal(label_bytes("kiB")(1024), "1 kiB")
 })
 
+test_that("compatible with scale argument", {
+  expect_equal(label_bytes("auto_si", scale = 2)(500), "1 kB")
+  expect_equal(label_bytes("auto_binary", scale = 2)(512), "1 kiB")
+})
+
 test_that("errors if unknown unit", {
   expect_error(label_bytes("unit")(0), "valid unit")
 })
diff --git a/tests/testthat/test-label-dollar.R b/tests/testthat/test-label-dollar.R
index 148cd4b5..c2dd7b9c 100644
--- a/tests/testthat/test-label-dollar.R
+++ b/tests/testthat/test-label-dollar.R
@@ -17,3 +17,33 @@ test_that("preserves names", {
 test_that("decimal.mark could be modified", {
   expect_equal(label_dollar(decimal.mark = ",")(123.45), "$123,45")
 })
+
+test_that("rescale_large works", {
+  x <- 10^(seq(0, 18, 3))
+
+  expect_equal(
+    label_dollar(rescale_large = rescale_short_scale())(x),
+    c("$1", "$1K", "$1M", "$1B", "$1T", "$1,000T", "$1,000,000T")
+  )
+  expect_equal(
+    label_dollar(rescale_large = rescale_long_scale())(x),
+    c("$1", "$1K", "$1M", "$1,000M", "$1B", "$1,000B", "$1T")
+  )
+  expect_equal(
+    label_dollar(rescale_large = c(k = 3L, m = 6L, bn = 9L, tn = 12L))(x),
+    c("$1", "$1k", "$1m", "$1bn", "$1tn", "$1,000tn", "$1,000,000tn")
+  )
+})
+
+test_that("rescale_large compatible with scale argument", {
+  x <- c(1, 2, 10, 100)
+
+  expect_equal(
+    label_dollar(scale = 2, rescale_large = rescale_short_scale())(x * 1e3),
+    c("$2.0K", "$4.0K", "$20.0K", "$200.0K")
+  )
+  expect_equal(
+    label_dollar(scale = 2, rescale_large = rescale_short_scale())(x * 1e4),
+    c("$20K", "$40K", "$200K", "$2M")
+  )
+})
diff --git a/tests/testthat/test-label-number-si.R b/tests/testthat/test-label-number-si.R
index d41bb358..645e817f 100644
--- a/tests/testthat/test-label-number-si.R
+++ b/tests/testthat/test-label-number-si.R
@@ -1,22 +1,44 @@
 test_that("rescales values independently", {
-  number_si <- label_number_si()
-  expect_equal(number_si(c(1e3, 1e6, 1e9)), c("1K", "1M", "1B"))
-  expect_equal(number_si(c(-1e3, 1e6, 1e9)), c("-1K", "1M", "1B"))
-  expect_equal(number_si(c(.50, 1e6, 1e15)), c("0", "1M", "1 000T"))
+  number_si <- label_number_si("")
+  expect_equal(
+    number_si(10^c(-24, -21, -18, -15, -12, -9, -6, -3, 0, 3, 6, 9, 12, 15, 18, 21, 24)),
+    c("1y", "1z", "1a", "1f", "1p", "1n", "1\u00b5", "1m", "1", "1k", "1M", "1G", "1T", "1P", "1E", "1Z", "1Y")
+  )
+  expect_equal(number_si(c(-1e3, 1e6, 1e9)), c("-1.0k", "1.0M", "1.0G"))
+})
+
+test_that("requires unit argument", {
+  expect_error(label_number_si())
+  expect_silent(label_number_si("m"))
+})
+
+test_that("handles units correctly", {
+  number_si <- label_number_si(unit = "g")
+  expect_equal(number_si(c(1e-6, 1e-3, 1, 1e3, 1e6)), c("1 \u00b5g", "1 mg", "1 g", "1 kg", "1 Mg"))
+})
+
+test_that("handles out-of-range inputs correctly", {
+  number_si <- label_number_si("", accuracy = 0.1)
+  expect_equal(number_si(c(0, 1e-27, 1e-25, 1e25, 1e27)), c("0.0", "0.0y", "0.1y", "10.0Y", "1 000.0Y"))
 })
 
 test_that("handles bad inputs gracefully", {
-  number_si <- label_number_si()
-  expect_equal(number_si(c(1, NA)), c("1", NA))
-  expect_equal(number_si(c(1, Inf)), c("1", "Inf"))
+  number_si <- label_number_si("m")
+  expect_equal(number_si(c(1, NA)), c("1 m", NA))
+  expect_equal(number_si(c(1, Inf)), c("1 m", "Inf"))
 })
 
 test_that("arguments passed on to number()", {
-  number_si <- label_number_si(accuracy = .1, prefix = "$")
-  expect_equal(number_si(c(.50, 1e6, 1e9)), c("$0.5", "$1.0M", "$1.0B"))
+  number_si <- label_number_si("", accuracy = .1, big.mark = ",")
+  expect_equal(number_si(1e27), "1,000.0Y")
+})
+
+test_that("compatible with scale argument", {
+  number_si <- label_number_si("m", scale = 2)
+  expect_equal(number_si(500), "1 km")
 })
 
 test_that("number_si preserves names", {
-  number_si <- label_number_si()
+  number_si <- label_number_si("m")
   expect_named(number_si(c(a = 1)), "a")
 })