diff --git a/DESCRIPTION b/DESCRIPTION index 201a39e7..ef4871db 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: tinytable Type: Package Title: Simple and Configurable Tables in 'HTML', 'LaTeX', 'Markdown', 'Word', 'PNG', 'PDF', and 'Typst' Formats Description: Create highly customized tables with this simple and dependency-free package. Data frames can be converted to 'HTML', 'LaTeX', 'Markdown', 'Word', 'PNG', 'PDF', or 'Typst' tables. The user interface is minimalist and easy to learn. The syntax concise. 'HTML' tables can be customized using the flexible 'Bootstrap' framework, and 'LaTeX' code with the 'tabularray' package. -Version: 0.0.2.9005 +Version: 0.0.2.9006 Depends: R (>= 4.1.0) Enhances: @@ -10,6 +10,7 @@ Enhances: Suggests: altdoc, ggplot2, + htmltools, markdown, palmerpenguins, pandoc, diff --git a/NEWS.md b/NEWS.md index a8e0c10e..567e48ab 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,7 @@ New: - `Typst` tables are now supported using the `tablex` extension: - https://typst.app/ - https://github.com/PgBiel/typst-tablex +- `escape` argument in `format_tt()` escapes or substitutes special characters in LaTeX or HTML output to prevent compilation and rendering errors. - `notes` argument in `tt()` can insert superscript markers inside cells to refer to notes at the bottom of the page. - `tt(x, notes = list("*" = list(i = 0:1, j = 2, text = "Hello world)))` - `notes` agument in `tt()` now works wth Markdown and Word, but must be a single string. 
diff --git a/R/escape.R b/R/escape.R new file mode 100644 index 00000000..00d9493a --- /dev/null +++ b/R/escape.R @@ -0,0 +1,39 @@ +escape_text <- function(x, output = "latex") { + if (length(x) < 1 || all(is.na(x))) { + return(x) + } + + out <- x + + if (isTRUE(output == "latex")) { + # LaTeX escaping code adapted from the `gt` package, published under MIT + # https://github.com/rstudio/gt/ + # YEAR: 2018-2024 + # COPYRIGHT HOLDER: gt authors + # Replacement for each LaTeX special character, keyed by the character itself; all matches are located first and substituted in one pass, so the backslashes and braces inserted here are not escaped again, and `NA` elements are left untouched + latex_special_chars <- c( + "\\" = "\\textbackslash{}", + "~" = "\\textasciitilde{}", + "^" = "\\textasciicircum{}", + "&" = "\\&", + "%" = "\\%", + "$" = "\\$", + "#" = "\\#", + "_" = "\\_", + "{" = "\\{", + "}" = "\\}" + ) + na_out <- is.na(out) + m <- gregexpr("[\\\\&%$#_{}~^]", out[!na_out], perl = TRUE) + special_chars <- regmatches(out[!na_out], m) + escaped_chars <- lapply(special_chars, function(x) { + latex_special_chars[x] + }) + regmatches(out[!na_out], m) <- escaped_chars + } else if (isTRUE(output == "html")) { + assert_dependency("htmltools") + out <- htmltools::htmlEscape(out) + } + + return(out) +} \ No newline at end of file diff --git a/R/format_tt.R b/R/format_tt.R index a9fec4e0..8d1d3853 100644 --- a/R/format_tt.R +++ b/R/format_tt.R @@ -14,6 +14,7 @@ #' @param date A string passed to the `format()` function, such as "%Y-%m-%d". See the "Details" section in `?strptime` #' @param bool A function to format logical columns. Defaults to title case. #' @param other A function to format columns of other types. Defaults to `as.character()`. +#' @param escape Logical or String; if TRUE, escape special characters to display them as text in the format of the output of a `tt()` table. If `format_tt()` is called as a standalone function instead of on a `tt()` table, the `escape` argument accepts strings to specify the escaping method: "latex" or "html". #' @param markdown Logical; if TRUE, render markdown syntax in cells. 
Ex: `_italicized text_` is properly italicized in HTML and LaTeX. #' @param sprintf String passed to the `?sprintf` function to format numbers or interpolate strings with a user-defined pattern (similar to the `glue` package, but using Base R). #' @inheritParams tt @@ -42,6 +43,7 @@ format_tt <- function(x, date = "%Y-%m-%d", bool = function(column) tools::toTitleCase(tolower(column)), other = as.character, + escape = FALSE, markdown = FALSE, sprintf = NULL ) { @@ -61,6 +63,7 @@ format_tt <- function(x, url = url, date = date, bool = bool, + escape = escape, markdown = markdown, other = other) out <- meta(out, "lazy_format", c(meta(out)$lazy_format, list(cal))) @@ -79,6 +82,7 @@ format_tt <- function(x, date = date, bool = bool, other = other, + escape = escape, markdown = markdown) } return(out) @@ -96,6 +100,7 @@ format_tt_lazy <- function(x, url = FALSE, date = "%Y-%m-%d", bool = identity, + escape = FALSE, markdown = FALSE, other = as.character ) { @@ -195,6 +200,20 @@ format_tt_lazy <- function(x, } # loop over columns + # escape latex characters + if (!isFALSE(escape)) { + if (isTRUE(escape == "latex")) { + o <- "latex" + } else if (isTRUE(escape == "html")) { + o <- "html" + } else { + o <- meta(x)$output + } + for (col in j) { + x[[col]] <- escape_text(x[[col]], output = o) + } + } + # markdown at the very end if (isTRUE(markdown)) { assert_dependency("markdown") diff --git a/inst/tinytest/_tinysnapshot/escape-html.txt b/inst/tinytest/_tinysnapshot/escape-html.txt new file mode 100644 index 00000000..055ef989 --- /dev/null +++ b/inst/tinytest/_tinysnapshot/escape-html.txt @@ -0,0 +1 @@ +[1] " \n\n \n \n \n tinytable_xnclv9tvdcq09jxgyitf\n \n \n \n \n \n \n\n \n
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
LaTeXHTML
Dollars $<br>
Percent %<sup>4</sup>
Underscore _<emph>blah</emph>
\n
\n\n \n \n\n \n\n" diff --git a/inst/tinytest/_tinysnapshot/escape-latex.txt b/inst/tinytest/_tinysnapshot/escape-latex.txt new file mode 100644 index 00000000..e113b1ba --- /dev/null +++ b/inst/tinytest/_tinysnapshot/escape-latex.txt @@ -0,0 +1 @@ +[1] "\\begin{table}\n\\centering\n\\begin{tblr}[ %% tabularray outer open\n] %% tabularray outer close\n{ %% tabularray inner open\ncolspec={Q[]Q[]},\n} %% tabularray inner close\n\\toprule\nLaTeX & HTML \\\\ \\midrule %% TinyTableHeader\nDollars \\$ & <br> \\\\\nPercent \\% & <sup>4</sup> \\\\\nUnderscore \\_ & <emph>blah</emph> \\\\\n\\bottomrule\n\\end{tblr}\n\\end{table}" diff --git a/inst/tinytest/test-escape.R b/inst/tinytest/test-escape.R new file mode 100644 index 00000000..4eafb3a1 --- /dev/null +++ b/inst/tinytest/test-escape.R @@ -0,0 +1,14 @@ +source("helpers.R") +using("tinysnapshot") + +dat <- data.frame( + "LaTeX" = c("Dollars $", "Percent %", "Underscore _"), + "HTML" = c("<br>", "<sup>4</sup>", "<emph>blah</emph>") +) + +set.seed(1024) # reproducibility of html unique IDs +tab <- tt(dat) |> format_tt(escape = TRUE) |> save_tt("latex") +expect_snapshot_print(tab, "escape-latex") + +tab <- tt(dat) |> format_tt(escape = TRUE) |> save_tt("html") +expect_snapshot_print(tab, "escape-html") \ No newline at end of file diff --git a/man/format_tt.Rd b/man/format_tt.Rd index c7af2266..52d9f730 100644 --- a/man/format_tt.Rd +++ b/man/format_tt.Rd @@ -16,6 +16,7 @@ format_tt( date = "\%Y-\%m-\%d", bool = function(column) tools::toTitleCase(tolower(column)), other = as.character, + escape = FALSE, markdown = FALSE, sprintf = NULL ) @@ -43,6 +44,8 @@ format_tt( \item{other}{A function to format columns of other types. Defaults to \code{as.character()}.} +\item{escape}{Logical or String; if TRUE, escape special characters to display them as text in the format of the output of a \code{tt()} table. If \code{format_tt()} is called as a standalone function instead of on a \code{tt()} table, the \code{escape} argument accepts strings to specify the escaping method: "latex" or "html".} + \item{markdown}{Logical; if TRUE, render markdown syntax in cells. Ex: \verb{_italicized text_} is properly italicized in HTML and LaTeX.} \item{sprintf}{String passed to the \code{?sprintf} function to format numbers or interpolate strings with a user-defined pattern (similar to the \code{glue} package, but using Base R).} diff --git a/vignettes/tutorial.qmd b/vignettes/tutorial.qmd index 81fab6ef..6e2c61fa 100644 --- a/vignettes/tutorial.qmd +++ b/vignettes/tutorial.qmd @@ -324,6 +324,25 @@ format_tt(dat, digits = 1, num_suffix = TRUE) ``` +## Escape special characters + +LaTeX and HTML use special characters to indicate strings which should be interpreted rather than displayed as text. For example, including underscores or dollar signs in LaTeX can cause compilation errors in some documents. 
To display those special characters, we need to substitute or escape them with backslashes, depending on the output format. The `escape` argument of `format_tt()` can be used to do this automatically: + +```{r} +dat <- data.frame( + "LaTeX" = c("Dollars $", "Percent %", "Underscore _"), + "HTML" = c("<br>", "<sup>4</sup>", "<emph>blah</emph>") +) + +tt(dat) |> format_tt(escape = TRUE) +``` + +When applied to a `tt()` table, `format_tt()` will determine the type of escaping to do automatically. When applied to a string or vector, we must specify the type of escaping to apply: + +```{r} +format_tt("_ Dollars $", escape = "latex") +``` + ## Markdown Markdown can be rendered in cells by using the `markdown` argument of the `format_tt()` function (note: this requires installing the `markdown` as an optional dependency).