diff --git a/R/location_path.R b/R/location_path.R index 33b1dc5a..19d894d7 100644 --- a/R/location_path.R +++ b/R/location_path.R @@ -69,7 +69,7 @@ orderly_location_path <- R6::R6Class( ## This split just acts to make the http one easier to think about - ## it's not the job of the driver to do validation, but the server. location_path_import_metadata <- function(str, hash, root) { - meta <- outpack_metadata_load(as_json(str)) + meta <- outpack_metadata_core_load(as_json(str)) id <- meta$id hash_validate_data(str, hash, sprintf("metadata for '%s'", id)) diff --git a/R/outpack_insert.R b/R/outpack_insert.R index 8dd47e91..e9ab53e6 100644 --- a/R/outpack_insert.R +++ b/R/outpack_insert.R @@ -1,6 +1,6 @@ outpack_insert_packet <- function(path, json, root = NULL) { assert_is(root, "outpack_root") - meta <- outpack_metadata_load(json) + meta <- outpack_metadata_core_load(json) assert_is_directory(path) hash_algorithm <- root$config$core$hash_algorithm diff --git a/R/outpack_metadata.R b/R/outpack_metadata.R index 00d044d0..077cc8e0 100644 --- a/R/outpack_metadata.R +++ b/R/outpack_metadata.R @@ -31,7 +31,7 @@ orderly_metadata <- function(id, root = NULL, locate = FALSE) { if (!file.exists(path_metadata)) { cli::cli_abort("Packet '{id}' not found in outpack index") } - outpack_metadata_load(path_metadata) + outpack_metadata_load(path_metadata, root$config$orderly$plugins) } @@ -41,19 +41,32 @@ orderly_metadata <- function(id, root = NULL, locate = FALSE) { ##' context of reading a metadata file written out as part of a failed ##' run. ##' +##' Custom metadata saved by plugins may not be deserialised as +##' expected when called with this function, as it is designed to +##' operate separately from a valid orderly root (i.e., it will load +##' data from any file regardless of where it came from). If `plugins` +##' is `TRUE` (the default) then we will deserialise all data that +##' matches any loaded plugin. This means that the behaviour of this +##' function depends on if you have loaded the plugin packages. You +##' can force this by running `orderly2::orderly_config()` within any +##' orderly directory, which will load any declared plugins. +##' ##' @title Read outpack metadata json file ##' ##' @param path Path to the json file ##' +##' @param plugins Try and deserialise data from all loaded plugins +##' (see Details). +##' ##' @return A list of outpack metadata; see the schema for details. In ##' contrast to reading the json file directly with ##' `jsonlite::fromJSON`, this function will take care to convert ##' scalar and length-one vectors into the expected types. ##' ##' @export -orderly_metadata_read <- function(path) { +orderly_metadata_read <- function(path, plugins = TRUE) { assert_file_exists(path, call = environment()) - outpack_metadata_load(path) + outpack_metadata_load(path, if (plugins) .plugins else NULL) } outpack_metadata_create <- function(path, name, id, time, files, @@ -166,10 +179,14 @@ outpack_metadata_core <- function(id, root, call = NULL) { } +outpack_metadata_core_read <- function(path) { + outpack_metadata_core_load(read_string(path)) +} + metadata_core_names <- c("id", "name", "parameters", "time", "files", "depends") -outpack_metadata_core_read <- function(path) { - data <- jsonlite::read_json(path)[metadata_core_names] +outpack_metadata_core_load <- function(json) { + data <- jsonlite::parse_json(json)[metadata_core_names] outpack_metadata_core_deserialise(data) } @@ -193,7 +210,7 @@ outpack_metadata_core_deserialise <- function(data) { } -outpack_metadata_load <- function(json) { +outpack_metadata_load <- function(json, plugins) { if (!inherits(json, "json")) { # could use starts with "{" json <- read_string(json) } @@ -202,6 +219,16 @@ outpack_metadata_load <- function(json) { if (!is.null(data$custom$orderly)) { data$custom$orderly <- custom_metadata_deserialise(data$custom$orderly) } + for (nm in intersect(names(data$custom), names(plugins))) { + data$custom[[nm]] <- tryCatch( + plugins[[nm]]$deserialise(data$custom[[nm]]), + error = function(e) { + cli::cli_warn( + c("Deserialising custom metadata '{nm}' for '{data$id}' failed", + x = e$message)) + data$custom[[nm]] + }) + } data } diff --git a/R/plugin.R b/R/plugin.R index f0ceeae0..160dc406 100644 --- a/R/plugin.R +++ b/R/plugin.R @@ -29,6 +29,17 @@ ##' then no serialisation is done, and no metadata from your plugin ##' will be added. ##' +##' @param deserialise A function to deserialise any metadata +##' serialised by the `serialise` function. This is intended to help +##' deal with issues disambiguating unserialising objects from json +##' (scalars vs arrays of lenth 1, data.frames vs lists-of-lists +##' etc), and will make your plugin nicer to work with +##' [orderly2::orderly_metadata_extract()]. This function will be +##' given a single argument `data` which is the data from +##' `jsonlite::fromJSON(..., simplifyVector = FALSE)` and you should +##' apply any required simplifications yourself, returning a +##' modified copy of the argument. +##' ##' @param cleanup Optionally, a function to clean up any state that ##' your plugin uses. You can call `orderly_plugin_context` from ##' within this function and access anything you need from that. If @@ -45,9 +56,11 @@ ##' ##' @export orderly_plugin_register <- function(name, config, serialise = NULL, - cleanup = NULL, schema = NULL) { + deserialise = NULL, cleanup = NULL, + schema = NULL) { assert_scalar_character(name, call = environment()) - plugin <- orderly_plugin(name, config, serialise, cleanup, schema) + plugin <- orderly_plugin(name, config, serialise, deserialise, cleanup, + schema) .plugins[[name]] <- plugin } @@ -69,30 +82,36 @@ load_orderly_plugin <- function(name) { .plugins <- new.env(parent = emptyenv()) -orderly_plugin <- function(package, config, serialise, cleanup, schema, - call = NULL) { +orderly_plugin <- function(package, config, serialise, deserialise, cleanup, + schema, call = NULL) { assert_is(config, "function", call = call) if (is.null(cleanup)) { cleanup <- plugin_no_cleanup } if (!is.null(schema)) { if (is.null(serialise)) { - stop("If 'schema' is given, then 'serialise' must be non-NULL") + cli::cli_abort( + "If 'schema' is given, then 'serialise' must be non-NULL", + call = call) } path_pkg <- pkg_root(package) if (!file.exists(file.path(path_pkg, schema))) { cli::cli_abort( - "Expected schema file '{schema}' to exist in package '{package}'") + "Expected schema file '{schema}' to exist in package '{package}'", + call = call) } schema <- sprintf("%s/%s", package, schema) } - if (is.null(serialise)) { - serialise <- plugin_no_serialise + if (!is.null(deserialise) && is.null(serialise)) { + cli::cli_abort( + "If 'deserialise' is given, then 'serialise' must be non-NULL", + call = call) } assert_is(cleanup, "function", call = call) ret <- list(package = package, config = config, - serialise = serialise, + serialise = serialise %||% plugin_no_serialise, + deserialise = deserialise %||% plugin_no_deserialise, cleanup = cleanup, schema = schema) class(ret) <- "orderly_plugin" @@ -229,6 +248,11 @@ plugin_no_serialise <- function(data) { } +plugin_no_deserialise <- function(data) { + data +} + + ## Some careful work here is required to cope with the case where ## orderly2 and the plugin package are installed directly or in dev ## mode diff --git a/man/orderly_metadata_read.Rd b/man/orderly_metadata_read.Rd index e9aa924b..8acf1397 100644 --- a/man/orderly_metadata_read.Rd +++ b/man/orderly_metadata_read.Rd @@ -4,10 +4,13 @@ \alias{orderly_metadata_read} \title{Read outpack metadata json file} \usage{ -orderly_metadata_read(path) +orderly_metadata_read(path, plugins = TRUE) } \arguments{ \item{path}{Path to the json file} + +\item{plugins}{Try and deserialise data from all loaded plugins +(see Details).} } \value{ A list of outpack metadata; see the schema for details. In @@ -22,3 +25,14 @@ reference to a root which contains it. It may be useful in the context of reading a metadata file written out as part of a failed run. } +\details{ +Custom metadata saved by plugins may not be deserialised as +expected when called with this function, as it is designed to +operate separately from a valid orderly root (i.e., it will load +data from any file regardless of where it came from). If \code{plugins} +is \code{TRUE} (the default) then we will deserialise all data that +matches any loaded plugin. This means that the behaviour of this +function depends on if you have loaded the plugin packages. You +can force this by running \code{orderly2::orderly_config()} within any +orderly directory, which will load any declared plugins. +} diff --git a/man/orderly_plugin_register.Rd b/man/orderly_plugin_register.Rd index b464af5a..dbb0d92a 100644 --- a/man/orderly_plugin_register.Rd +++ b/man/orderly_plugin_register.Rd @@ -8,6 +8,7 @@ orderly_plugin_register( name, config, serialise = NULL, + deserialise = NULL, cleanup = NULL, schema = NULL ) @@ -34,6 +35,17 @@ unnamed list with values corresponding to \code{data}. If \code{NULL}, then no serialisation is done, and no metadata from your plugin will be added.} +\item{deserialise}{A function to deserialise any metadata +serialised by the \code{serialise} function. This is intended to help +deal with issues disambiguating unserialising objects from json +(scalars vs arrays of lenth 1, data.frames vs lists-of-lists +etc), and will make your plugin nicer to work with +\code{\link[=orderly_metadata_extract]{orderly_metadata_extract()}}. This function will be +given a single argument \code{data} which is the data from +\code{jsonlite::fromJSON(..., simplifyVector = FALSE)} and you should +apply and required simplifications yourself, returning a +modified copy of the argument.} + \item{cleanup}{Optionally, a function to clean up any state that your plugin uses. You can call \code{orderly_plugin_context} from within this function and access anything you need from that. If diff --git a/tests/testthat/test-outpack-packet.R b/tests/testthat/test-outpack-packet.R index eb49dc9f..41ec5adf 100644 --- a/tests/testthat/test-outpack-packet.R +++ b/tests/testthat/test-outpack-packet.R @@ -311,7 +311,7 @@ test_that("Can add multiple copies of extra data", { outpack_packet_end_quietly(p) path_metadata <- file.path(root$path, ".outpack", "metadata", p$id) - meta <- outpack_metadata_load(path_metadata) + meta <- outpack_metadata_load(path_metadata, NULL) expect_equal(meta$custom, list(app1 = list(a = 1, b = 2), app2 = list(c = list(1, 2, 3)))) diff --git a/tests/testthat/test-plugin.R b/tests/testthat/test-plugin.R index 1abcd1e0..b422646d 100644 --- a/tests/testthat/test-plugin.R +++ b/tests/testthat/test-plugin.R @@ -12,10 +12,6 @@ test_that("Can run simple example with plugin", { meta <- orderly_metadata(id, root = path) - ## Our nice vectors have become lists here, due to the general pain - ## of deserialising json, into R but at least it's all there. - ## Probably the most general solution involves plugins being able to - ## provide deserialisers that can apply any required simplification? expect_equal( meta$custom$example.random, list(list(as = "dat", mean = mean(cmp), variance = var(cmp)))) @@ -42,6 +38,36 @@ test_that("can run interactive example with plugin", { }) +test_that("Can use custom deserialiser plugin", { + clear_plugins() + on.exit(clear_plugins()) + path <- test_prepare_orderly_example("plugin") + + .plugins[["example.random"]]$deserialise <- function(data) { + data_frame( + as = vcapply(data, "[[", "as"), + mean = vnapply(data, "[[", "mean"), + variance = vnapply(data, "[[", "variance")) + } + + envir <- new.env() + set.seed(1) + id <- orderly_run_quietly("plugin", root = path, envir = envir) + + set.seed(1) + cmp <- rnorm(10) + expect_identical(envir$dat, cmp) + + meta <- orderly_metadata(id, root = path) + + root <- root_open(path, locate = FALSE, require_orderly = FALSE) + meta <- orderly_metadata(id, root = root) + expect_s3_class(meta$custom$example.random, "data.frame") + expect_equal(meta$custom$example.random, + data_frame(as = "dat", mean = mean(cmp), variance = var(cmp))) +}) + + test_that("loading plugin triggers package load", { skip_if_not_installed("mockery") clear_plugins() @@ -122,6 +148,7 @@ test_that("validate that plugins make sense", { skip_if_not_installed("mockery") config <- function(...) "config" serialise <- function(...) "serialise" + deserialise <- function(...) "deserialise" cleanup <- function(...) "cleanup" schema <- withr::local_tempfile(fileext = ".json") writeLines("{}", schema) @@ -129,25 +156,33 @@ test_that("validate that plugins make sense", { mock_pkg_root <- mockery::mock(dirname(schema), cycle = TRUE) mockery::stub(orderly_plugin, "pkg_root", mock_pkg_root) - p <- orderly_plugin("pkg", config, NULL, NULL, NULL) + p <- orderly_plugin("pkg", config, NULL, NULL, NULL, NULL) expect_identical(p$config, config) expect_identical(p$serialise, plugin_no_serialise) + expect_identical(p$deserialise, plugin_no_deserialise) expect_identical(p$cleanup, plugin_no_cleanup) expect_null(p$schema) - p <- orderly_plugin("pkg", config, serialise, cleanup, basename(schema)) + p <- orderly_plugin("pkg", config, serialise, deserialise, cleanup, + basename(schema)) expect_identical(p$config, config) expect_identical(p$serialise, serialise) + expect_identical(p$deserialise, deserialise) expect_identical(p$cleanup, cleanup) expect_equal(p$schema, file.path("pkg", basename(schema))) expect_error( - orderly_plugin("pkg", config, NULL, NULL, basename(schema)), + orderly_plugin("pkg", config, NULL, NULL, NULL, basename(schema)), "If 'schema' is given, then 'serialise' must be non-NULL") + expect_error( + orderly_plugin("pkg", config, NULL, deserialise, NULL, NULL), + "If 'deserialise' is given, then 'serialise' must be non-NULL") + unlink(schema) expect_error( - orderly_plugin("pkg", config, serialise, cleanup, basename(schema)), + orderly_plugin("pkg", config, serialise, deserialise, cleanup, + basename(schema)), sprintf("Expected schema file '%s' to exist in package 'pkg'", basename(schema)), fixed = TRUE) @@ -173,3 +208,29 @@ test_that("deal with devmode roots", { expect_equal(pkg_root("pkg"), "/path/to/pkg") expect_equal(pkg_root("pkg"), file.path("/path/to/pkg", "inst")) }) + + +test_that("gracefully cope with failed deserialisation", { + clear_plugins() + on.exit(clear_plugins()) + path <- test_prepare_orderly_example("plugin") + + .plugins[["example.random"]]$deserialise <- function(data) { + stop("some error here") + } + + set.seed(1) + cmp <- rnorm(10) + + envir <- new.env() + set.seed(1) + id <- orderly_run_quietly("plugin", root = path, envir = envir) + w <- expect_warning( + meta <- orderly_metadata(id, root = path), + "Deserialising custom metadata 'example.random' for '.+' failed") + expect_match(conditionMessage(w), "some error here") + expect_type(meta$custom$example.random, "list") + expect_equal( + meta$custom$example.random, + list(list(as = "dat", mean = mean(cmp), variance = var(cmp)))) +}) diff --git a/vignettes/plugins.Rmd b/vignettes/plugins.Rmd index 0dd75034..fc10450b 100644 --- a/vignettes/plugins.Rmd +++ b/vignettes/plugins.Rmd @@ -60,7 +60,7 @@ writeLines(c( path_example <- file.path(path_root, "src", "example") fs::dir_create(path_example) writeLines(c( - 'example.db::query(sql = "SELECT * FROM mtcars WHERE cyl == 4", as = "dat")', + 'dat <- example.db::query("SELECT * FROM mtcars WHERE cyl == 4")', 'orderly2::orderly_artefact("Summary of data", "data.rds")', "", 'saveRDS(summary(dat), "data.rds")'), @@ -165,23 +165,16 @@ The return value here should be the `data` argument with any auxiliary data adde Finally, for our minimal example, we need the function that actually does the query; in our example above this is `example.db::query`: ```{r, export_to_package = "minimal", eval = FALSE} -query <- function(sql, as) { +query <- function(sql) { ctx <- orderly2::orderly_plugin_context("example.db") dbname <- ctx$config$path con <- DBI::dbConnect(RSQLite::SQLite(), dbname) on.exit(DBI::dbDisconnect(con)) - d <- DBI::dbGetQuery(con, sql) - ctx$envir[[as]] <- d - invisible() + DBI::dbGetQuery(con, sql) } ``` -The arguments here are whatever you want the user to provide -- nothing here is special to `orderly2`. The important function here to call is `orderly2::orderly_plugin_context` which returns information that you can use to make the plugin work. This is explained in `?orderly2::orderly_plugin_context`, but in this example we use two elements: - -* `config`: the configuration for this plugin (i.e., the return value from our function `db_config`) -* `envir`: the (R) environment that the current packet is using - -We modify the packet environment by writing to `ctx$envir`. Finally, we return metadata to save into the final packet metadata, but here we return `NULL`, so this is always empty. Later, we'll show why and how to change this. +The arguments here are whatever you want the user to provide -- nothing here is special to `orderly2`. The important function here to call is `orderly2::orderly_plugin_context` which returns information that you can use to make the plugin work. This is explained in `?orderly2::orderly_plugin_context`, but in this example we use just one element, `config`, the configuration for this plugin (i.e., the return value from our function `db_config`); see `orderly2::orderly_plugin_context` for other context that can be accessed here. The last bit of package code is to register the plugin, we do this by calling `orderly2::orderly_plugin_register` within `.onLoad()` which is a special R function called when a package is loaded. This means that whenever your packages is loaded (regardless of whether it is attached) it will register the plugin. @@ -193,7 +186,7 @@ The last bit of package code is to register the plugin, we do this by calling `o } ``` -(It is important that the `name` argument here matches your package name, as orderly2 will trigger loading the package based on this name in the configuration; we may support multiple plugins within one page later.) +(It is important that the `name` argument here matches your package name, as orderly2 will trigger loading the package based on this name in the configuration; we may support multiple plugins within one package later.) Note that our `query` function here does not appear within this registration, just the function to read and process the configuration. @@ -206,7 +199,7 @@ r_output(readLines(file.path(path_pkg, "R/plugin.R"))) and the `NAMESPACE` file contains ```{r, echo = FALSE, results = "asis"} -plain_output(readLines(file.path(path_pkg, "R/plugin.R"))) +plain_output(readLines(file.path(path_pkg, "NAMESPACE"))) ``` ### Trying it out @@ -234,7 +227,7 @@ In our case, we expect a single key-value pair in `orderly_config.yml` with the ```{r, export_to_package = "full", eval = FALSE} db_config <- function(data, filename) { - if (!is.list(data) || !is.null(names(data)) || length(data) == 0) { + if (!is.list(data) || is.null(names(data)) || length(data) == 0) { stop("Expected a named list for orderly_config.yml:example.db") } if (length(data$path) != 1 || !is.character(data$path)) { @@ -262,16 +255,15 @@ Nothing about what the plugin does is saved into the report metadata unless you To save metadata, use the function `orderly2::orderly_plugin_add_metadata`; this takes as arguments your plugin name, any string you like to structure the saved metadata (here we'll use `query`) and whatever data you want to save: ```{r, export_to_package = "full", eval = FALSE} -query <- function(sql, as) { +query <- function(sql) { ctx <- orderly2::orderly_plugin_context("example.db") dbname <- ctx$config$path con <- DBI::dbConnect(RSQLite::SQLite(), dbname) on.exit(DBI::dbDisconnect(con)) d <- DBI::dbGetQuery(con, sql) - ctx$envir[[as]] <- d - info <- list(sql = sql, as = as, rows = nrow(d), cols = names(d)) + info <- list(sql = sql, rows = nrow(d), cols = names(d)) orderly2::orderly_plugin_add_metadata("example.db", "query", info) - invisible() + d } ``` @@ -280,12 +272,12 @@ This function is otherwise the same as the minimal version above. We also need to provide a serialisation function to ensure that the metadata is saved as expected. Because we saved our metadata under the key `query`, we will get a list back with an element `query` and then an unnamed list with as many elements as there were `query` calls in a given report. ```{r} -orderly_db_serialise <- function(data) { +db_serialise <- function(data) { for (i in seq_along(data$query)) { # Always save cols as a vector, even if length 1: data$query[[i]]$cols <- I(data$query[[i]]$cols) } - jsonlite::toJSON(data, auto_unbox = TRUE) + jsonlite::toJSON(data$query, auto_unbox = TRUE) } ``` @@ -300,11 +292,11 @@ writeLines(c( "{", ' "$schema": "http://json-schema.org/draft-07/schema#",', "", - ' "type": "object",', - ' "additionalProperties": {', + ' "type": "array",', + ' "items": {', ' "type": "object",', ' "properties": {', - ' "query": {', + ' "sql": {', ' "type": "string"', " },", ' "rows": {', @@ -313,11 +305,11 @@ writeLines(c( ' "cols": {', ' "type": "array",', ' "items": {', - ' "type": "character"', + ' "type": "string"', " }", " }", " },", - ' "required": ["query", "rows", "cols"],', + ' "required": ["sql", "rows", "cols"],', ' "additionalProperties": false', " }", "}"), @@ -327,7 +319,18 @@ json_output(readLines(path_schema)) We save this file as `inst/schema.json` within the package (any path within `inst` is fine). -Now, when we register the plugin, we provide the path to this schema, along with the serialisation function: +Finally, we can also add a deserialiation hook to convert the loaded metadata into a nice `data.frame`: + +```{r, echo = FALSE, results = "asis"} +db_deserialise <- function(data) { + data.frame( + sql = vapply(data, "[[", character(1), "sql"), + rows = vapply(data, "[[", numeric(1), "rows"), + cols = I(lapply(data, "[[", "cols"))) +} +``` + +Now, when we register the plugin, we provide the path to this schema, along with the serialisation and deserialisation functions: ```{r export_to_package = "full", eval = FALSE} .onLoad <- function(...) { @@ -335,6 +338,7 @@ Now, when we register the plugin, we provide the path to this schema, along with name = "example.db", config = db_config, serialise = db_serialise, + deserialise = db_deserialise, schema = "schema.json") } ``` @@ -358,6 +362,16 @@ r_output(readLines(file.path(path_pkg, "R/plugin.R"))) (this code could be in any .R file in the package, or across several). +```{r, include = FALSE} +pkgload::load_all(path_pkg) +``` + +```{r} +id <- orderly2::orderly_run("example", root = path_root) +meta <- orderly2::orderly_metadata(id, root = path_root) +meta$custom$example.db +``` + # Potential uses Our need for this functionality are similar to this example - pulling out the database functionality from the original version of orderly into something that is more independent, as it turns out to be useful only in a fraction of orderly use-cases. We can imagine other potential uses though, such as: