From 65041d5015f0bb1770ea75b9982bf1d6aaf0a733 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Mon, 20 Nov 2023 09:15:11 +0000 Subject: [PATCH 1/4] Set wd rather than using root= in vignettes --- vignettes/dependencies.Rmd | 49 +++++++++------- vignettes/introduction.Rmd | 103 ++++++++++++++++++++-------------- vignettes/plugins.Rmd | 2 +- vignettes/query.Rmd | 6 +- vignettes/troubleshooting.Rmd | 22 ++++++-- 5 files changed, 109 insertions(+), 73 deletions(-) diff --git a/vignettes/dependencies.Rmd b/vignettes/dependencies.Rmd index 86e4c78b..02d63d73 100644 --- a/vignettes/dependencies.Rmd +++ b/vignettes/dependencies.Rmd @@ -28,6 +28,16 @@ inline <- function(x) { knitr::opts_chunk$set( collapse = TRUE) + +.here <- getwd() +knitr::knit_hooks$set(inwd = function(before, options) { + if (before) { + setwd(options$inwd) + } else { + setwd(.here) + } + invisible() +}) ``` One of the core aims of `orderly2` is to allow collaborative analysis; to do this the end of one piece of work is an input for another piece of work, perhaps someone else's. To make this work in practice, one `orderly2` report can "depend" on some completed packet (or several completed packets) in order to pull in files as inputs. @@ -87,15 +97,15 @@ r_output(readLines(file.path(path, "src/analysis/orderly.R"))) Here, we've used `orderly2::orderly_dependency()` to pull in the file `data.rds` from the most recent version (`latest()`) of the `data` packet, then we've used that file as normal to make a plot, which we've saved as `analysis.png` (this is very similar to the example from `vignette("introduction")`, to get us started). -```{r} -id1 <- orderly2::orderly_run("data", root = path) -id2 <- orderly2::orderly_run("analysis", root = path) +```{r, inwd = path} +id1 <- orderly2::orderly_run("data") +id2 <- orderly2::orderly_run("analysis") ``` When we look at the metadata for the packet created from the `analysis` report, we can see it has used `r inline(id1)` as its dependency: -```{r} -orderly2::orderly_metadata(id2, root = path)$depends +```{r, inwd = path} +orderly2::orderly_metadata(id2)$depends ``` (indeed it had to, there is only one copy of the `data` packet to pick from). @@ -141,10 +151,10 @@ r_output(readLines(file.path(path, "src/data/orderly.R"))) We can run this for several values of `cyl`: -```{r} -orderly2::orderly_run("data", list(cyl = 4), root = path) -orderly2::orderly_run("data", list(cyl = 6), root = path) -orderly2::orderly_run("data", list(cyl = 8), root = path) +```{r, inwd = path} +orderly2::orderly_run("data", list(cyl = 4)) +orderly2::orderly_run("data", list(cyl = 6)) +orderly2::orderly_run("data", list(cyl = 8)) ``` Our follow-on analysis contains: @@ -155,16 +165,16 @@ r_output(readLines(file.path(path, "src/analysis/orderly.R"))) Here the query `latest(parameter:cyl == this:cyl)` says "find the most recent packet where it's parameter "cyl" (`parameter:cyl`) is the same as the parameter in the currently running report (`this:cyl`). -```{r} -orderly2::orderly_run("analysis", list(cyl = 4), root = path) +```{r, inwd = path} +orderly2::orderly_run("analysis", list(cyl = 4)) ``` ## Interpreting errors If your query fails to resolve a candidate it will error: -```{r, error = TRUE} -orderly2::orderly_run("analysis", list(cyl = 9000), root = path) +```{r, error = TRUE, inwd = path} +orderly2::orderly_run("analysis", list(cyl = 9000)) ``` The error message here tries to be fairly self explanatory; we have failed to find a packet that satisfies our query` latest(parameter:cyl == this:cyl && name == "data")`; note that the report name `data` has become part of this query, so there are two conditions being matched on. @@ -181,16 +191,16 @@ This tells you that your query can be decomposed into two subqueries `A` (the ma You can also ask `orderly2` to explain any query for you: -```{r} +```{r, inwd = path} orderly2::orderly_query_explain( - quote(latest(parameter:cyl == 9000)), name = "data", root = path) + quote(latest(parameter:cyl == 9000)), name = "data") ``` If you save this object you can explore it in more detail: -```{r} +```{r, inwd = path} explanation <- orderly2::orderly_query_explain( - quote(latest(parameter:cyl == 9000)), name = "data", root = path) + quote(latest(parameter:cyl == 9000)), name = "data") explanation$parts$B ``` @@ -198,11 +208,10 @@ explanation$parts$B You can also use `orderly2::orderly_metadata_extract` to work out what values you might have looked for: -```{r} +```{r, inwd = path} orderly2::orderly_metadata_extract( name = "data", - extract = c(cyl = "parameters.cyl is number"), - root = path) + extract = c(cyl = "parameters.cyl is number")) ``` ## Filtering candidates in other ways diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd index 48fdf074..cedc00cf 100644 --- a/vignettes/introduction.Rmd +++ b/vignettes/introduction.Rmd @@ -28,6 +28,16 @@ inline <- function(x) { knitr::opts_chunk$set( collapse = TRUE) + +.here <- getwd() +knitr::knit_hooks$set(inwd = function(before, options) { + if (before) { + setwd(options$inwd) + } else { + setwd(.here) + } + invisible() +}) ``` This vignette provides a how-to style introduction to `orderly2`, an overview of key ingredients to writing orderly reports, and a summary of key features and ideas. It may be useful to look at `vignette("orderly2")` for a more roundabout discussion of what `orderly2` is trying to achieve, or `vignette("migrating")` if you are familiar with version 1 of orderly as this explains concepts in terms of differences from the previous version. @@ -39,7 +49,7 @@ The first step is to initialise an empty `orderly2` repository. An `orderly2` re Create an orderly2 repository by calling `orderly2::orderly_init()`: ```{r} -path <- tempfile() # we'll use a temporary directory here +path <- tempfile() # we'll use a temporary directory here - see note below orderly2::orderly_init(path) ``` @@ -49,7 +59,7 @@ which creates a few files: dir_tree(path, all = TRUE) ``` -This step should be performed on a completely empty directory, otherwise an error will be thrown. Later, you will re-initialise an `orderly2` repository when cloning to a new machine, such as when working with others; this is discussed in `vignette("collaboration")`l. +This step should be performed on a completely empty directory, otherwise an error will be thrown. Later, you will re-initialise an `orderly2` repository when cloning to a new machine, such as when working with others; this is discussed in `vignette("collaboration")`. The `orderly_config.yml` file contains very little by default: @@ -57,6 +67,19 @@ The `orderly_config.yml` file contains very little by default: yaml_output(readLines(file.path(path, "orderly_config.yml"))) ``` +For this vignette, the created orderly root is in R's per-session temporary directory, which will be deleted once R exits. If you want to use a directory that will persist across restarting R (which you would certainly want when using `orderly2` on a real project!) you should replace this with a path within your home directory, or other location that you control. + + + +For the rest of the vignette we will evaluate commands from within this directory, by changing the directory to the path we've created: + +```r +setwd(path) +``` + # Creating your first orderly report An orderly report is a directory `src/` containing a file `orderly.R`. That file may have special commands in it, but for now we'll create one that is as simple as possible; we'll create some random data and save it to disk. This seems silly, but imagine this standing in for something like: @@ -93,8 +116,8 @@ r_output(readLines(file.path(path, "src/incoming_data/orderly.R"))) To run the report and create a new **packet**, use `orderly2::orderly_run()`: -```{r} -id <- orderly2::orderly_run("incoming_data", root = path) +```{r, inwd = path} +id <- orderly2::orderly_run("incoming_data") id ``` @@ -120,11 +143,11 @@ That's it! Notice that the initial script is just a plain R script, and you can Once created, you can then refer to this report by id and pull its files wherever you need them, both in the context of another orderly report or just to copy to your desktop to email someone. For example, to copy the file `data.rds` that we created to some location outside of orderly's control you could do -```{r} +```{r, inwd = path} dest <- tempfile() fs::dir_create(dest) orderly2::orderly_copy_files(id, files = c("final.rds" = "data.rds"), - dest = dest, root = path) + dest = dest) ``` which copies `data.rds` to some new temporary directory `dest` with name `final.rds`. This uses `orderly2`'s `outpack_` functions, which are designed to interact with outpack archives regardless of how they were created (`orderly2` is a program that creates `outpack` archives). Typically these are lower-level than `orderly_` functions. @@ -162,8 +185,8 @@ Here, we've used `orderly2::orderly_dependency()` to pull in the file `data.rds` We can run this just as before, using `orderly2::orderly_run()`: -```{r} -id <- orderly2::orderly_run("analysis", root = path) +```{r, inwd = path} +id <- orderly2::orderly_run("analysis") ``` For more information on dependencies, see `vignette("dependencies")`. @@ -201,8 +224,8 @@ r_output(readLines(file.path(path, "src/incoming_data/orderly.R"))) Here, we've added a block of special orderly commands; these could go anywhere, for example above the files that they refer to. If strict mode is enabled (see below) then `orderly2::orderly_resource` calls must go before the files are used as they will only be made available at that point (see below). -```{r} -id <- orderly2::orderly_run("incoming_data", root = path) +```{r, inwd = path} +id <- orderly2::orderly_run("incoming_data") ``` # Parameterised reports @@ -247,30 +270,28 @@ You can do anything in your report that switches on the value of a parameter: However, you should see parameters as relatively heavyweight things and try to have a consistent set over all packets created from a report. In this report we use it to control the size of the generated data set. -```{r} -id <- orderly2::orderly_run("random", list(n_samples = 15), root = path) +```{r, inwd = path} +id <- orderly2::orderly_run("random", list(n_samples = 15)) ``` Our resulting file has 15 rows, as the parameter we passed in affected the report: -```{r} +```{r, inwd = path} orderly2::orderly_copy_files(id, files = c("random.rds" = "data.rds"), - dest = dest, root = path) + dest = dest) readRDS(file.path(dest, "random.rds")) ``` You can use these parameters in orderly's search functions. For example we can find the most recent version of a packet by running: -```{r} -orderly2::orderly_search('latest(name == "random")', - root = path) +```{r, inwd = path} +orderly2::orderly_search('latest(name == "random")') ``` But we can also pass in parameter queries here: -```{r} -orderly2::orderly_search('latest(name == "random" && parameter:n_samples > 10)', - root = path) +```{r, inwd = path} +orderly2::orderly_search('latest(name == "random" && parameter:n_samples > 10)') ``` These can be used within `orderly2::orderly_dependency()` (the `name == "random"` part is implied by the first `name` argument), for example @@ -324,8 +345,8 @@ r_output(readLines(file.path(path, "src/use_shared/orderly.R"))) We can run this: -```{r} -id <- orderly2::orderly_run("use_shared", root = path) +```{r, inwd = path} +id <- orderly2::orderly_run("use_shared") ``` In the resulting archive, the file that was used from the shared directory is present: @@ -364,24 +385,24 @@ withr::with_dir(file.path(path, "src/incoming_data"), sys.source("orderly.R", new.env(parent = .GlobalEnv))) ``` -```{r} -orderly2::orderly_cleanup_status("incoming_data", root = path) +```{r, inwd = path} +orderly2::orderly_cleanup_status("incoming_data") ``` If you have files here that are unknown to orderly it will tell you about them and prompt you to tell it about them explicitly. You can clean up generated files by running (as suggested in the message): -```{r} -orderly2::orderly_cleanup("incoming_data", root = path) +```{r, inwd = path} +orderly2::orderly_cleanup("incoming_data") ``` There is a `dry_run = TRUE` argument you can pass if you want to see what would be deleted without using the status function. You can also keep these files out of git by using the `orderly2::orderly_gitignore_update` function: -```{r} -orderly2::orderly_gitignore_update("incoming_data", root = path) +```{r, inwd = path} +orderly2::orderly_gitignore_update("incoming_data") ``` This creates (or updates) a `.gitignore` file within the report so that generated files will not be included by git. If you have already accidentally committed them then the gitignore has no real effect and you should do some git surgery, see the git manuals or this [handy, if profane, guide](https://ohshitgit.com/). @@ -392,23 +413,21 @@ If you delete packets from your `archive/` directory then this puts `orderly2` i At the moment, we have two copies of the `incoming_data` task: -```{r} +```{r, inwd = path} orderly2::orderly_metadata_extract( name = "incoming_data", - extract = c(time = "time.start"), - root = path) + extract = c(time = "time.start")) ``` -```{r include = FALSE} -id_latest <- orderly2::orderly_search("latest", name = "incoming_data", - root = path) +```{r include = FALSE, inwd = path} +id_latest <- orderly2::orderly_search("latest", name = "incoming_data") unlink(file.path(path, "archive", "incoming_data", id_latest), recursive = TRUE) ``` When we run the `analysis` task, it will pull in the most recent version (`r inline(id_latest)`). However, if you had deleted this manually (e.g., to save space or accidentally) or corrupted it (e.g., by opening some output in Excel and letting it save changes) it will not be able to be included, and running `analysis` will fail: -```{r error = TRUE} -orderly2::orderly_run("analysis", root = path) +```{r error = TRUE, inwd = path} +orderly2::orderly_run("analysis") ``` The error here tries to be fairly informative, telling us that we failed because when copying files from `r inline(id_latest)` we found that the packet was corrupt, because the file `data.rds` was not found in the archive. It also suggests a fix; we can tell `orderly2` that `r inline(id_latest)` is "orphaned" and should not be considered for inclusion when we look for dependencies. @@ -423,16 +442,16 @@ r_output( or we can validate *all* the packets we have: -```{r} -orderly2::orderly_validate_archive(action = "orphan", root = path) +```{r, inwd = path} +orderly2::orderly_validate_archive(action = "orphan") ``` If we had the option `core.require_complete_tree` enabled, then this process would also look for any packets that used our now-deleted packet and orphan those too, as we no longer have a complete tree that includes them. If you want to remove references to the orphaned packets, you can use `orderly2::orderly_prune_orphans()` to remove them entirely: -```{r} -orderly2::orderly_prune_orphans(root = path) +```{r, inwd = path} +orderly2::orderly_prune_orphans() ``` # Debugging and coping with errors @@ -476,8 +495,8 @@ As can be perhaps inferred from the filenames, the files `.outpack/metadata/" gert::git_commit("initial", author = user, committer = user, repo = path) + +.here <- getwd() +knitr::knit_hooks$set(inwd = function(before, options) { + if (before) { + setwd(options$inwd) + } else { + setwd(.here) + } + invisible() +}) ``` -```{r, error = TRUE} -orderly2::orderly_run("data", root = path) +```{r, error = TRUE, inwd = path} +orderly2::orderly_run("data") ``` which may have directed you to this very page. If you just want to continue working anyway, then run the suggested command: @@ -38,14 +48,14 @@ options(orderly_git_error_is_warning = TRUE) after which things will work with a warning the first time that session: -```{r} -orderly2::orderly_run("data", root = path) +```{r, inwd = path} +orderly2::orderly_run("data") ``` subsequent calls will not display the warning: -```{r} -orderly2::orderly_run("data", root = path) +```{r, inwd = path} +orderly2::orderly_run("data") ``` ```{r, include = FALSE} From e37a7cbc0c5ec9d96ece9e11cdebc8f52166a902 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Mon, 20 Nov 2023 11:24:40 +0000 Subject: [PATCH 2/4] Move common code into helper file --- vignettes/collaboration.Rmd | 27 +-------------------------- vignettes/dependencies.Rmd | 31 +------------------------------ vignettes/introduction.Rmd | 31 +------------------------------ vignettes/plugins.Rmd | 13 ++----------- 4 files changed, 5 insertions(+), 97 deletions(-) diff --git a/vignettes/collaboration.Rmd b/vignettes/collaboration.Rmd index 7d384221..1d983ecc 100644 --- a/vignettes/collaboration.Rmd +++ b/vignettes/collaboration.Rmd @@ -8,23 +8,8 @@ vignette: > --- ```{r, include = FALSE} -dir_tree <- function(path, sub = ".", ...) { - withr::with_dir(path, fs::dir_tree(sub, ...)) -} - -lang_output <- function(x, lang) { - writeLines(c(sprintf("```%s", lang), x, "```")) -} -r_output <- function(x) lang_output(x, "r") -yaml_output <- function(x) lang_output(x, "yaml") -plain_output <- function(x) lang_output(x, "plain") -orderly_file <- function(...) { - system.file(..., package = "orderly2", mustWork = TRUE) -} +source("common.R") -knitr::opts_chunk$set( - collapse = TRUE) - path <- tempfile() fs::dir_create(path) path_git <- file.path(path, "git") @@ -54,16 +39,6 @@ knitr::opts_hooks$set(as = function(options) { options$class.output <- options$as options }) - -.here <- getwd() -knitr::knit_hooks$set(inwd = function(before, options) { - if (before) { - setwd(options$inwd) - } else { - setwd(.here) - } - invisible() -}) ```