From bee18885922e5b67f8989f289b479df58a203671 Mon Sep 17 00:00:00 2001 From: richfitz Date: Fri, 31 May 2024 10:46:30 +0000 Subject: [PATCH] =?UTF-8?q?Deploying=20to=20gh-pages=20from=20@=20mrc-ide/?= =?UTF-8?q?orderly2@9eb12a65bf04458135555d81774901b24a52c22f=20?= =?UTF-8?q?=F0=9F=9A=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- articles/collaboration.html | 16 +-- articles/dependencies.html | 91 ++++++++-------- articles/introduction.html | 171 +++++++++++++++--------------- articles/plugins.html | 17 +-- articles/troubleshooting.html | 12 +-- pkgdown.yml | 2 +- reference/orderly_cleanup.html | 10 +- reference/orderly_config.html | 2 +- reference/orderly_config_set.html | 6 +- reference/orderly_example.html | 4 +- reference/orderly_init.html | 2 +- reference/orderly_list_src.html | 4 +- reference/orderly_run.html | 8 +- search.json | 2 +- 14 files changed, 176 insertions(+), 171 deletions(-) diff --git a/articles/collaboration.html b/articles/collaboration.html index acf939d3..52e1e561 100644 --- a/articles/collaboration.html +++ b/articles/collaboration.html @@ -207,7 +207,7 @@

An example
 orderly2::orderly_init(".")
-##  Created orderly root at '/tmp/RtmpMTfZsy/file1b1d254eff81/alice'
+##  Created orderly root at '/tmp/RtmpXRCMha/file1b3662d2f1fe/alice'
 ##  Wrote '.gitignore'
 orderly2::orderly_list_src()
 ## [1] "data"
@@ -220,11 +220,11 @@

An example
 id <- orderly2::orderly_run("data")
-##  Starting packet 'data' `20240424-151148-abb5c2a3` at 2024-04-24 15:11:48.677576
+##  Starting packet 'data' `20240531-104602-53ab22e4` at 2024-05-31 10:46:02.333439
 ## > orderly2::orderly_artefact("Final data", "data.rds")
 ## > saveRDS(mtcars, "data.rds")
 ##  Finished running data.R
-##  Finished 20240424-151148-abb5c2a3 at 2024-04-24 15:11:48.736665 (0.05908823 secs)
+## Finished 20240531-104602-53ab22e4 at 2024-05-31 10:46:02.390739 (0.05730009 secs)

Perhaps it takes several goes for Alice to be happy with the analysis, but at some point she has something ready to share. She can then “push” the final packet up onto their server:

@@ -234,7 +234,7 @@

An example
 orderly2::orderly_init(".")
-##  Created orderly root at '/tmp/RtmpMTfZsy/file1b1d254eff81/bob'
+##  Created orderly root at '/tmp/RtmpXRCMha/file1b3662d2f1fe/bob'
 ##  Wrote '.gitignore'
 orderly2::orderly_location_add(
@@ -248,7 +248,7 @@ 

An example= "data", options = list(allow_remote = TRUE, pull_metadata = TRUE)) ## id name parameters -## 1 20240424-151148-abb5c2a3 data

+## 1 20240531-104602-53ab22e4 data

Having seen there is a new “data” packet here, he can pull this down locally (TODO: mrc-4414 makes this nicer):

Now Bob is in a position to develop against the same packet that -Alice ran (20240424-151148-abb5c2a3)

+Alice ran (20240531-104602-53ab22e4)

Possible working patterns @@ -416,7 +416,7 @@

Sharing p use_file_store = TRUE, require_complete_tree = TRUE ) -## Created orderly root at '/tmp/RtmpMTfZsy/file1b1d254eff81/sharepoint'

+## Created orderly root at '/tmp/RtmpXRCMha/file1b3662d2f1fe/sharepoint'

Create an orderly store with a file store and a complete tree. See orderly2::orderly_init() for more details.

diff --git a/articles/dependencies.html b/articles/dependencies.html index b2c51270..52b1fc2d 100644 --- a/articles/dependencies.html +++ b/articles/dependencies.html @@ -152,16 +152,17 @@

Basic usevignette("introduction"), to get us started).

 id1 <- orderly2::orderly_run("data")
-##  Starting packet 'data' `20240424-151152-40cb9e6b` at 2024-04-24 15:11:52.256443
+##  Starting packet 'data' `20240531-104605-c78fa402` at 2024-05-31 10:46:05.782817
 ## > d <- read.csv("data.csv")
 ## > d$z <- resid(lm(y ~ x, d))
 ## > saveRDS(d, "data.rds")
 ##  Finished running data.R
-##  Finished 20240424-151152-40cb9e6b at 2024-04-24 15:11:52.292981 (0.03653789 secs)
-id2 <- orderly2::orderly_run("analysis")
-##  Starting packet 'analysis' `20240424-151152-532850fe` at 2024-04-24 15:11:52.327995
+##  Finished 20240531-104605-c78fa402 at 2024-05-31 10:46:05.821991 (0.03917432 secs)
+
+id2 <- orderly2::orderly_run("analysis")
+##  Starting packet 'analysis' `20240531-104605-d93d2c5e` at 2024-05-31 10:46:05.851649
 ## > orderly2::orderly_dependency("data", "latest()", "data.rds")
-##  Depending on data @ `20240424-151152-40cb9e6b` (via latest(name == "data"))
+##  Depending on data @ `20240531-104605-c78fa402` (via latest(name == "data"))
 ## > d <- readRDS("data.rds")
 ## > png("analysis.png")
 ## > plot(y ~ x, d)
@@ -169,14 +170,14 @@ 

Basic use## agg_png ## 2 ## Finished running analysis.R -## Finished 20240424-151152-532850fe at 2024-04-24 15:11:52.448688 (0.1206923 secs)

+## Finished 20240531-104605-d93d2c5e at 2024-05-31 10:46:05.943615 (0.09196615 secs)

When we look at the metadata for the packet created from the analysis report, we can see it has used -20240424-151152-40cb9e6b as its dependency:

-
+20240531-104605-c78fa402 as its dependency:

+
 orderly2::orderly_metadata(id2)$depends
 ##                     packet                  query        files
-## 1 20240424-151152-40cb9e6b latest(name == "data") data.rds....
+## 1 20240531-104605-c78fa402 latest(name == "data") data.rds....

(indeed it had to, there is only one copy of the data packet to pick from).

@@ -194,44 +195,46 @@

Filtering candidates by parameters## └── data ## └── data.R

with src/data/data.R containing:

-
+
 orderly2::orderly_parameters(cyl = NULL)
 d <- mtcars[mtcars$cyl == cyl, ]
 saveRDS(d, "data.rds")

We can run this for several values of cyl:

-
+
 orderly2::orderly_run("data", list(cyl = 4))
-##  Starting packet 'data' `20240424-151152-d21a88f8` at 2024-04-24 15:11:52.823968
+##  Starting packet 'data' `20240531-104606-50d29110` at 2024-05-31 10:46:06.318926
 ##  Parameters:
 ## • cyl: 4
 ## > orderly2::orderly_parameters(cyl = NULL)
 ## > d <- mtcars[mtcars$cyl == cyl, ]
 ## > saveRDS(d, "data.rds")
 ##  Finished running data.R
-##  Finished 20240424-151152-d21a88f8 at 2024-04-24 15:11:52.857697 (0.03372908 secs)
-## [1] "20240424-151152-d21a88f8"
-orderly2::orderly_run("data", list(cyl = 6))
-##  Starting packet 'data' `20240424-151152-e0514143` at 2024-04-24 15:11:52.879508
+##  Finished 20240531-104606-50d29110 at 2024-05-31 10:46:06.351108 (0.03218246 secs)
+## [1] "20240531-104606-50d29110"
+
+orderly2::orderly_run("data", list(cyl = 6))
+##  Starting packet 'data' `20240531-104606-5e3fbe71` at 2024-05-31 10:46:06.371227
 ##  Parameters:
 ## • cyl: 6
 ## > orderly2::orderly_parameters(cyl = NULL)
 ## > d <- mtcars[mtcars$cyl == cyl, ]
 ## > saveRDS(d, "data.rds")
 ##  Finished running data.R
-##  Finished 20240424-151152-e0514143 at 2024-04-24 15:11:52.907932 (0.02842307 secs)
-## [1] "20240424-151152-e0514143"
-orderly2::orderly_run("data", list(cyl = 8))
-##  Starting packet 'data' `20240424-151152-eea3f8b3` at 2024-04-24 15:11:52.935514
+##  Finished 20240531-104606-5e3fbe71 at 2024-05-31 10:46:06.400539 (0.0293119 secs)
+## [1] "20240531-104606-5e3fbe71"
+
+orderly2::orderly_run("data", list(cyl = 8))
+##  Starting packet 'data' `20240531-104606-6bb0c3d6` at 2024-05-31 10:46:06.42374
 ##  Parameters:
 ## • cyl: 8
 ## > orderly2::orderly_parameters(cyl = NULL)
 ## > d <- mtcars[mtcars$cyl == cyl, ]
 ## > saveRDS(d, "data.rds")
 ##  Finished running data.R
-##  Finished 20240424-151152-eea3f8b3 at 2024-04-24 15:11:52.963491 (0.02797675 secs)
-## [1] "20240424-151152-eea3f8b3"
+## Finished 20240531-104606-6bb0c3d6 at 2024-05-31 10:46:06.450406 (0.02666569 secs) +## [1] "20240531-104606-6bb0c3d6"

Our follow-on analysis contains:

-
+
 

Interpreting errors

If your query fails to resolve a candidate it will error:

-
+
 orderly2::orderly_run("analysis", list(cyl = 9000))
-##  Starting packet 'analysis' `20240424-151153-41f38125` at 2024-04-24 15:11:53.260982
+##  Starting packet 'analysis' `20240531-104606-bcf590c3` at 2024-05-31 10:46:06.741264
 ##  Parameters:
 ## • cyl: 9000
 ## > orderly2::orderly_parameters(cyl = NULL)
@@ -281,7 +284,7 @@ 

Interpreting errors## + "latest(parameter:cyl == this:cyl)", ## + "data.rds") ## Error running analysis.R -## Finished 20240424-151153-41f38125 at 2024-04-24 15:11:53.327842 (0.06685972 secs) +## Finished 20240531-104606-bcf590c3 at 2024-05-31 10:46:06.80333 (0.06206608 secs) ## Error in `orderly2::orderly_run()`: ## ! Failed to run report ## Caused by error in `outpack_packet_use_dependency()`: @@ -296,7 +299,7 @@

Interpreting errors
+
 rlang::last_error()$explanation
 ## Evaluated query: 'latest(A && B)' and found 0 packets
 ## • A (parameter:cyl == this:cyl): 0 packets
@@ -313,7 +316,7 @@ 

Interpreting errors
+
 orderly2::orderly_query_explain(
   quote(latest(parameter:cyl == 9000)), name = "data")
 ## Evaluated query: 'latest(A && B)' and found 0 packets
@@ -321,7 +324,7 @@ 

Interpreting errors## ## • B (name == "data"): 3 packets

If you save this object you can explore it in more detail:

-

(this would have worked with rlang::last_error()$explanation$parts$A too).

You can also use orderly2::orderly_metadata_extract to work out what values you might have looked for:

-
+
 orderly2::orderly_metadata_extract(
   name = "data",
   extract = c(cyl = "parameters.cyl is number"))
 ##                         id cyl
-## 1 20240424-151152-d21a88f8   4
-## 2 20240424-151152-e0514143   6
-## 3 20240424-151152-eea3f8b3   8
+## 1 20240531-104606-50d29110 4 +## 2 20240531-104606-5e3fbe71 6 +## 3 20240531-104606-6bb0c3d6 8

Filtering candidates in other ways @@ -365,7 +368,7 @@

Filtering candidates in other wayslatest(parameter:cyl == environment:cyl) to match against whatever value cyl took in the evaluating environment.

Instead of a query, you can provide a single id (e.g, -20240424-151153-1de72a0d), which would mean that even as +20240531-104606-99e93c16), which would mean that even as new copies of the data packet are created, this dependency will always resolve to the same value.

You can chain together logical operations with @@ -373,7 +376,7 @@

Filtering candidates in other ways==, the usual complement of comparison operators will work. So you might have complex queries like

-
+
 latest((parameter:x == 1 || parameter:x == 2) && parameter:y > 10)

but in practice most people have queries that are a series of restrictions with &&.

@@ -409,7 +412,7 @@

Computing de “z”.

Especially if there are only three values and these are hard coded, you might just write it out as

-
+
 orderly2::orderly_dependency("C", quote(latest(parameter:p == "x")),
                              c("data/x.rds" = "result.rds"))
 orderly2::orderly_dependency("C", quote(latest(parameter:p == "y")),
@@ -421,7 +424,7 @@ 

Computing de destination file a different name (so we end up with three files in data/).

You can write this out as a for loop:

-
+
 for (p in c("x", "y", "z")) {
   orderly2::orderly_dependency("C", quote(latest(parameter:p == environment:p)),
                                c("data/${p}.rds" = "result.rds"))
diff --git a/articles/introduction.html b/articles/introduction.html
index 0d772990..634f620e 100644
--- a/articles/introduction.html
+++ b/articles/introduction.html
@@ -121,7 +121,7 @@ 

Creating an empty orderly reposito
 path <- tempfile() # we'll use a temporary directory here - see note below
 orderly2::orderly_init(path)
-##  Created orderly root at '/tmp/RtmpE2ZjAK/file1ba8c5540ac'
+## Created orderly root at '/tmp/RtmpsfXo4L/file1bc046aceb83'

which creates a few files:

## .
 ## ├── .outpack
@@ -183,14 +183,15 @@ 

Creating your first orderly reportorderly2::orderly_run():

 id <- orderly2::orderly_run("incoming_data")
-##  Starting packet 'incoming_data' `20240424-151156-ca4abba3` at 2024-04-24 15:11:56.795397
+##  Starting packet 'incoming_data' `20240531-104610-016da7fc` at 2024-05-31 10:46:10.010346
 ## > d <- read.csv("data.csv")
 ## > d$z <- resid(lm(y ~ x, d))
 ## > saveRDS(d, "data.rds")
 ##  Finished running incoming_data.R
-##  Finished 20240424-151156-ca4abba3 at 2024-04-24 15:11:56.861329 (0.06593227 secs)
-id
-## [1] "20240424-151156-ca4abba3"
+## Finished 20240531-104610-016da7fc at 2024-05-31 10:46:10.070675 (0.06032944 secs)

+
+id
+## [1] "20240531-104610-016da7fc"

The id that is created is a new identifier for the packet that will be both unique among all packets (within reason) and chronologically sortable. A packet that has an id that sorts after @@ -199,7 +200,7 @@

Creating your first orderly report## . ## ├── archive ## │ └── incoming_data -## │ └── 20240424-151156-ca4abba3 +## │ └── 20240531-104610-016da7fc ## │ ├── data.csv ## │ ├── data.rds ## │ └── incoming_data.R @@ -212,7 +213,7 @@

Creating your first orderly report## └── incoming_data.R

A few things have changed here:

    -
  • we have a directory archive/incoming_data/20240424-151156-ca4abba3; +
  • we have a directory archive/incoming_data/20240531-104610-016da7fc; this directory contains
    • the file that was created when we ran the report @@ -241,7 +242,7 @@

      Creating your first orderly reportdata.rds that we created to some location outside of orderly’s control you could do

      -
      +
       dest <- tempfile()
       fs::dir_create(dest)
       orderly2::orderly_copy_files(id, files = c("final.rds" = "data.rds"),
      @@ -268,7 +269,7 @@ 

      Depending on packets from anot ## ├── data.csv ## └── incoming_data.R

      and src/analysis/analysis.R contains:

      -
      +
       orderly2::orderly_dependency("incoming_data", "latest()",
                                    c("incoming.rds" = "data.rds"))
       d <- readRDS("incoming.rds")
      @@ -282,12 +283,12 @@ 

      Depending on packets from anot to make a plot, which we’ve saved as analysis.png.

      We can run this just as before, using orderly2::orderly_run():

      -
      +
       id <- orderly2::orderly_run("analysis")
      -##  Starting packet 'analysis' `20240424-151157-52293830` at 2024-04-24 15:11:57.324472
      +##  Starting packet 'analysis' `20240531-104610-89680745` at 2024-05-31 10:46:10.539819
       ## > orderly2::orderly_dependency("incoming_data", "latest()",
       ## +                              c("incoming.rds" = "data.rds"))
      -##  Depending on incoming_data @ `20240424-151156-ca4abba3` (via latest(name == "incoming_data"))
      +##  Depending on incoming_data @ `20240531-104610-016da7fc` (via latest(name == "incoming_data"))
       ## > d <- readRDS("incoming.rds")
       ## > png("analysis.png")
       ## > plot(y ~ x, d)
      @@ -295,7 +296,7 @@ 

      Depending on packets from anot ## agg_png ## 2 ## Finished running analysis.R -## Finished 20240424-151157-52293830 at 2024-04-24 15:11:57.409331 (0.08485889 secs)

      +## Finished 20240531-104610-89680745 at 2024-05-31 10:46:10.619527 (0.07970786 secs)

      For more information on dependencies, see vignette("dependencies").

      @@ -354,7 +355,7 @@

      Available in-report orderly comman a report that returns information about the currently running report (its id, resolved dependencies etc).

      Let’s add some additional annotations to the previous reports:

      -
      +
       orderly2::orderly_strict_mode()
       orderly2::orderly_resource("data.csv")
       orderly2::orderly_artefact("Processed data", "data.rds")
      @@ -367,9 +368,9 @@ 

      Available in-report orderly comman is enabled (see below) then orderly2::orderly_resource calls must go before the files are used as they will only be made available at that point (see below).

      -
      +
       id <- orderly2::orderly_run("incoming_data")
      -##  Starting packet 'incoming_data' `20240424-151157-9e2c72c1` at 2024-04-24 15:11:57.620057
      +##  Starting packet 'incoming_data' `20240531-104610-d43a7e7d` at 2024-05-31 10:46:10.830976
       ## > orderly2::orderly_strict_mode()
       ## > orderly2::orderly_resource("data.csv")
       ## > orderly2::orderly_artefact("Processed data", "data.rds")
      @@ -377,7 +378,7 @@ 

      Available in-report orderly comman ## > d$z <- resid(lm(y ~ x, d)) ## > saveRDS(d, "data.rds") ## Finished running incoming_data.R -## Finished 20240424-151157-9e2c72c1 at 2024-04-24 15:11:57.651862 (0.0318048 secs)

      +## Finished 20240531-104610-d43a7e7d at 2024-05-31 10:46:10.857081 (0.0261054 secs)

      Parameterised reports @@ -389,7 +390,7 @@

      Parameterised reports

      For example, consider a simple report where we generate samples based on some parameter:

      -
      +
       orderly2::orderly_parameters(n_samples = 10)
       x <- seq_len(n_samples)
       d <- data.frame(x = x, y = x + rnorm(n_samples))
      @@ -397,11 +398,11 @@ 

      Parameterised reportsThis creates a report that has a single parameter n_samples with a default value of 10. We could have used

      -
      +
       

      to define a parameter with no default, or defined multiple parameters with

      -
      +
       orderly2::orderly_parameters(n_samples = 10, distribution = "normal")

      You can do anything in your report that switches on the value of a parameter:

      @@ -417,9 +418,9 @@

      Parameterised reports -
      +
       id <- orderly2::orderly_run("random", list(n_samples = 15))
      -##  Starting packet 'random' `20240424-151157-dd5418e7` at 2024-04-24 15:11:57.86802
      +##  Starting packet 'random' `20240531-104611-127add53` at 2024-05-31 10:46:11.075315
       ##  Parameters:
       ## • n_samples: 15
       ## > orderly2::orderly_parameters(n_samples = 10)
      @@ -427,10 +428,10 @@ 

      Parameterised reports## > d <- data.frame(x = x, y = x + rnorm(n_samples)) ## > saveRDS(d, "data.rds") ## Finished running random.R -## Finished 20240424-151157-dd5418e7 at 2024-04-24 15:11:57.903692 (0.03567195 secs)

      +## Finished 20240531-104611-127add53 at 2024-05-31 10:46:11.103935 (0.02861929 secs)

      Our resulting file has 15 rows, as the parameter we passed in affected the report:

      -
      +
       orderly2::orderly_copy_files(id, files = c("random.rds" = "data.rds"),
                                    dest = dest)
       readRDS(file.path(dest, "random.rds"))
      @@ -452,24 +453,24 @@ 

      Parameterised reports## 15 15 16.8885049

      You can use these parameters in orderly’s search functions. For example we can find the most recent version of a packet by running:

      -
      +
       orderly2::orderly_search('latest(name == "random")')
      -## [1] "20240424-151157-dd5418e7"
      +## [1] "20240531-104611-127add53"

      But we can also pass in parameter queries here:

      -
      +
       orderly2::orderly_search('latest(name == "random" && parameter:n_samples > 10)')
      -## [1] "20240424-151157-dd5418e7"
      +## [1] "20240531-104611-127add53"

      These can be used within orderly2::orderly_dependency() (the name == "random" part is implied by the first name argument), for example

      -
      +
       orderly2::orderly_dependency("random", "latest(parameter:n_samples > 10)",
                                    c("randm.rds" = "data.rds"))

      In this case if the report that you are querying from also has parameters you can use these within the query, using the this prefix. So suppose our downstream report simply uses n for the number of samples we might write:

      -
      +
       orderly2::orderly_dependency("random", "latest(parameter:n_samples == this:n)",
                                    c("randm.rds" = "data.rds"))

      to depend on the most recent packet called random where @@ -490,21 +491,21 @@

      Shared resources## . ## ├── archive ## │ ├── analysis -## │ │ └── 20240424-151157-52293830 +## │ │ └── 20240531-104610-89680745 ## │ │ ├── analysis.R ## │ │ ├── analysis.png ## │ │ └── incoming.rds ## │ ├── incoming_data -## │ │ ├── 20240424-151156-ca4abba3 +## │ │ ├── 20240531-104610-016da7fc ## │ │ │ ├── data.csv ## │ │ │ ├── data.rds ## │ │ │ └── incoming_data.R -## │ │ └── 20240424-151157-9e2c72c1 +## │ │ └── 20240531-104610-d43a7e7d ## │ │ ├── data.csv ## │ │ ├── data.rds ## │ │ └── incoming_data.R ## │ └── random -## │ └── 20240424-151157-dd5418e7 +## │ └── 20240531-104611-127add53 ## │ ├── data.rds ## │ └── random.R ## ├── draft @@ -524,7 +525,7 @@

      Shared resources## └── random.R

      We can then write an orderly report use_shared that uses this shared file, with its use_shared.R containing:

      -
      +
       orderly2::orderly_shared_resource("data.csv")
       orderly2::orderly_artefact("analysis", "analysis.png")
       
      @@ -533,9 +534,9 @@ 

      Shared resourcesplot(y ~ x, d) dev.off()

      We can run this:

      -
      +
       id <- orderly2::orderly_run("use_shared")
      -##  Starting packet 'use_shared' `20240424-151158-6420b350` at 2024-04-24 15:11:58.394643
      +##  Starting packet 'use_shared' `20240531-104611-9015f76f` at 2024-05-31 10:46:11.56592
       ## > orderly2::orderly_shared_resource("data.csv")
       ## > orderly2::orderly_artefact("analysis", "analysis.png")
       ## > d <- read.csv("data.csv")
      @@ -545,11 +546,11 @@ 

      Shared resources## agg_png ## 2 ## Finished running use_shared.R -## Finished 20240424-151158-6420b350 at 2024-04-24 15:11:58.432082 (0.03743935 secs)

      +## Finished 20240531-104611-9015f76f at 2024-05-31 10:46:11.599199 (0.03327847 secs)

      In the resulting archive, the file that was used from the shared directory is present:

      ## archive/use_shared
      -## └── 20240424-151158-6420b350
      +## └── 20240531-104611-9015f76f
       ##     ├── analysis.png
       ##     ├── data.csv
       ##     └── use_shared.R
      @@ -567,7 +568,7 @@

      Strict mode
      +
       

      anywhere within your orderly file (conventionally at the top). We may make this more granular in future, but by adding this we:

      @@ -600,7 +601,7 @@

      Interactive developmentincoming_data/incoming_data.R script, we would leave behind generated files. We can report on this with orderly2::orderly_cleanup_status:

      -
      +
       orderly2::orderly_cleanup_status("incoming_data")
       ##  incoming_data is not clean:
       ##  1 file can be deleted by running 'orderly2::orderly_cleanup("incoming_data")':
      @@ -609,7 +610,7 @@ 

      Interactive development

      You can clean up generated files by running (as suggested in the message):

      -
      +
       orderly2::orderly_cleanup("incoming_data")
       ##  Deleting 1 file from 'incoming_data':
       ## • data.rds
      @@ -617,7 +618,7 @@

      Interactive development

      You can also keep these files out of git by using the orderly2::orderly_gitignore_update function:

      -
      +
       orderly2::orderly_gitignore_update("incoming_data")
       ##  Wrote 'src/incoming_data/.gitignore'

      This creates (or updates) a .gitignore file within the @@ -638,54 +639,54 @@

      Deleting things from the archive

      At the moment, we have two copies of the incoming_data task:

      -
      +
       orderly2::orderly_metadata_extract(
         name = "incoming_data",
         extract = c(time = "time.start"))
       ##                         id                time
      -## 1 20240424-151156-ca4abba3 2024-04-24 15:11:56
      -## 2 20240424-151157-9e2c72c1 2024-04-24 15:11:57
      +## 1 20240531-104610-016da7fc 2024-05-31 10:46:10 +## 2 20240531-104610-d43a7e7d 2024-05-31 10:46:10

      When we run the analysis task, it will pull in the most -recent version (20240424-151157-9e2c72c1). However, if you +recent version (20240531-104610-d43a7e7d). However, if you had deleted this manually (e.g., to save space or accidentally) or corrupted it (e.g., by opening some output in Excel and letting it save changes) it will not be able to be included, and running analysis will fail:

      -
      +
       orderly2::orderly_run("analysis")
      -##  Starting packet 'analysis' `20240424-151159-123f84e6` at 2024-04-24 15:11:59.074683
      +##  Starting packet 'analysis' `20240531-104612-2bc20ffb` at 2024-05-31 10:46:12.174124
       ## > orderly2::orderly_dependency("incoming_data", "latest()",
       ## +                              c("incoming.rds" = "data.rds"))
       ##  Error running analysis.R
      -##  Finished 20240424-151159-123f84e6 at 2024-04-24 15:11:59.186806 (0.1121228 secs)
      +##  Finished 20240531-104612-2bc20ffb at 2024-05-31 10:46:12.251796 (0.07767177 secs)
       ## Error in `orderly2::orderly_run()`:
       ## ! Failed to run report
       ## Caused by error in `orderly_copy_files()`:
      -## ! Unable to copy files, due to deleted packet 20240424-151157-9e2c72c1
      -##  Consider 'orderly2::orderly_validate_archive("20240424-151157-9e2c72c1",
      +## ! Unable to copy files, due to deleted packet 20240531-104610-d43a7e7d
      +##  Consider 'orderly2::orderly_validate_archive("20240531-104610-d43a7e7d",
       ##   action = "orphan")' to remove this packet from consideration
       ## Caused by error:
       ## ! File not found in archive
       ##  data.rds

      The error here tries to be fairly informative, telling us that we failed because when copying files from -20240424-151157-9e2c72c1 we found that the packet was +20240531-104610-d43a7e7d we found that the packet was corrupt, because the file data.rds was not found in the archive. It also suggests a fix; we can tell orderly2 that -20240424-151157-9e2c72c1 is “orphaned” and should not be +20240531-104610-d43a7e7d is “orphaned” and should not be considered for inclusion when we look for dependencies.

      We can carry out the suggestion and just validate this packet by running

      -
      -orderly2::orderly_validate_archive("20240424-151157-9e2c72c1", action = "orphan")
      -

      or we can validate all the packets we have:

      +orderly2::orderly_validate_archive("20240531-104610-d43a7e7d", action = "orphan")
      +

      or we can validate all the packets we have:

      +
       orderly2::orderly_validate_archive(action = "orphan")
      -##  20240424-151156-ca4abba3 (incoming_data) is valid
      -##  20240424-151157-52293830 (analysis) is valid
      -##  20240424-151157-9e2c72c1 (incoming_data) is invalid due to its files
      -##  20240424-151157-dd5418e7 (random) is valid
      -##  20240424-151158-6420b350 (use_shared) is valid
      +## 20240531-104610-016da7fc (incoming_data) is valid +## 20240531-104610-89680745 (analysis) is valid +## 20240531-104610-d43a7e7d (incoming_data) is invalid due to its files +## 20240531-104611-127add53 (random) is valid +## 20240531-104611-9015f76f (use_shared) is valid

      If we had the option core.require_complete_tree enabled, then this process would also look for any packets that used our now-deleted packet and orphan those too, as we no longer have a complete @@ -693,7 +694,7 @@

      Deleting things from the archiveIf you want to remove references to the orphaned packets, you can use orderly2::orderly_prune_orphans() to remove them entirely:

      -
      +
       orderly2::orderly_prune_orphans()
       ##  Pruning 1 orphan packet
      @@ -772,16 +773,16 @@

      Interaction with the outpack store## │ └── outpack.rds ## ├── location ## │ ├── local -## │ │ ├── 20240424-151156-ca4abba3 -## │ │ ├── 20240424-151157-52293830 -## │ │ ├── 20240424-151157-dd5418e7 -## │ │ └── 20240424-151158-6420b350 +## │ │ ├── 20240531-104610-016da7fc +## │ │ ├── 20240531-104610-89680745 +## │ │ ├── 20240531-104611-127add53 +## │ │ └── 20240531-104611-9015f76f ## │ └── orphan ## └── metadata -## ├── 20240424-151156-ca4abba3 -## ├── 20240424-151157-52293830 -## ├── 20240424-151157-dd5418e7 -## └── 20240424-151158-6420b350

      +## ├── 20240531-104610-016da7fc +## ├── 20240531-104610-89680745 +## ├── 20240531-104611-127add53 +## └── 20240531-104611-9015f76f

      As can be perhaps inferred from the filenames, the files .outpack/metadata/<packet-id> are the metadata for each packet as it has been run. The files @@ -797,8 +798,6 @@

      Interaction with the outpack store## ├── config.json ## ├── files ## │ └── sha256 -## │ ├── 07 -## │ │ └── 23503a5d442825e689a02210f23fcda8a8add4bfccfb7b2968d15e5ac53131 ## │ ├── 0a ## │ │ └── a82571c21c4e5f1f435e8bef2328dda5ef47e177d78d63d1c4ec647a5a388a ## │ ├── 25 @@ -811,26 +810,28 @@

      Interaction with the outpack store## │ │ └── 96f49230c2791c05706f24cb2335cd0fad5d3625dc6bca124c44a51857f3f8 ## │ ├── a6 ## │ │ └── 80ab7c65a52327a3d9c5499d114f513f18eabe7f63a98f9fc308c2b3744c82 -## │ ├── d2 -## │ │ └── d6e26133ae17ee798eefbf56b0a67401c5969f121500680c398cfd6811f3eb ## │ ├── d9 ## │ │ └── 1699ae410cbd811e1f028f8a732e5162b7df854eec08d921141f965851272d -## │ └── ec -## │ └── b53285781a4d36c65168c80ee14f2af2c885423c6166b9425f40c3c6cd8297 +## │ ├── e4 +## │ │ └── 85ccf56a34751880f8ce2303a1f87ea4bc3ec630204663cd7e8e91b9530b7d +## │ ├── ec +## │ │ └── b53285781a4d36c65168c80ee14f2af2c885423c6166b9425f40c3c6cd8297 +## │ └── ee +## │ └── 616d4356d967a761be8523cda8836ad981865f3531829b12ee4321266e2a10 ## ├── index ## │ └── outpack.rds ## ├── location ## │ ├── local -## │ │ ├── 20240424-151156-ca4abba3 -## │ │ ├── 20240424-151157-52293830 -## │ │ ├── 20240424-151157-dd5418e7 -## │ │ └── 20240424-151158-6420b350 +## │ │ ├── 20240531-104610-016da7fc +## │ │ ├── 20240531-104610-89680745 +## │ │ ├── 20240531-104611-127add53 +## │ │ └── 20240531-104611-9015f76f ## │ └── orphan ## └── metadata -## ├── 20240424-151156-ca4abba3 -## ├── 20240424-151157-52293830 -## ├── 20240424-151157-dd5418e7 -## └── 20240424-151158-6420b350

      +## ├── 20240531-104610-016da7fc +## ├── 20240531-104610-89680745 +## ├── 20240531-104611-127add53 +## └── 20240531-104611-9015f76f

      The files under .outpack/files/ should never be modified or deleted. This approach to storage naturally deduplicates the file archive, so that a large file used in many places is only ever stored diff --git a/articles/plugins.html b/articles/plugins.html index 0f7f42d8..68e766ca 100644 --- a/articles/plugins.html +++ b/articles/plugins.html @@ -151,7 +151,7 @@

      An example
      minimum_orderly_version: 1.99.0
       plugins:
         example.db:
      -    path: /tmp/Rtmp2G7QEn/file1c9c2b7ca4e0

      + path: /tmp/Rtmp5h4cmt/file1cb33a833731

      Our plugin is called example.db and is listed within the plugins section, along with its configuration; in this case indicating the path where the SQLite file can be loaded from.

      @@ -303,13 +303,13 @@

      Trying it out
       orderly2::orderly_run("example", root = path_root)
      -##  Starting packet 'example' `20240424-151208-3cca1390` at 2024-04-24 15:12:08.242349
      +##  Starting packet 'example' `20240531-104620-68bd14e0` at 2024-05-31 10:46:20.413571
       ## > dat <- example.db::query("SELECT * FROM mtcars WHERE cyl == 4")
       ## > orderly2::orderly_artefact("Summary of data", "data.rds")
       ## > saveRDS(summary(dat), "data.rds")
       ##  Finished running example.R
      -##  Finished 20240424-151208-3cca1390 at 2024-04-24 15:12:08.418706 (0.1763575 secs)
      -## [1] "20240424-151208-3cca1390"

      +## Finished 20240531-104620-68bd14e0 at 2024-05-31 10:46:20.507448 (0.09387755 secs) +## [1] "20240531-104620-68bd14e0"

      @@ -449,7 +449,7 @@

      Saving metadata about what th
      ## .
       ## ├── archive
       ## │   └── example
      -## │       └── 20240424-151208-3cca1390
      +## │       └── 20240531-104620-68bd14e0
       ## │           ├── data.rds
       ## │           └── example.R
       ## ├── draft
      @@ -501,13 +501,14 @@ 

      Saving metadata about what th several).

       id <- orderly2::orderly_run("example", root = path_root)
      -##  Starting packet 'example' `20240424-151209-4cbf368f` at 2024-04-24 15:12:09.303066
      +##  Starting packet 'example' `20240531-104621-72bd98ec` at 2024-05-31 10:46:21.451365
       ## > dat <- example.db::query("SELECT * FROM mtcars WHERE cyl == 4")
       ## > orderly2::orderly_artefact("Summary of data", "data.rds")
       ## > saveRDS(summary(dat), "data.rds")
       ##  Finished running example.R
      -##  Finished 20240424-151209-4cbf368f at 2024-04-24 15:12:09.344407 (0.04134059 secs)
      -meta <- orderly2::orderly_metadata(id, root = path_root)
      +##  Finished 20240531-104621-72bd98ec at 2024-05-31 10:46:21.497601 (0.04623556 secs)
      +
      +meta <- orderly2::orderly_metadata(id, root = path_root)
       meta$custom$example.db
       ##                                   sql rows         cols
       ## 1 SELECT * FROM mtcars WHERE cyl == 4   11 mpg, cyl....
      diff --git a/articles/troubleshooting.html b/articles/troubleshooting.html
      index d925503a..f1c8cc3a 100644
      --- a/articles/troubleshooting.html
      +++ b/articles/troubleshooting.html
      @@ -130,21 +130,21 @@

      Outpack files accidentally ## <https://mrc-ide.github.io/orderly2/articles/troubleshooting.html> ## This warning is displayed once per session.

      ##  Wrote '.gitignore'
      -
      ##  Starting packet 'data' `20240424-151216-56be58ed` at 2024-04-24 15:12:16.342203
      +
      ##  Starting packet 'data' `20240531-104627-7631c63f` at 2024-05-31 10:46:27.464741
      ## > orderly2::orderly_artefact("Final data", "data.rds")
       ## > saveRDS(mtcars, "data.rds")
      ##  Finished running data.R
      -
      ##  Finished 20240424-151216-56be58ed at 2024-04-24 15:12:16.382838 (0.04063487 secs)
      -
      ## [1] "20240424-151216-56be58ed"
      +
      ##  Finished 20240531-104627-7631c63f at 2024-05-31 10:46:27.501524 (0.0367837 secs)
      +
      ## [1] "20240531-104627-7631c63f"

      subsequent calls will not display the warning:

       orderly2::orderly_run("data")
      -
      ##  Starting packet 'data' `20240424-151216-7966dbcd` at 2024-04-24 15:12:16.477657
      +
      ##  Starting packet 'data' `20240531-104627-95ec4609` at 2024-05-31 10:46:27.588723
      ## > orderly2::orderly_artefact("Final data", "data.rds")
       ## > saveRDS(mtcars, "data.rds")
      ##  Finished running data.R
      -
      ##  Finished 20240424-151216-7966dbcd at 2024-04-24 15:12:16.501315 (0.02365756 secs)
      -
      ## [1] "20240424-151216-7966dbcd"
      +
      ##  Finished 20240531-104627-95ec4609 at 2024-05-31 10:46:27.613544 (0.02482033 secs)
      +
      ## [1] "20240531-104627-95ec4609"

      The rest of this section discusses how you might permanently fix the issue.

      diff --git a/pkgdown.yml b/pkgdown.yml
      index 7b764574..2300a723 100644
      --- a/pkgdown.yml
      +++ b/pkgdown.yml
      @@ -11,7 +11,7 @@ articles:
         plugins: plugins.html
         query: query.html
         troubleshooting: troubleshooting.html
      -last_built: 2024-04-24T15:11Z
      +last_built: 2024-05-31T10:45Z
       urls:
         reference: https://mrc-ide.github.io/orderly2/reference
         article: https://mrc-ide.github.io/orderly2/articles
      diff --git a/reference/orderly_cleanup.html b/reference/orderly_cleanup.html
      index e11d461e..ea5d0357 100644
      --- a/reference/orderly_cleanup.html
      +++ b/reference/orderly_cleanup.html
      @@ -150,24 +150,24 @@

      Notes for user of orderly1

      Examples

      # Create a simple example:
       path <- orderly2::orderly_example("default")
      -#>  Created orderly root at '/tmp/RtmpHs4Wii/file181b538e7daf'
      +#>  Created orderly root at '/tmp/RtmpEXni4X/file1830235c478'
       
       # We simulate running a packet interactively by using 'source';
       # you might have run this line-by-line, or with the "Source"
       # button in Rstudio.
       source(file.path(path, "src/data/data.R"), chdir = TRUE)
      -#> Warning: cannot open file '/tmp/RtmpHs4Wii/file181b538e7daf/src/data/data.R': No such file or directory
      +#> Warning: cannot open file '/tmp/RtmpEXni4X/file1830235c478/src/data/data.R': No such file or directory
       #> Error in file(filename, "r", encoding = encoding): cannot open the connection
       
       # Having run this, the output of the report is present in the
       # source directory:
       fs::dir_tree(path)
      -#> Error: [ENOENT] Failed to search directory '/tmp/RtmpHs4Wii/file181b538e7daf': no such file or directory
      +#> Error: [ENOENT] Failed to search directory '/tmp/RtmpEXni4X/file1830235c478': no such file or directory
       
       # We can detect what might want cleaning up by running
       # "orderly_cleanup_status":
       orderly2::orderly_cleanup_status("data", root = path)
      -#> Error: Directory does not exist: '/tmp/RtmpHs4Wii/file181b538e7daf'
      +#> Error: Directory does not exist: '/tmp/RtmpEXni4X/file1830235c478'
       
       # Soon this will print more nicely to the screen, but for now you
       # can see that the status of "data.rds" is "derived", which means
      @@ -176,7 +176,7 @@ 

      Examples
       # Do the actual deletion:
       orderly2::orderly_cleanup("data", root = path)
      -#> Error: Directory does not exist: '/tmp/RtmpHs4Wii/file181b538e7daf'
      +#> Error: Directory does not exist: '/tmp/RtmpEXni4X/file1830235c478'

      diff --git a/reference/orderly_list_src.html b/reference/orderly_list_src.html
      index 6099600c..a8f3420b 100644
      --- a/reference/orderly_list_src.html
      +++ b/reference/orderly_list_src.html
      @@ -114,9 +114,9 @@

      See also

      Examples

      path <- orderly2::orderly_example("default")
      -#>  Created orderly root at '/tmp/RtmpHs4Wii/file181b17927cd4'
      +#>  Created orderly root at '/tmp/RtmpEXni4X/file1830341016cc'
       orderly2::orderly_list_src(root = path)
      -#> Error: Directory does not exist: '/tmp/RtmpHs4Wii/file181b17927cd4'
      +#> Error: Directory does not exist: '/tmp/RtmpEXni4X/file1830341016cc'