From d28636a95726f7bd0bc15a111fb36aca9bf7f8bb Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Tue, 21 Nov 2023 15:24:40 +0000 Subject: [PATCH 1/8] Be lazy about reading full metadata in extract --- R/outpack_metadata.R | 5 +++-- R/outpack_tools.R | 8 +++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/R/outpack_metadata.R b/R/outpack_metadata.R index d8edaac2..00d044d0 100644 --- a/R/outpack_metadata.R +++ b/R/outpack_metadata.R @@ -166,9 +166,10 @@ outpack_metadata_core <- function(id, root, call = NULL) { } + +metadata_core_names <- c("id", "name", "parameters", "time", "files", "depends") outpack_metadata_core_read <- function(path) { - keep <- c("id", "name", "parameters", "time", "files", "depends") - data <- jsonlite::read_json(path)[keep] + data <- jsonlite::read_json(path)[metadata_core_names] outpack_metadata_core_deserialise(data) } diff --git a/R/outpack_tools.R b/R/outpack_tools.R index 1dee6185..1fe3dccb 100644 --- a/R/outpack_tools.R +++ b/R/outpack_tools.R @@ -208,7 +208,13 @@ orderly_metadata_extract <- function(..., extract = NULL, root = NULL, } extract <- parse_extract(extract, environment()) - meta <- lapply(ids, orderly_metadata, root = root) + is_core_metadata <- + vlapply(extract$from, function(el) el[[1]] %in% metadata_core_names) + if (all(is_core_metadata)) { + meta <- lapply(ids, outpack_metadata_core, root = root) + } else { + meta <- lapply(ids, orderly_metadata, root = root) + } envir <- environment() ret <- data_frame(id = ids) From 1c17799f25d11759573aaab249e918f86839b9a7 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Tue, 21 Nov 2023 15:40:09 +0000 Subject: [PATCH 2/8] Atomically write the index --- R/outpack_index.R | 2 +- R/util.R | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/R/outpack_index.R b/R/outpack_index.R index 39a7b9f0..4e49ebf1 100644 --- a/R/outpack_index.R +++ b/R/outpack_index.R @@ -69,7 +69,7 @@ index_update <- function(root_path, prev, skip_cache) { if (!identical(data, prev)) { fs::dir_create(dirname(path_index)) - saveRDS(data, path_index) + saverds_atomic(data, path_index) } data diff --git a/R/util.R b/R/util.R index 94502612..a28922f6 100644 --- a/R/util.R +++ b/R/util.R @@ -615,3 +615,12 @@ file_canonical_case <- function(path, workdir) { } paste(ret, collapse = "/") } + + +saverds_atomic <- function(data, path) { + tmp <- tempfile(pattern = sub("\\.rds", "", basename(path)), + tmpdir = dirname(path), + fileext = ".rds") + saveRDS(data, tmp) + fs::file_move(tmp, path) +} From fdaf67cdfd12a710f930b0b42faedc702d6188fa Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Tue, 21 Nov 2023 15:40:18 +0000 Subject: [PATCH 3/8] Rebuild the index if it is corrupt on read --- R/outpack_index.R | 5 ++++- tests/testthat/test-outpack-metadata.R | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/R/outpack_index.R b/R/outpack_index.R index 4e49ebf1..0c6c2579 100644 --- a/R/outpack_index.R +++ b/R/outpack_index.R @@ -59,7 +59,10 @@ index_update <- function(root_path, prev, skip_cache) { path_index <- file.path(root_path, ".outpack", "index", "outpack.rds") if (length(prev) == 0 && file.exists(path_index) && !skip_cache) { - prev <- readRDS(path_index) + prev <- tryCatch(readRDS(path_index), error = function(e) { + cli::cli_alert_warning("outpack index corrupted, rebuilding") + NULL + }) } data <- prev diff --git a/tests/testthat/test-outpack-metadata.R b/tests/testthat/test-outpack-metadata.R index 2c932a65..a68606d9 100644 --- a/tests/testthat/test-outpack-metadata.R +++ b/tests/testthat/test-outpack-metadata.R @@ -122,3 +122,12 @@ test_that("Sensible error if metadata file not found", { orderly_metadata(outpack_id(), root), "Packet '.+' not found in outpack index") }) + + +test_that("corrupted metadata is replaced automatically", { + root <- create_temporary_root() + id <- create_random_packet(root) + file.create(file.path(root$path, ".outpack", "index", "outpack.rds")) + expect_message(outpack_index$new(root$path)$refresh(), + "outpack index corrupted, rebuilding") +}) From 2325e19ff690718abe5e9f3f0c573a6b8ca15a04 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Fri, 17 Nov 2023 18:32:27 +0000 Subject: [PATCH 4/8] Add optional progress markers when rebuilding the index --- R/outpack_index.R | 54 +++++++++++++++++++++++++-------- tests/testthat/helper-orderly.R | 3 +- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/R/outpack_index.R b/R/outpack_index.R index 0c6c2579..2c33ee49 100644 --- a/R/outpack_index.R +++ b/R/outpack_index.R @@ -12,13 +12,13 @@ outpack_index <- R6::R6Class( private$path_ <- path }, - rebuild = function() { - private$data_ <- index_update(private$path_, NULL, TRUE) + rebuild = function(progress = NULL) { + private$data_ <- index_update(private$path_, NULL, TRUE, progress) invisible(self) }, - refresh = function() { - private$data_ <- index_update(private$path_, private$data_, FALSE) + refresh = function(progress = NULL) { + private$data_ <- index_update(private$path_, private$data_, FALSE, progress) invisible(self) }, @@ -55,7 +55,8 @@ outpack_index <- R6::R6Class( )) -index_update <- function(root_path, prev, skip_cache) { +index_update <- function(root_path, prev, skip_cache, progress) { + progress <- progress %||% getOption("orderly_index_progress", TRUE) path_index <- file.path(root_path, ".outpack", "index", "outpack.rds") if (length(prev) == 0 && file.exists(path_index) && !skip_cache) { @@ -66,11 +67,14 @@ index_update <- function(root_path, prev, skip_cache) { } data <- prev - data$location <- read_locations(root_path, data$location) - data$metadata <- read_metadata(root_path, data$metadata) + data$metadata <- read_metadata(root_path, data$metadata, progress) + data$location <- read_locations(root_path, data$location, progress) data$unpacked <- data$location$packet[data$location$location == local] if (!identical(data, prev)) { + if (progress) { + cli::cli_progress_message("Writing index to disk") + } fs::dir_create(dirname(path_index)) saverds_atomic(data, path_index) } @@ -79,7 +83,7 @@ index_update <- function(root_path, prev, skip_cache) { } -read_metadata <- function(root_path, prev) { +read_metadata <- function(root_path, prev, progress) { path <- file.path(root_path, ".outpack", "metadata") id_new <- setdiff(dir(path), names(prev)) @@ -87,8 +91,18 @@ read_metadata <- function(root_path, prev) { return(prev) } + if (progress) { + cli::cli_progress_bar("Reading metadata", total = length(id_new)) + } files <- file.path(path, id_new) - new <- lapply(files, outpack_metadata_core_read) + new <- vector("list", length(id_new)) + for (i in seq_along(id_new)) { + new[[i]] <- outpack_metadata_core_read(files[[i]]) + if (progress) { + cli::cli_progress_update() + } + } + names(new) <- id_new ret <- c(prev, new) ret[order(names(ret))] @@ -96,7 +110,7 @@ read_metadata <- function(root_path, prev) { } -read_locations <- function(root_path, prev) { +read_locations <- function(root_path, prev, progress) { if (is.null(prev)) { prev <- data_frame(packet = character(), time = empty_time(), @@ -107,7 +121,8 @@ read_locations <- function(root_path, prev) { location_path <- fs::dir_ls(file.path(root_path, ".outpack", "location"), type = "directory") location_name <- basename(location_path) - new <- do.call(rbind, lapply(location_name, read_location, root_path, prev)) + new <- do.call(rbind, + lapply(location_name, read_location, root_path, prev, progress)) ret <- rbind(prev, new) ## Always sort by location, then id ret <- ret[order(match(ret$location, location_name), ret$packet), ] @@ -117,7 +132,7 @@ read_locations <- function(root_path, prev) { } -read_location <- function(location_name, root_path, prev) { +read_location <- function(location_name, root_path, prev, progress) { path <- file.path(root_path, ".outpack", "location", location_name) packets <- dir(path, re_id) is_new <- !(packets %in% prev$packet[prev$location == location_name]) @@ -125,7 +140,20 @@ read_location <- function(location_name, root_path, prev) { return(NULL) } - dat <- lapply(file.path(path, packets[is_new]), jsonlite::read_json) + id_new <- packets[is_new] + dat <- vector("list", length(id_new)) + files <- file.path(path, id_new) + if (progress) { + cli::cli_progress_bar(sprintf("Reading location '%s'", location_name), + total = length(id_new)) + } + for (i in seq_along(id_new)) { + dat[[i]] <- jsonlite::read_json(files[[i]]) + if (progress) { + cli::cli_progress_update() + } + } + data_frame(packet = vcapply(dat, "[[", "packet"), time = num_to_time(vnapply(dat, "[[", "time")), hash = vcapply(dat, "[[", "hash"), diff --git a/tests/testthat/helper-orderly.R b/tests/testthat/helper-orderly.R index 52e9e57b..9ce5646d 100644 --- a/tests/testthat/helper-orderly.R +++ b/tests/testthat/helper-orderly.R @@ -1,6 +1,7 @@ options(outpack.schema_validate = requireNamespace("jsonvalidate", quietly = TRUE) && - packageVersion("jsonvalidate") >= "1.4.0") + packageVersion("jsonvalidate") >= "1.4.0", + orderly_index_progress = FALSE) test_prepare_orderly_example <- function(examples, ...) { From 713dd02affeb139555a2245faaea5b2f362fe142 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Tue, 21 Nov 2023 15:50:22 +0000 Subject: [PATCH 5/8] Tests of progress bars in metadata read --- R/outpack_index.R | 3 --- tests/testthat/test-outpack-index.R | 36 +++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 tests/testthat/test-outpack-index.R diff --git a/R/outpack_index.R b/R/outpack_index.R index 2c33ee49..6ea66c7c 100644 --- a/R/outpack_index.R +++ b/R/outpack_index.R @@ -72,9 +72,6 @@ index_update <- function(root_path, prev, skip_cache, progress) { data$unpacked <- data$location$packet[data$location$location == local] if (!identical(data, prev)) { - if (progress) { - cli::cli_progress_message("Writing index to disk") - } fs::dir_create(dirname(path_index)) saverds_atomic(data, path_index) } diff --git a/tests/testthat/test-outpack-index.R b/tests/testthat/test-outpack-index.R new file mode 100644 index 00000000..3aaaeb87 --- /dev/null +++ b/tests/testthat/test-outpack-index.R @@ -0,0 +1,36 @@ +test_that("reading metadata reports progress if requested", { + root <- create_temporary_root() + ids <- create_random_packet_chain(5, root = root) + + mock_progress_bar <- mockery::mock() + mock_progress_update <- mockery::mock() + mockery::stub(read_metadata, "cli::cli_progress_bar", mock_progress_bar) + mockery::stub(read_metadata, "cli::cli_progress_update", mock_progress_update) + + read_metadata(root$path, NULL, FALSE) + mockery::expect_called(mock_progress_bar, 0) + mockery::expect_called(mock_progress_update, 0) + + read_metadata(root$path, NULL, TRUE) + mockery::expect_called(mock_progress_bar, 1) + mockery::expect_called(mock_progress_update, 5) +}) + + +test_that("reading location reports progress if requested", { + root <- create_temporary_root() + ids <- create_random_packet_chain(5, root = root) + + mock_progress_bar <- mockery::mock() + mock_progress_update <- mockery::mock() + mockery::stub(read_location, "cli::cli_progress_bar", mock_progress_bar) + mockery::stub(read_location, "cli::cli_progress_update", mock_progress_update) + + read_location("local", root$path, NULL, FALSE) + mockery::expect_called(mock_progress_bar, 0) + mockery::expect_called(mock_progress_update, 0) + + read_location("local", root$path, NULL, TRUE) + mockery::expect_called(mock_progress_bar, 1) + mockery::expect_called(mock_progress_update, 5) +}) From 8a1103d515da9fe9ce47bcee8b1326c07ca9fe4c Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Tue, 21 Nov 2023 15:55:04 +0000 Subject: [PATCH 6/8] Bump version number --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index dee96d55..dc4ddc87 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: orderly2 Title: Orderly Next Generation -Version: 1.99.9 +Version: 1.99.10 Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"), email = "rich.fitzjohn@gmail.com"), person("Robert", "Ashton", role = "aut"), From 5760b0f97e0fe44ad887bae360397336138e6268 Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Tue, 21 Nov 2023 16:00:54 +0000 Subject: [PATCH 7/8] Fix lint --- R/outpack_index.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/outpack_index.R b/R/outpack_index.R index 6ea66c7c..1308ae34 100644 --- a/R/outpack_index.R +++ b/R/outpack_index.R @@ -18,7 +18,8 @@ outpack_index <- R6::R6Class( }, refresh = function(progress = NULL) { - private$data_ <- index_update(private$path_, private$data_, FALSE, progress) + private$data_ <- index_update(private$path_, private$data_, FALSE, + progress) invisible(self) }, @@ -118,8 +119,9 @@ read_locations <- function(root_path, prev, progress) { location_path <- fs::dir_ls(file.path(root_path, ".outpack", "location"), type = "directory") location_name <- basename(location_path) - new <- do.call(rbind, - lapply(location_name, read_location, root_path, prev, progress)) + new <- do.call( + rbind, + lapply(location_name, read_location, root_path, prev, progress)) ret <- rbind(prev, new) ## Always sort by location, then id ret <- ret[order(match(ret$location, location_name), ret$packet), ] From dc1d6cf51b32f4438caf87b5d85ba808ce0627fe Mon Sep 17 00:00:00 2001 From: Rich FitzJohn Date: Wed, 22 Nov 2023 11:07:32 +0000 Subject: [PATCH 8/8] Don't fail if index writing fails --- R/outpack_index.R | 2 +- R/util.R | 12 ++++++++++-- tests/testthat/test-util.R | 15 +++++++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/R/outpack_index.R b/R/outpack_index.R index 1308ae34..f17d91ac 100644 --- a/R/outpack_index.R +++ b/R/outpack_index.R @@ -74,7 +74,7 @@ index_update <- function(root_path, prev, skip_cache, progress) { if (!identical(data, prev)) { fs::dir_create(dirname(path_index)) - saverds_atomic(data, path_index) + saverds_atomic(data, path_index, allow_fail = TRUE) } data diff --git a/R/util.R b/R/util.R index a28922f6..0c9bdcdc 100644 --- a/R/util.R +++ b/R/util.R @@ -617,10 +617,18 @@ file_canonical_case <- function(path, workdir) { } -saverds_atomic <- function(data, path) { +saverds_atomic <- function(data, path, allow_fail = FALSE) { tmp <- tempfile(pattern = sub("\\.rds", "", basename(path)), tmpdir = dirname(path), fileext = ".rds") saveRDS(data, tmp) - fs::file_move(tmp, path) + if (allow_fail) { + tryCatch( + fs::file_move(tmp, path), + error = function(e) unlink(tmp)) + } else { + tryCatch( + fs::file_move(tmp, path), + finally = unlink(tmp)) + } } diff --git a/tests/testthat/test-util.R b/tests/testthat/test-util.R index 3ea032e8..f06bc747 100644 --- a/tests/testthat/test-util.R +++ b/tests/testthat/test-util.R @@ -306,3 +306,18 @@ test_that("can convert files to canonical case", { expect_equal(file_canonical_case("A/win~1/C", tmp), NA_character_) expect_equal(file_canonical_case(c("a/b/c", "a/b/d"), tmp), c("a/b/c", NA)) }) + + +test_that("can gracefully cope with rds save failure", { + mock_move <- mockery::mock(stop("some error"), cycle = TRUE) + mockery::stub(saverds_atomic, "fs::file_move", mock_move) + tmp <- withr::local_tempdir() + path <- file.path(tmp, "file.rds") + expect_silent( + saverds_atomic(NULL, path, allow_fail = TRUE)) + expect_equal(dir(tmp), character()) + expect_error( + saverds_atomic(NULL, path, allow_fail = FALSE), + "some error") + expect_equal(dir(tmp), character()) +})