Skip to content

Commit

Permalink
Merge pull request #292 from vimc/vimc-4320
Browse files Browse the repository at this point in the history
Allow partial pulling of orderly trees
  • Loading branch information
r-ash authored Jul 7, 2021
2 parents a4a7c20 + e534f10 commit a343ad0
Show file tree
Hide file tree
Showing 13 changed files with 383 additions and 71 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: orderly
Title: Lightweight Reproducible Reporting
Version: 1.3.1
Version: 1.3.2
Description: Order, create and store reports from R. By defining a
lightweight interface around the inputs and outputs of an
analysis, a lot of the repetitive work for reproducible research
Expand All @@ -20,7 +20,7 @@ Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"),
person("James", "Thompson", role = "aut"),
person("Imperial College of Science, Technology and Medicine",
role = "cph"))
URL: https://github.com/vimc/orderly
URL: https://www.vaccineimpact.org/orderly/, https://github.com/vimc/orderly
BugReports: https://github.com/vimc/orderly/issues
SystemRequirements: git
Imports:
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ export(orderly_latest)
export(orderly_list)
export(orderly_list_archive)
export(orderly_list_drafts)
export(orderly_list_metadata)
export(orderly_log)
export(orderly_log_off)
export(orderly_log_on)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# orderly 1.3.2

* Allow partial pulling of orderly dependency trees, by passing `recursive = FALSE` to `orderly::orderly_pull_archive` and `orderly::orderly_pull_dependencies`. This can reduce the total amount of data transferred when you do not care as much about the integrity of the local archive (vimc-4320)

# orderly 1.2.41

* Clearer error message where dependency resolution fails due to a query dependency (vimc-4742)
Expand Down
70 changes: 55 additions & 15 deletions R/db2.R
Original file line number Diff line number Diff line change
Expand Up @@ -197,14 +197,37 @@ report_db_open_existing <- function(con, config) {
}


report_db_import <- function(name, id, config, timeout = 10) {
orderly_log("import", sprintf("%s:%s", name, id))
report_db_import <- function(name, id, config, timeout = 10, metadata = FALSE) {
if (metadata) {
orderly_log("import", sprintf("%s:%s (metadata only)", name, id))
} else {
orderly_log("import", sprintf("%s:%s", name, id))
}
con <- orderly_db("destination", config)
on.exit(DBI::dbDisconnect(con))

if (!metadata) {
prev <- DBI::dbGetQuery(con, "SELECT * from report_version WHERE id = $1",
id)
if (nrow(prev) > 0) {
## It seems there is a chance here that something bad could
## happen (e.g., a migration occurs which means that the
## imported metadata is out of date) leaving everything a bit
## inconsistent. This would be detectable by comparing the
## contents of the two rds files
## (archive/:name/:id/orderly_run.rds vs
## metadata/:name/:id). However, with no recovery mechanism I
## think we're better off ignoring this for now...
return(invisible())
}
}

meta <- read_metadata(name, id, config, metadata)

## sqlite busy handler expects milliseconds
RSQLite::sqliteSetBusyHandler(con, timeout * 1000)
DBI::dbExecute(con, "BEGIN IMMEDIATE")
report_data_import(con, name, id, config)
report_data_import(con, meta, config)
DBI::dbCommit(con)
}

Expand All @@ -219,16 +242,22 @@ report_db_rebuild <- function(config, verbose = TRUE) {
report_db_destroy(con, config)
}
report_db_init(con, config)
reports <- unlist(lapply(list_dirs(path_archive(root)), list_dirs))
if (length(reports) > 0L) {
for (p in reports[order(basename(reports))]) {
id <- basename(p)
name <- basename(dirname(p))
if (verbose) {
message(sprintf("%s (%s)", id, name))
}
report_data_import(con, name, id, config)

reports_archive <- orderly_list_archive(config, FALSE)
reports_metadata <- orderly_list_metadata(config, FALSE)
metadata <- rep(c(FALSE, TRUE),
c(nrow(reports_archive), nrow(reports_metadata)))
reports <- cbind(rbind(reports_archive, reports_metadata), metadata)
for (i in order(reports$id)) {
id <- reports$id[[i]]
name <- reports$name[[i]]
metadata <- reports$metadata[[i]]
if (verbose) {
fmt <- if (metadata) "%s (%s) (metadata only)" else "%s (%s)"
message(sprintf(fmt, id, name))
}
meta <- read_metadata(name, id, config, metadata)
report_data_import(con, meta, config)
}

legacy_report_db_rebuild_published(config)
Expand All @@ -244,9 +273,9 @@ report_db_needs_rebuild <- function(config) {
}


report_data_import <- function(con, name, id, config) {
workdir <- file.path(config$root, "archive", name, id)
dat_rds <- readRDS(path_orderly_run_rds(workdir))
report_data_import <- function(con, dat_rds, config) {
name <- dat_rds$meta$name
id <- dat_rds$meta$id

sql_name <- "SELECT name FROM report WHERE name = $1"
if (nrow(DBI::dbGetQuery(con, sql_name, name)) == 0L) {
Expand Down Expand Up @@ -599,3 +628,14 @@ report_db_dialect <- function(con) {
stop("Can't determine SQL dialect")
}
}


## Load the stored run data (an rds file) for report version `name`/`id`.
##
## When `metadata_store` is TRUE the file is read from the metadata
## directory (<metadata>/<name>/<id>); otherwise it is the file that
## `path_orderly_run_rds()` locates inside the archived report
## directory (<archive>/<name>/<id>).
read_metadata <- function(name, id, config, metadata_store) {
  rds_file <- if (metadata_store) {
    file.path(path_metadata(config$root), name, id)
  } else {
    path_orderly_run_rds(file.path(path_archive(config$root), name, id))
  }
  readRDS(rds_file)
}
18 changes: 18 additions & 0 deletions R/migrate.R
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,24 @@ migrate_single <- function(path, config) {
}


## Validate that report metadata matches the local archive version.
##
## No migration is attempted here: the function returns silently when
## the archive version recorded in `dat_rds` equals the one in
## `config`, and throws an error if the report is newer or older.
##
## `dat_rds` supplies `archive_version`, `meta$name` and `meta$id`;
## `config` must be an `orderly_config` (checked with `assert_is`).
migrate_metadata <- function(dat_rds, config) {
  assert_is(config, "orderly_config")

  version_report <- dat_rds$archive_version
  version_local <- config$archive_version

  if (version_report > version_local) {
    stop("Report was created with orderly more recent than this, upgrade!")
  } else if (version_report < version_local) {
    msg <- sprintf(
      "Can't migrate metadata for '%s:%s', migrate remote or pull archive",
      dat_rds$meta$name, dat_rds$meta$id)
    stop(msg)
  }
}


migrate_clean <- function(config, dry_run) {
files <- list.files(file.path(config$root, "archive"),
"^orderly_run_([0-9]+\\.){3}rds", # nolint
Expand Down
3 changes: 3 additions & 0 deletions R/paths.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ path_archive <- function(root, name = NULL) {
}
}

## Path of the "metadata" directory directly beneath `root`.
path_metadata <- function(root) {
  metadata_dir <- file.path(root, "metadata")
  metadata_dir
}

path_remote_cache <- function(root) {
file.path(root, ".orderly", "remote", "cache")
Expand Down
37 changes: 37 additions & 0 deletions R/query.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,43 @@ orderly_list_archive <- function(root = NULL, locate = TRUE) {
orderly_list2(FALSE, root, locate)
}


##' List the report versions for which metadata is held locally.
##' Metadata-only reports are produced by calling
##' \code{\link{orderly_pull_archive}} with \code{recursive = FALSE},
##' which downloads the metadata for a report but not the report
##' contents itself.
##'
##' @title List reports with only local metadata
##'
##' @inheritParams orderly_list
##'
##' @param include_archive Logical, indicating if reports that are
##'   also present in the archive should be kept in the result. By
##'   default (\code{FALSE}) these are dropped.
##'
##' @return A \code{\link{data.frame}} with columns \code{name} and
##'   \code{id}, as for \code{\link{orderly_list_archive}}
##'
##' @export
##' @examples
##' path <- orderly::orderly_example("minimal")
##' # No metadata-only reports will be present, unless you have run
##' # orderly::orderly_pull_archive(..., recursive = FALSE)
##' orderly::orderly_list_metadata(path)
orderly_list_metadata <- function(root = NULL, locate = FALSE,
                                  include_archive = FALSE) {
  config <- orderly_config(root, locate)

  ## One directory per report name, each holding entries named by id
  name_dirs <- list_dirs(path_metadata(config$root))
  ids_by_name <- lapply(name_dirs,
                        function(p) dir(p, pattern = version_id_re))

  ret <- data_frame(
    name = rep(basename(name_dirs), lengths(ids_by_name)),
    id = as.character(unlist(ids_by_name)))

  if (include_archive || nrow(ret) == 0) {
    return(ret)
  }

  ## Keep only the ids that are not also present in the archive
  in_archive <- ret$id %in% orderly_list_archive(root, locate)$id
  ret <- ret[!in_archive, ]
  rownames(ret) <- NULL
  ret
}

##' Find most recent version of an orderly report. The most recent
##' report is always the most recently run report that has been
##' committed (regardless of the order in which they were committed).
Expand Down
Loading

0 comments on commit a343ad0

Please sign in to comment.