From a3b63522d1f12ab11b06f3c521725d346d87a525 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paul=20Li=C3=A9tar?= Date: Wed, 23 Oct 2024 15:06:25 +0100 Subject: [PATCH] WIP --- DESCRIPTION | 2 +- NAMESPACE | 3 + R/compare.R | 482 ++++++++++++++++-------------- R/location.R | 8 +- man/orderly_compare_packets.Rd | 63 ++-- man/orderly_comparison_explain.Rd | 34 +++ tests/testthat/_snaps/compare.md | 212 +++++++------ tests/testthat/test-compare.R | 194 +++++------- tests/testthat/test-location.R | 24 +- 9 files changed, 541 insertions(+), 481 deletions(-) create mode 100644 man/orderly_comparison_explain.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 6637f7d1..afc179c2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: orderly2 Title: Orderly Next Generation -Version: 1.99.51 +Version: 1.99.52 Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"), email = "rich.fitzjohn@gmail.com"), person("Robert", "Ashton", role = "aut"), diff --git a/NAMESPACE b/NAMESPACE index 804ca6aa..281f972c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,15 @@ # Generated by roxygen2: do not edit by hand +S3method(format,orderly_comparison) S3method(format,orderly_query) S3method(print,orderly_cleanup_status) +S3method(print,orderly_comparison) S3method(print,orderly_query_explain) export(orderly_artefact) export(orderly_cleanup) export(orderly_cleanup_status) export(orderly_compare_packets) +export(orderly_comparison_explain) export(orderly_config) export(orderly_config_set) export(orderly_copy_files) diff --git a/R/compare.R b/R/compare.R index c78423c3..10f8dad2 100644 --- a/R/compare.R +++ b/R/compare.R @@ -1,31 +1,18 @@ -is_binary_file <- function(path, n = 1024) { - # This is a pretty crude heuristic, but it seems good enough. - # It is actually similar to what the `diff` tool does. - data <- readBin(path, "raw", n) - as.raw(0) %in% data +trivial_differences <- c("id", "time") +vectorized_identical <- function(left, right) { + stopifnot(length(left) == length(right)) + vlapply(seq_along(left), function(i) { + identical(left[[i]], right[[i]]) + }) } -compare_metadata <- function(target, current) { - # id and time will almost always differ, but not in any interesting way. - # files may differ (especially the hashes), but we compare the files in - # detail seperately. - exclude <- c("id", "time", "files") - - target_filtered <- target[!(names(target) %in% exclude)] - current_filtered <- current[!(names(current) %in% exclude)] - if (!identical(target_filtered, current_filtered)) { - diffobj::diffPrint( - target_filtered, - current_filtered, - tar.banner = target$id, - cur.banner = current$id, - rds = FALSE, - mode = "unified", - style = list(pad = FALSE, wrap = FALSE), - interactive = FALSE) - } else { - NULL - } +is_binary_file <- function(path, n = 1024) { + vlapply(path, function(p) { + # This is a pretty crude heuristic, but it seems good enough. + # It is actually similar to what the `diff` tool does. + data <- readBin(p, "raw", n) + as.raw(0) %in% data + }) } compare_filesets <- function(target, current) { @@ -37,238 +24,281 @@ compare_filesets <- function(target, current) { data.frame(path = files$path, status = status) } -compare_files <- function(target, current, files, root, search_options) { - path_target <- withr::local_tempdir() - path_current <- withr::local_tempdir() - - # Copying the files into a temporary directory is fairly wasteful and, as - # long as the packet is unpacked already, we could read the files from the - # archive or file store directly. Nevertheless this makes accessing the files - # very straightforward, and covers the case where the file only exists - # remotely transparent. - - orderly_copy_files(target, dest = path_target, files = files, - options = search_options, root = root) - - orderly_copy_files(current, dest = path_current, files = files, - options = search_options, root = root) - - ret <- lapply(files, function(p) { - if (is_binary_file(file.path(path_target, p)) || - is_binary_file(file.path(path_current, p))) { - NULL - } else { - diffobj::diffChr( - read_file_lossy(file.path(path_target, p)), - read_file_lossy(file.path(path_current, p)), - tar.banner = file.path(target, p), - cur.banner = file.path(current, p), - rds = FALSE, - mode = "unified", - style = list(pad = FALSE, wrap = FALSE), - interactive = FALSE) - } - }) - names(ret) <- files - - ret -} - ##' Compare the metadata and contents of two packets. ##' ##' Insignificant differences in the metadata (eg. different dates and packet ##' IDs) are excluded from the comparison. ##' -##' If either packet is not unpacked, but `search_options`'s `allow_remote` is -##' `TRUE`, we will try to request files from remote locations, as necessary. -##' -##' @title Compare two packets -##' ##' @param target The id of the packet to use in the comparison. ##' @param current The id of the other packet against which to compare. -##' @param what One or more of "metadata", "files" and "artefacts", retricting -##' what components of the packet to compare. This is useful when it is known -##' for example that the source code of a report what changed, and one is only -##' interested in the effect on its output. -##' @param search_options Options for locating packet files. If there are no -##' copies of the files locally, they can be downloaded automatically from a -##' remote location on-demand if `allow_remote` is `TRUE`. -##' +##' @inheritParams orderly_search_options ##' @inheritParams orderly_metadata -##' -##' @return An R6 object of class `orderly_packet_diff` is returned. Printing -##' this object will show the difference in the two packets. Additionally, the -##' `is_equal` method of the object return a logical indicating whether any -##' difference was found. +##' @return An object of class orderly_comparison. The object can be printed to +##' get a summary description of the differences, or passed to +##' [orderly2::orderly_comparison_explain] to display more details. ##' ##' @export -orderly_compare_packets <- function( - target, current, search_options = NULL, root = NULL, locate = TRUE, - what = c("metadata", "files")) { - what <- rlang::arg_match(what, multiple = TRUE, - values = c("metadata", "files", "artefacts")) - if (length(what) == 0) { - cli::cli_abort("{.code what} must not be empty") - } - if ("artefacts" %in% what && "files" %in% what) { - cli::cli_abort('{.code what} must contain both "files" and "artefacts"') - } - +orderly_compare_packets <- function(target, current, + location = NULL, + allow_remote = NULL, + pull_metadata = FALSE, + root = NULL) { + root <- root_open(root, require_orderly = FALSE) validate_outpack_id(target, call = environment()) validate_outpack_id(current, call = environment()) - root <- root_open(root, locate = locate, require_orderly = FALSE, - call = environment()) + options <- build_search_options(location = location, + allow_remote = allow_remote, + pull_metadata = pull_metadata) - meta_target <- orderly_metadata(target, root = root) - meta_current <- orderly_metadata(current, root = root) + if (options$pull_metadata) { + orderly_location_pull_metadata(options$location, root = root) + } - if ("metadata" %in% what) { - metadata <- compare_metadata(meta_target, meta_current) - } else { - metadata <- NULL + target_metadata <- orderly_metadata(target, root = root) + current_metdata <- orderly_metadata(current, root = root) + + ret <- list( + root = root, + target = target_metadata, + current = current_metdata, + status = compare_attribute_list(target_metadata, current_metdata), + search_options = options) + + class(ret) <- "orderly_comparison" + ret +} + +compare_files <- function(cmp, verbose, name) { + diff <- compare_filesets(cmp$target$files, cmp$current$files) + + modified <- diff[diff$status == "modified", ] + target_only <- diff[diff$status == "removed", ] + current_only <- diff[diff$status == "added", ] + + if (nrow(target_only) > 0) { + cli::cli_alert_info( + "The following files only exist in packet {cmp$target$id}") + cli::cli_ul(target_only$path) } + if (nrow(current_only) > 0) { + cli::cli_alert_info( + "The following files only exist in packet {cmp$current$id}") + cli::cli_ul(current_only$path) + } + if (nrow(modified) > 0) { + if (verbose) { + compare_file_contents(cmp, modified$path) + } else { + cli::cli_alert_info(paste("The following files exist in both packets", + "but have different contents:")) + cli::cli_ul(modified$path) - if ("files" %in% what) { - files <- compare_filesets(meta_target$files, meta_current$files) - } else if ("artefacts" %in% what) { - if (is.null(meta_target$custom$orderly) || - is.null(meta_current$custom$orderly)) { - cli::cli_abort("Cannot compare artefacts of non-orderly packets") + hint <- cli::format_inline( + "orderly_comparison_explain({name}, \"files\", verbose = TRUE)") + cli::cli_alert_info("Use {.code {hint}} to compare the files' contents.") } + } + if (nrow(modified) == 0 && + nrow(target_only) == 0 && + nrow(current_only) == 0) { + cli::cli_alert_info("The files across the two packets are identical.") + } +} - artefacts_target <- unlist(meta_target$custom$orderly$artefacts$paths) - artefacts_current <- unlist(meta_current$custom$orderly$artefacts$paths) - files <- compare_filesets( - meta_target$files[meta_target$files$path %in% artefacts_target, ], - meta_current$files[meta_current$files$path %in% artefacts_current, ]) - } else { - files <- data.frame(path = character(0), status = character(0)) +compare_file_contents <- function(cmp, files) { + path_target <- withr::local_tempdir() + path_current <- withr::local_tempdir() + + # Copying the files into a temporary directory is fairly wasteful and, as + # long as the packet is unpacked already, we could read the files from the + # archive or file store directly. Nevertheless this makes accessing the files + # very straightforward, and covers the case where the file only exists + # remotely transparent. + # + # pull_metadata is intentionally hardcoded to FALSE, regardless of what the + # user may have specified as an argument to orderly_compare_packets: the + # latter would have already pulled the files once, and we don't need to do it + # again. + orderly_copy_files(cmp$target$id, + dest = path_target, + files = files, + location = cmp$search_options$location, + allow_remote = cmp$search_options$allow_remote, + pull_metadata = FALSE, + root = cmp$root) + + orderly_copy_files(cmp$current$id, + dest = path_current, + files = files, + location = cmp$search_options$location, + allow_remote = cmp$search_options$allow_remote, + pull_metadata = FALSE, + root = cmp$root) + + + binary_files <- is_binary_file(file.path(path_target, files)) | + is_binary_file(file.path(path_current, files)) + + for (f in files[!binary_files]) { + cli::cli_verbatim(as.character(diffobj::diffChr( + read_file_lossy(file.path(path_target, f)), + read_file_lossy(file.path(path_current, f)), + tar.banner = file.path(cmp$target$id, f), + cur.banner = file.path(cmp$current$id, f), + rds = FALSE, + mode = "unified", + style = list(wrap = FALSE), + interactive = FALSE + ))) } - idx <- files$status == "modified" - files$diff[idx] <- compare_files(target, current, files[idx, ]$path, - search_options = search_options, - root = root) + if (any(binary_files)) { + cli::cli_alert_warning( + paste("The following files differ across packets, but could not be", + "compared as their content is binary:")) + cli::cli_ul(files[binary_files]) + } +} + +compare_attribute <- function(cmp, attribute, verbose, name) { + assert_is(cmp, "orderly_comparison") + assert_scalar_character(attribute) + assert_logical(verbose) - orderly_packet_diff$new(what, target, current, metadata, files) + if (attribute == "files") { + compare_files(cmp, verbose, name) + } else { + cli::cli_alert_info("Comparing attribute {.code {attribute}}") + cli::cli_verbatim(as.character(diffobj::diffPrint( + cmp$target[[attribute]], + cmp$current[[attribute]], + tar.banner = sprintf("%s$%s", cmp$target$id, attribute), + cur.banner = sprintf("%s$%s", cmp$current$id, attribute), + rds = FALSE, + mode = "unified", + style = list(wrap = FALSE), + interactive = FALSE + ))) + } } +compare_attribute_list <- function(target, current) { + target_names <- names(target) + current_names <- names(current) -orderly_packet_diff <- R6::R6Class( - "orderly_packet_diff", - private = list( - what = NULL, - target = NULL, - current = NULL, - metadata = NULL, - files = NULL, - - print_metadata = function() { - if (!is.null(private$metadata)) { - cli::cli_alert_warning("Packet metadata differs:") - cli::cli_div(theme = list(div = list("margin-left" = 2))) - cli::cli_verbatim(as.character(private$metadata)) - cli::cli_end() - } - }, + all_names <- union(target_names, current_names) + status <- rep(NA_character_, length(all_names)) - print_files = function(verbose) { - name <- if ("artefacts" %in% private$what) { - "artefacts" - } else { - "files" - } + in_target <- all_names %in% target_names + in_current <- all_names %in% current_names + in_both <- in_target & in_current - removed <- private$files[private$files$status == "removed", ] - if (nrow(removed) > 0) { - cli::cli_alert_warning( - "The following {name} only exist in packet {private$current}:") - cli::cli_ul(removed$path) - } + status[!in_target] <- "removed" + status[!in_current] <- "added" - added <- private$files[private$files$status == "added", ] - if (nrow(added) > 0) { - cli::cli_alert_warning( - "The following {name} only exist in packet {private$target}:") - cli::cli_ul(added$path) - } + intersected_names <- all_names[in_both] + status[in_both] <- ifelse(vectorized_identical(target[intersected_names], + current[intersected_names]), + "identical", + "different") - modified <- private$files[private$files$status == "modified", ] - if (nrow(modified) > 0) { - binary <- sapply(modified$diff, is.null) - - cli::cli_alert_warning( - paste("The following {name} exist in both packets but have", - "different contents:")) - - cli::cli_ul() - for (i in seq_len(nrow(modified))) { - cli::cli_li("{modified$path[[i]]}") - if (verbose) { - if (!binary[[i]]) { - cli::cli_div(theme = list(div = list("margin-left" = 2))) - cli::cli_verbatim(as.character(modified$diff[[i]])) - cli::cli_end() - } - } - } - if (verbose && any(binary)) { - cli::cli_alert_warning( - "Contents of binary file{?s} {modified$path[binary]} were omitted") - } - if (!verbose) { - cli::cli_alert_info(paste( - "Print the comparison with {.code verbose = TRUE} to display the", - "differences in the {name}' contents")) - } + names(status) <- all_names + status +} + +##' @export +format.orderly_comparison <- function(x, ..., name = deparse(substitute(x))) { + cli::cli_format_method({ + orderly_comparison_explain(x, verbose = "summary", name = name) + }) +} + +##' @export +print.orderly_comparison <- function(x, ..., name = deparse(substitute(x))) { + cat(format(x, ..., name = name), sep = "\n") +} + +##' Print the details of a packet comparison. +##' +##' This function allows to select what part of the packet to compare, and in +##' how much details. +##' +##' @param cmp An orderly_comparison object, as returned by +##' [orderly2::orderly_compare_packets]. +##' @param attributes A character vector of attributes to include in the +##' comparison. The values are keys of the packets' metadata, such as +##' `parameters` or `files`. If NULL, the default, all attributes are compared, +##' except those that differ in trivial way (ie. `id` and `time`). +##' @param verbose Control over how much information is printed. It can either +##' be a logical, or a character scalar `silent` or `summary`. +##' @return Invisibly, a logical indicating whether the packets are equivalent, +##' up to the given attributes. +##' +##' @export +orderly_comparison_explain <- function(cmp, attributes = NULL, ..., + verbose = FALSE, + name = deparse(substitute(cmp))) { + rlang::check_dots_empty() + + assert_is(cmp, "orderly_comparison") + if (!is.null(attributes)) { + assert_character(attributes) + } + + if (is.null(attributes)) { + status <- cmp$status[setdiff(names(cmp$status), trivial_differences)] + } else { + status <- cmp$status[intersect(attributes, names(cmp$status))] + } + + if (verbose != "silent") { + cli::cli_alert_info( + "Comparing packets {cmp$target$id} and {cmp$current$id}...") + + # All packets we produce have the same attributes. This is really more about + # future-proofing for a time where we may add new ones, and may want to + # compare a packet produced before vs after the new attributes were added. + if (any(status == "added")) { + cli::cli_alert_info( + "The following attributes only exist in packet {cmp$target$id}:") + cli::cli_ul(names(which(status == "added"))) + } + if (any(status == "removed")) { + cli::cli_alert_info( + "The following attributes only exist in packet {cmp$current$id}:") + cli::cli_ul(names(which(status == "removed"))) + } + if (any(status == "different")) { + if (verbose == "summary") { + cli::cli_alert_info( + "The following attributes are different across the two packets:") + cli::cli_ul(names(which(status == "different"))) + cli::cli_div() + cli::cli_alert_info(paste( + "Use {.code orderly_comparison_explain({name})} to examine the", + "differences in more detail.")) cli::cli_end() + } else { + for (n in names(which(status == "different"))) { + compare_attribute(cmp, attribute = n, verbose = verbose, name = name) + } } } - ), - - public = list( - initialize = function(what, target, current, metadata, files) { - private$what <- what - private$target <- target - private$current <- current - private$metadata <- metadata - private$files <- files - }, - - is_equal = function() { - is.null(private$metadata) && all(private$files$status == "unchanged") - }, - - format = function(verbose = FALSE, ...) { - target <- private$target - current <- private$current - - cli::cli_format_method({ - if (self$is_equal()) { - msg <- if (setequal(private$what, c("metadata", "files"))) { - "Packets {target} and {current} are identical" - } else if (identical(private$what, "metadata")) { - "Metadata of packets {target} and {current} is identical" - } else if (identical(private$what, "files")) { - "Files of packets {target} and {current} are identical" - } else if (identical(private$what, "artefacts")) { - "Artefacts of packets {target} and {current} are identical" - } else if (setequal(private$what, c("metadata", "artefacts"))) { - paste("Metadata and artefacts of packets {target} and {current}", - "are identical") - } else { - stop("Unhandled combination of `what`") - } - cli::cli_alert_success(msg) + if (all(status == "identical")) { + if (is.null(attributes)) { + if (all(cmp$status == "identical")) { + cli::cli_alert_success("The two packets are identical.") } else { - cli::cli_alert_info( - "Comparing packets {private$target} and {private$current}") - - private$print_metadata() - private$print_files(verbose = verbose) + cli::cli_alert_success( + "The two packets are equivalent, up to trivial differences.") } - }) + } else { + cli::cli_alert_success( + "The specified attributes are identical across the two packets.") + } } - ) -) + } + + invisible(all(status == "identical")) +} diff --git a/R/location.R b/R/location.R index cae1d2dc..a362a061 100644 --- a/R/location.R +++ b/R/location.R @@ -997,11 +997,11 @@ location_pull_files <- function(files, root) { cleanup <- function() invisible() i <- store$exists(files$hash) if (any(i)) { - cli::cli_alert_success("Found {sum(i)} file{?s} in the file store") + cli_alert_success("Found {sum(i)} file{?s} in the file store") files <- files[!i, ] } } else { - cli::cli_alert_info("Looking for suitable files already on disk") + cli_alert_info("Looking for suitable files already on disk") store <- temporary_filestore(root) cleanup <- function() store$destroy() on_disk <- vlapply(files$hash, function(hash) { @@ -1019,10 +1019,10 @@ location_pull_files <- function(files, root) { } if (nrow(files) == 0) { - cli::cli_alert_success("All files available locally, no need to fetch any") + cli_alert_success("All files available locally, no need to fetch any") } else { locations <- unique(files$location) - cli::cli_alert_info(paste( + cli_alert_info(paste( "Need to fetch {nrow(files)} file{?s} ({pretty_bytes(sum(files$size))})", "from {length(locations)} location{?s}")) for (loc in locations) { diff --git a/man/orderly_compare_packets.Rd b/man/orderly_compare_packets.Rd index 84194223..84e427c9 100644 --- a/man/orderly_compare_packets.Rd +++ b/man/orderly_compare_packets.Rd @@ -2,15 +2,15 @@ % Please edit documentation in R/compare.R \name{orderly_compare_packets} \alias{orderly_compare_packets} -\title{Compare two packets} +\title{Compare the metadata and contents of two packets.} \usage{ orderly_compare_packets( target, current, - search_options = NULL, - root = NULL, - locate = TRUE, - what = c("metadata", "files") + location = NULL, + allow_remote = NULL, + pull_metadata = FALSE, + root = NULL ) } \arguments{ @@ -18,40 +18,39 @@ orderly_compare_packets( \item{current}{The id of the other packet against which to compare.} -\item{search_options}{Options for locating packet files. If there are no -copies of the files locally, they can be downloaded automatically from a -remote location on-demand if \code{allow_remote} is \code{TRUE}.} +\item{location}{Optional vector of locations to pull from. We +might in future expand this to allow wildcards or exceptions.} -\item{root}{The path to the root directory, or \code{NULL} (the -default) to search for one from the current working directory if -\code{locate} is \code{TRUE}. This function does not require that the -directory is configured for orderly, and can be any \code{outpack} -root (see \link{orderly_init} for details).} +\item{allow_remote}{Logical, indicating if we should allow packets +to be found that are not currently unpacked (i.e., are known +only to a location that we have metadata from). If this is +\code{TRUE}, then in conjunction with \link{orderly_dependency} +you might pull a large quantity of data. The default is \code{NULL}. This is +\code{TRUE} if remote locations are listed explicitly as a character +vector in the \code{location} argument, or if you have specified +\code{pull_metadata = TRUE}, otherwise \code{FALSE}.} -\item{locate}{Logical, indicating if the root should be searched -for. If \code{TRUE}, then we looks in the directory given for \code{root} -(or the working directory if \code{NULL}) and then up through its -parents until it finds an \code{.outpack} directory or -\code{orderly_config.yml}} +\item{pull_metadata}{Logical, indicating if we should pull +metadata immediately before the search. If \code{location} is given, +then we will pass this through to +\link{orderly_location_pull_metadata} to filter locations +to update. If pulling many packets in sequence, you \emph{will} want +to update this option to \code{FALSE} after the first pull, otherwise +it will update the metadata between every packet, which will be +needlessly slow.} -\item{what}{One or more of "metadata", "files" and "artefacts", retricting -what components of the packet to compare. This is useful when it is known -for example that the source code of a report what changed, and one is only -interested in the effect on its output.} +\item{root}{The path to the root directory, or \code{NULL} (the +default) to search for one from the current working +directory. This function does not require that the directory is +configured for orderly, and can be any \code{outpack} root (see +\link{orderly_init} for details).} } \value{ -An R6 object of class \code{orderly_packet_diff} is returned. Printing -this object will show the difference in the two packets. Additionally, the -\code{is_equal} method of the object return a logical indicating whether any -difference was found. +An object of class orderly_comparison. The object can be printed to +get a summary description of the differences, or passed to +\link{orderly_comparison_explain} to display more details. } \description{ -Compare the metadata and contents of two packets. -} -\details{ Insignificant differences in the metadata (eg. different dates and packet IDs) are excluded from the comparison. - -If either packet is not unpacked, but \code{search_options}'s \code{allow_remote} is -\code{TRUE}, we will try to request files from remote locations, as necessary. } diff --git a/man/orderly_comparison_explain.Rd b/man/orderly_comparison_explain.Rd new file mode 100644 index 00000000..510de2a3 --- /dev/null +++ b/man/orderly_comparison_explain.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compare.R +\name{orderly_comparison_explain} +\alias{orderly_comparison_explain} +\title{Print the details of a packet comparison.} +\usage{ +orderly_comparison_explain( + cmp, + attributes = NULL, + ..., + verbose = FALSE, + name = deparse(substitute(cmp)) +) +} +\arguments{ +\item{cmp}{An orderly_comparison object, as returned by +\link{orderly_compare_packets}.} + +\item{attributes}{A character vector of attributes to include in the +comparison. The values are keys of the packets' metadata, such as +\code{parameters} or \code{files}. If NULL, the default, all attributes are compared, +except those that differ in trivial way (ie. \code{id} and \code{time}).} + +\item{verbose}{Control over how much information is printed. It can either +be a logical, or a character scalar \code{silent} or \code{summary}.} +} +\value{ +Invisibly, a logical indicating whether the packets are equivalent, +up to the given attributes. +} +\description{ +This function allows to select what part of the packet to compare, and in how +much details. +} diff --git a/tests/testthat/_snaps/compare.md b/tests/testthat/_snaps/compare.md index d63badb0..6bd08c74 100644 --- a/tests/testthat/_snaps/compare.md +++ b/tests/testthat/_snaps/compare.md @@ -3,141 +3,179 @@ Code print(result) Output - v Packets 19700101-000000-00000001 and 19700101-000000-00000001 are identical + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000001... + v The two packets are identical. # Comparing packets ignores ID and time differences Code print(result) Output - v Packets 19700101-000000-00000001 and 19700101-000000-00000002 are identical + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + v The two packets are equivalent, up to trivial differences. + +# Can explicitly compare trivial fields + + Code + orderly_comparison_explain(result, "id") + Message + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + i Comparing attribute `id` + < 19700101-000000-00000001$id + > 19700101-000000-00000002$id + @@ 1 / 1 @@ + < [1] "19700101-000000-00000001" + > [1] "19700101-000000-00000002" # Can compare packets with different metadata Code - print(everything) + print(result) Output - i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 - ! Packet metadata differs: - < 19700101-000000-00000001 - > 19700101-000000-00000002 - @@ 7,5 / 7,5 @@ - $parameters - $parameters$A - < [1] "foo" - > [1] "bar" - - + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + i The following attributes are different across the two packets: + * parameters + i Use `orderly_comparison_explain(result)` to examine the differences in more detail. --- Code - print(metadata) - Output - i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 - ! Packet metadata differs: - < 19700101-000000-00000001 - > 19700101-000000-00000002 - @@ 7,5 / 7,5 @@ - $parameters - $parameters$A - < [1] "foo" - > [1] "bar" - - + orderly_comparison_explain(result) + Message + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + i Comparing attribute `parameters` + < 19700101-000000-00000001$parameters + > 19700101-000000-00000002$parameters + @@ 1,3 / 1,3 @@ + $A + < [1] "foo" + > [1] "bar" + --- Code - print(files) - Output - v Files of packets 19700101-000000-00000001 and 19700101-000000-00000002 are identical + orderly_comparison_explain(result, "files") + Message + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + v The specified attributes are identical across the two packets. # Can compare packets with different file contents Code - print(everything) + print(result) Output - i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 - ! The following files exist in both packets but have different contents: - * data.R - * data.txt - i Print the comparison with `verbose = TRUE` to display the differences in the files' contents + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + i The following attributes are different across the two packets: + * files + i Use `orderly_comparison_explain(result)` to examine the differences in more detail. --- Code - print(metadata) - Output - v Metadata of packets 19700101-000000-00000001 and 19700101-000000-00000002 is identical + orderly_comparison_explain(result, "files") + Message + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + i The following files exist in both packets but have different contents: + * data.R + * data.txt + i Use `orderly_comparison_explain(result, "files", verbose = TRUE)` to compare the files' contents. --- Code - print(files) + orderly_comparison_explain(result, "files", verbose = TRUE) + Message + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + < 19700101-000000-00000001/data.R + > 19700101-000000-00000002/data.R + @@ 1 / 1 @@ + < writeLines("Hello", "data.txt") + > writeLines("World", "data.txt") + < 19700101-000000-00000001/data.txt + > 19700101-000000-00000002/data.txt + @@ 1 / 1 @@ + < Hello + > World + +# Can compare packets with binary contents + + Code + print(result) Output - i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 - ! The following files exist in both packets but have different contents: - * data.R - * data.txt - i Print the comparison with `verbose = TRUE` to display the differences in the files' contents + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + i The following attributes are different across the two packets: + * files + i Use `orderly_comparison_explain(result)` to examine the differences in more detail. --- Code - print(files, verbose = TRUE) - Output - i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 - ! The following files exist in both packets but have different contents: - * data.R - < 19700101-000000-00000001/data.R - > 19700101-000000-00000002/data.R - @@ 1 / 1 @@ - < writeLines("Hello", "data.txt") - > writeLines("World", "data.txt") - * data.txt - < 19700101-000000-00000001/data.txt - > 19700101-000000-00000002/data.txt - @@ 1 / 1 @@ - < Hello - > World - -# Can compare artefacts only - - Code - print(files) - Output - i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 - ! The following files exist in both packets but have different contents: - * data.R - i Print the comparison with `verbose = TRUE` to display the differences in the files' contents + orderly_comparison_explain(result, "files") + Message + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + i The following files exist in both packets but have different contents: + * data.R + * data.rds + i Use `orderly_comparison_explain(result, "files", verbose = TRUE)` to compare the files' contents. -# Can detect newly declared artefact +--- Code - print(artefacts) - Output - i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 - ! The following artefacts only exist in packet 19700101-000000-00000001: - * hello.txt + orderly_comparison_explain(result, "files", verbose = TRUE) + Message + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + < 19700101-000000-00000001/data.R + > 19700101-000000-00000002/data.R + @@ 1,4 / 1,4 @@ + { + orderly_artefact(description = "Output", "data.rds") + < saveRDS(1:10, "data.rds") + > saveRDS(11:20, "data.rds") + } + ! The following files differ across packets, but could not be compared as their content is binary: + * data.rds + +# Can compare packets from remote -# Can compare packets with binary contents + Code + orderly_comparison_explain(result, "files") + Message + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + i The following files exist in both packets but have different contents: + * data.rds + i Use `orderly_comparison_explain(result, "files", verbose = TRUE)` to compare the files' contents. + +--- + + Code + orderly_comparison_explain(result, "files", verbose = TRUE) + Message + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + ! The following files differ across packets, but could not be compared as their content is binary: + * data.rds + +# Handles new attributes gracefully Code print(result) Output - i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 - ! The following artefacts exist in both packets but have different contents: - * data.rds - i Print the comparison with `verbose = TRUE` to display the differences in the artefacts' contents + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002... + i The following attributes only exist in packet 19700101-000000-00000001: + * new_key + i The following attributes are different across the two packets: + * files + i Use `orderly_comparison_explain(result)` to examine the differences in more detail. --- Code - print(result, verbose = TRUE) + print(result_swap) Output - i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 - ! The following artefacts exist in both packets but have different contents: - * data.rds - ! Contents of binary file data.rds were omitted + i Comparing packets 19700101-000000-00000002 and 19700101-000000-00000001... + i The following attributes only exist in packet 19700101-000000-00000001: + * new_key + i The following attributes are different across the two packets: + * files + i Use `orderly_comparison_explain(result_swap)` to examine the differences in more detail. diff --git a/tests/testthat/test-compare.R b/tests/testthat/test-compare.R index 0a8826c7..09a3ad1c 100644 --- a/tests/testthat/test-compare.R +++ b/tests/testthat/test-compare.R @@ -3,7 +3,7 @@ test_that("Comparing a packet to itself returns an empty diff", { id <- create_random_packet(root) result <- orderly_compare_packets(id, id, root = root) - expect_true(result$is_equal()) + expect_true(orderly_comparison_explain(result, verbose = "silent")) expect_snapshot(print(result), transform = scrub_packets(id)) }) @@ -19,180 +19,132 @@ test_that("Comparing packets ignores ID and time differences", { expect_false(isTRUE(all.equal(meta1$time, meta2$time))) result <- orderly_compare_packets(id1, id2, root = root) - expect_true(result$is_equal()) + expect_true(orderly_comparison_explain(result, verbose = "silent")) expect_snapshot(print(result), transform = scrub_packets(id1, id2)) }) -test_that("Can compare packets with different metadata", { +test_that("Can explicitly compare trivial fields", { root <- create_temporary_root() + id1 <- create_deterministic_packet(root) + id2 <- create_deterministic_packet(root) - p1 <- create_deterministic_packet(root, "data", list(A = "foo")) - p2 <- create_deterministic_packet(root, "data", list(A = "bar")) - - everything <- orderly_compare_packets(p1, p2, root = root) - metadata <- orderly_compare_packets(p1, p2, root = root, what = "metadata") - files <- orderly_compare_packets(p1, p2, root = root, what = "files") - - expect_false(everything$is_equal()) - expect_false(metadata$is_equal()) - expect_true(files$is_equal()) - - expect_snapshot(print(everything), transform = scrub_packets(p1, p2)) - expect_snapshot(print(metadata), transform = scrub_packets(p1, p2)) - expect_snapshot(print(files), transform = scrub_packets(p1, p2)) -}) - - -test_that("Can compare packets with different file contents", { - root <- create_temporary_root() - - p1 <- orderly_run_snippet(root, "data", writeLines("Hello", "data.txt")) - p2 <- orderly_run_snippet(root, "data", writeLines("World", "data.txt")) - - everything <- orderly_compare_packets(p1, p2, root = root) - metadata <- orderly_compare_packets(p1, p2, root = root, what = "metadata") - files <- orderly_compare_packets(p1, p2, root = root, what = "files") + result <- orderly_compare_packets(id1, id2, root = root) - expect_false(everything$is_equal()) - expect_true(metadata$is_equal()) - expect_false(files$is_equal()) + expect_true(orderly_comparison_explain(result, verbose = "silent")) + expect_false(orderly_comparison_explain(result, c("id", "time"), + verbose = "silent")) - expect_snapshot(print(everything), transform = scrub_packets(p1, p2)) - expect_snapshot(print(metadata), transform = scrub_packets(p1, p2)) - expect_snapshot(print(files), transform = scrub_packets(p1, p2)) - expect_snapshot(print(files, verbose = TRUE), - transform = scrub_packets(p1, p2)) + expect_snapshot(orderly_comparison_explain(result, "id"), + transform = scrub_packets(id1, id2)) }) -test_that("Can compare artefacts only", { +test_that("Can compare packets with different metadata", { root <- create_temporary_root() - p1 <- orderly_run_snippet(root, "data", { - orderly_artefact(description = "Output", "output.txt") - writeLines(toString(2 + 1), "output.txt") - }) + id1 <- create_deterministic_packet(root, "data", list(A = "foo")) + id2 <- create_deterministic_packet(root, "data", list(A = "bar")) - p2 <- orderly_run_snippet(root, "data", { - orderly_artefact(description = "Output", "output.txt") - writeLines(toString(1 + 2), "output.txt") - }) - - files <- orderly_compare_packets(p1, p2, what = "files", root = root) - artefacts <- orderly_compare_packets(p1, p2, what = "artefacts", root = root) - - # The packet files, in particular the source code, are different. However the - # different snippets produce identical artefacts. - expect_false(files$is_equal()) - expect_true(artefacts$is_equal()) + result <- orderly_compare_packets(id1, id2, root = root) + expect_false(orderly_comparison_explain(result, verbose = "silent")) + expect_true(orderly_comparison_explain(result, "files", verbose = "silent")) - expect_snapshot(print(files), transform = scrub_packets(p1, p2)) + expect_snapshot(print(result), transform = scrub_packets(id1, id2)) + expect_snapshot(orderly_comparison_explain(result), + transform = scrub_packets(id1, id2)) + expect_snapshot(orderly_comparison_explain(result, "files"), + transform = scrub_packets(id1, id2)) }) - -test_that("Can detect newly declared artefact", { +test_that("Can compare packets with different file contents", { root <- create_temporary_root() - p1 <- orderly_run_snippet(root, "data", { - writeLines("Hello", "hello.txt") - }) + id1 <- orderly_run_snippet(root, "data", writeLines("Hello", "data.txt")) + id2 <- orderly_run_snippet(root, "data", writeLines("World", "data.txt")) - p2 <- orderly_run_snippet(root, "data", { - orderly_artefact(description = "Output", "hello.txt") - writeLines("Hello", "hello.txt") - }) + result <- orderly_compare_packets(id1, id2, root = root) - artefacts <- orderly_compare_packets(p1, p2, what = "artefacts", root = root) + expect_false(orderly_comparison_explain(result, verbose = "silent")) + expect_false(orderly_comparison_explain(result, "files", verbose = "silent")) + expect_true(orderly_comparison_explain(result, "parameters", + verbose = "silent")) - expect_false(artefacts$is_equal()) - expect_snapshot(print(artefacts), transform = scrub_packets(p1, p2)) + expect_snapshot(print(result), transform = scrub_packets(id1, id2)) + expect_snapshot(orderly_comparison_explain(result, "files"), + transform = scrub_packets(id1, id2)) + expect_snapshot(orderly_comparison_explain(result, "files", verbose = TRUE), + transform = scrub_packets(id1, id2)) }) - test_that("Can compare packets with binary contents", { root <- create_temporary_root() - p1 <- orderly_run_snippet(root, "data", { + id1 <- orderly_run_snippet(root, "data", { orderly_artefact(description = "Output", "data.rds") saveRDS(1:10, "data.rds") }) - p2 <- orderly_run_snippet(root, "data", { + id2 <- orderly_run_snippet(root, "data", { orderly_artefact(description = "Output", "data.rds") saveRDS(11:20, "data.rds") }) - result <- orderly_compare_packets(p1, p2, what = "artefacts", root = root) - expect_false(result$is_equal()) - expect_snapshot(print(result), transform = scrub_packets(p1, p2)) - expect_snapshot(print(result, verbose = TRUE), - transform = scrub_packets(p1, p2)) -}) - - -test_that("Cannot compare artefacts of non-orderly packets", { - root <- create_temporary_root() + result <- orderly_compare_packets(id1, id2, root = root) - src <- withr::local_tempdir() - writeLines("World", file.path(src, "data.txt")) - p <- outpack_packet_start_quietly(src, "data", root = root) - outpack_packet_end_quietly(p) + expect_false(orderly_comparison_explain(result, verbose = "silent")) + expect_false(orderly_comparison_explain(result, "files", verbose = "silent")) + expect_true(orderly_comparison_explain(result, "parameters", + verbose = "silent")) - # Only orderly packets have the right metadata needed to infer what is or - # isn't an artefact. - expect_error( - orderly_compare_packets(p$id, p$id, what = "artefacts", root = root), - "Cannot compare artefacts of non-orderly packets") - - # We can still do full packet comparison, even without the orderly-specific - # metadata. - expect_no_error(orderly_compare_packets(p$id, p$id, root = root)) - expect_no_error( - orderly_compare_packets(p$id, p$id, what = "files", root = root)) + expect_snapshot(print(result), transform = scrub_packets(id1, id2)) + expect_snapshot(orderly_comparison_explain(result, "files"), + transform = scrub_packets(id1, id2)) + expect_snapshot(orderly_comparison_explain(result, "files", verbose = TRUE), + transform = scrub_packets(id1, id2)) }) - test_that("Can compare packets from remote", { here <- create_temporary_root() there <- create_temporary_root() - p1 <- create_random_packet(there) - p2 <- create_random_packet(here) + id1 <- create_random_packet(here) + id2 <- create_random_packet(there) orderly_location_add("there", "path", list(path = there$path), root = here) - orderly_location_pull_metadata(root = here) expect_error( - orderly_compare_packets(p1, p2, root = here), - "Unable to copy files, as they are not available locally") + orderly_compare_packets(id1, id2, root = here), + "Packet .* not found in outpack index") - result <- suppressMessages({ - orderly_compare_packets(p1, p2, search_options = list(allow_remote = TRUE), - root = here) - }) - expect_false(result$is_equal()) + result <- orderly_compare_packets(id1, id2, root = here, + pull_metadata = TRUE) + + expect_snapshot(orderly_comparison_explain(result, "files"), + transform = scrub_packets(id1, id2)) + expect_snapshot(orderly_comparison_explain(result, "files", verbose = TRUE), + transform = scrub_packets(id1, id2)) }) -test_that("Checks bad what argument", { +test_that("Handles new attributes gracefully", { root <- create_temporary_root() - id <- create_random_packet(root) - expect_error( - orderly_compare_packets(id, id, root = root, what = "xx"), - '`what` must be one of .*, not "xx"') + id1 <- create_random_packet(root) + id2 <- create_random_packet(root) - expect_error( - orderly_compare_packets(id, id, root = root, what = character(0)), - "`what` must not be empty") + # This is a horrible hack, designed to mimick the day where we may add new + # attributes to the metadata. + f <- file.path(root$path, ".outpack", "metadata", id1) + metadata <- parse_json(file(f)) + metadata$new_key <- "value" + writeLines(to_json(metadata, auto_unbox = TRUE), f) - expect_error( - orderly_compare_packets(id, id, root = root, what = TRUE), - "`what` must be a character vector") + result <- orderly_compare_packets(id1, id2, root = root) + expect_snapshot(print(result), + transform = scrub_packets(id1, id2)) - expect_error( - orderly_compare_packets(id, id, root = root, - what = c("files", "artefacts")), - '`what` must contain both "files" and "artefacts"') + result_swap <- orderly_compare_packets(id2, id1, root = root) + expect_snapshot(print(result_swap), + transform = scrub_packets(id1, id2)) }) diff --git a/tests/testthat/test-location.R b/tests/testthat/test-location.R index 73c91b4a..17809600 100644 --- a/tests/testthat/test-location.R +++ b/tests/testthat/test-location.R @@ -1063,11 +1063,13 @@ test_that("skip files in the file store", { orderly_location_pull_metadata(root = root$dst) suppressMessages(orderly_location_pull_packet(id[[1]], root = root$dst)) - res <- testthat::evaluate_promise( - orderly_location_pull_packet(id[[2]], root = root$dst)) - expect_match(res$messages, "Found 1 file in the file store", all = FALSE) - expect_match(res$messages, "Need to fetch 2 files.+from 1 location", - all = FALSE) + withr::with_options(list(orderly.quiet = FALSE), { + res <- testthat::evaluate_promise( + orderly_location_pull_packet(id[[2]], root = root$dst)) + expect_match(res$messages, "Found 1 file in the file store", all = FALSE) + expect_match(res$messages, "Need to fetch 2 files.+from 1 location", + all = FALSE) + }) }) @@ -1082,11 +1084,13 @@ test_that("skip files known elsewhere on disk", { orderly_location_pull_metadata(root = root$dst) suppressMessages(orderly_location_pull_packet(id[[1]], root = root$dst)) - res <- testthat::evaluate_promise( - orderly_location_pull_packet(id[[2]], root = root$dst)) - expect_match(res$messages, "Found 1 file on disk", all = FALSE) - expect_match(res$messages, "Need to fetch 2 files.+from 1 location", - all = FALSE) + withr::with_options(list(orderly.quiet = FALSE), { + res <- testthat::evaluate_promise( + orderly_location_pull_packet(id[[2]], root = root$dst)) + expect_match(res$messages, "Found 1 file on disk", all = FALSE) + expect_match(res$messages, "Need to fetch 2 files.+from 1 location", + all = FALSE) + }) })