From 459390242ba2eab4c6b9df6863e70d5746337244 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paul=20Li=C3=A9tar?= <pl2113@ic.ac.uk> Date: Wed, 23 Oct 2024 15:06:25 +0100 Subject: [PATCH] WIP --- DESCRIPTION | 2 +- NAMESPACE | 2 + R/compare.R | 354 ++++++++++++++------------------- man/orderly_compare_packets.Rd | 28 +-- 4 files changed, 162 insertions(+), 224 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c1570a8c..c016dbef 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: orderly2 Title: Orderly Next Generation -Version: 1.99.49 +Version: 1.99.50 Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"), email = "rich.fitzjohn@gmail.com"), person("Robert", "Ashton", role = "aut"), diff --git a/NAMESPACE b/NAMESPACE index 804ca6aa..40c895bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,9 @@ # Generated by roxygen2: do not edit by hand +S3method(format,orderly_comparison) S3method(format,orderly_query) S3method(print,orderly_cleanup_status) +S3method(print,orderly_comparison) S3method(print,orderly_query_explain) export(orderly_artefact) export(orderly_cleanup) diff --git a/R/compare.R b/R/compare.R index c78423c3..d332714b 100644 --- a/R/compare.R +++ b/R/compare.R @@ -1,31 +1,10 @@ is_binary_file <- function(path, n = 1024) { - # This is a pretty crude heuristic, but it seems good enough. - # It is actually similar to what the `diff` tool does. - data <- readBin(path, "raw", n) - as.raw(0) %in% data -} - -compare_metadata <- function(target, current) { - # id and time will almost always differ, but not in any interesting way. - # files may differ (especially the hashes), but we compare the files in - # detail seperately. - exclude <- c("id", "time", "files") - - target_filtered <- target[!(names(target) %in% exclude)] - current_filtered <- current[!(names(current) %in% exclude)] - if (!identical(target_filtered, current_filtered)) { - diffobj::diffPrint( - target_filtered, - current_filtered, - tar.banner = target$id, - cur.banner = current$id, - rds = FALSE, - mode = "unified", - style = list(pad = FALSE, wrap = FALSE), - interactive = FALSE) - } else { - NULL - } + vlapply(path, function(p) { + # This is a pretty crude heuristic, but it seems good enough. + # It is actually similar to what the `diff` tool does. + data <- readBin(path, "raw", n) + as.raw(0) %in% data + }) } compare_filesets <- function(target, current) { @@ -37,43 +16,6 @@ compare_filesets <- function(target, current) { data.frame(path = files$path, status = status) } -compare_files <- function(target, current, files, root, search_options) { - path_target <- withr::local_tempdir() - path_current <- withr::local_tempdir() - - # Copying the files into a temporary directory is fairly wasteful and, as - # long as the packet is unpacked already, we could read the files from the - # archive or file store directly. Nevertheless this makes accessing the files - # very straightforward, and covers the case where the file only exists - # remotely transparent. - - orderly_copy_files(target, dest = path_target, files = files, - options = search_options, root = root) - - orderly_copy_files(current, dest = path_current, files = files, - options = search_options, root = root) - - ret <- lapply(files, function(p) { - if (is_binary_file(file.path(path_target, p)) || - is_binary_file(file.path(path_current, p))) { - NULL - } else { - diffobj::diffChr( - read_file_lossy(file.path(path_target, p)), - read_file_lossy(file.path(path_current, p)), - tar.banner = file.path(target, p), - cur.banner = file.path(current, p), - rds = FALSE, - mode = "unified", - style = list(pad = FALSE, wrap = FALSE), - interactive = FALSE) - } - }) - names(ret) <- files - - ret -} - ##' Compare the metadata and contents of two packets. ##' ##' Insignificant differences in the metadata (eg. different dates and packet @@ -102,173 +44,173 @@ compare_files <- function(target, current, files, root, search_options) { ##' difference was found. ##' ##' @export -orderly_compare_packets <- function( - target, current, search_options = NULL, root = NULL, locate = TRUE, - what = c("metadata", "files")) { - what <- rlang::arg_match(what, multiple = TRUE, - values = c("metadata", "files", "artefacts")) - if (length(what) == 0) { - cli::cli_abort("{.code what} must not be empty") - } - if ("artefacts" %in% what && "files" %in% what) { - cli::cli_abort('{.code what} must contain both "files" and "artefacts"') - } - +orderly_compare_packets <- function(target, current, + root = NULL, + location = NULL, + allow_remote = NULL, + pull_metadata = NULL) { + root <- root_open(root, require_orderly = FALSE) validate_outpack_id(target, call = environment()) validate_outpack_id(current, call = environment()) - root <- root_open(root, locate = locate, require_orderly = FALSE, - call = environment()) + ret <- list( + root = root, + target = orderly_metadata(target, root = root), + current = orderly_metadata(current, root = root)) + class(ret) <- "orderly_comparison" + ret +} - meta_target <- orderly_metadata(target, root = root) - meta_current <- orderly_metadata(current, root = root) +compare_files <- function(x, verbose, name) { + diff <- compare_filesets(x$target$files, x$current$files) - if ("metadata" %in% what) { - metadata <- compare_metadata(meta_target, meta_current) - } else { - metadata <- NULL - } + modified <- diff[diff$status == "modified", ] + target_only <- diff[diff$status == "removed", ] + current_only <- diff[diff$status == "added", ] - if ("files" %in% what) { - files <- compare_filesets(meta_target$files, meta_current$files) - } else if ("artefacts" %in% what) { - if (is.null(meta_target$custom$orderly) || - is.null(meta_current$custom$orderly)) { - cli::cli_abort("Cannot compare artefacts of non-orderly packets") + if (nrow(target_only) > 0) { + cli::cli_alert_info("The following files only exist in packet {x$target$id}") + cli::cli_ul(target_only$path) + } + if (nrow(current_only) > 0) { + cli::cli_alert_info("The following files only exist in packet {x$current$id}") + cli::cli_ul(current_only$path) + } + if (nrow(modified) > 0) { + if (verbose) { + compare_file_contents(x, modified$path) + } else { + cli::cli_alert_info("The following files exist in both packets but have different contents:") + cli::cli_ul(modified$path) + cli::cli_alert_info("Use {.code orderly_comparison_explain({name}, \"files\", verbose = TRUE)} to compare the files' contents.") } + } + if (nrow(modified) == 0 && nrow(target_only) == 0 && nrow(current_only) == 0) { + cli::cli_alert_info("The files across the two packets are identical.") + } +} - artefacts_target <- unlist(meta_target$custom$orderly$artefacts$paths) - artefacts_current <- unlist(meta_current$custom$orderly$artefacts$paths) - files <- compare_filesets( - meta_target$files[meta_target$files$path %in% artefacts_target, ], - meta_current$files[meta_current$files$path %in% artefacts_current, ]) - } else { - files <- data.frame(path = character(0), status = character(0)) +compare_file_contents <- function(x, files) { + path_target <- withr::local_tempdir() + path_current <- withr::local_tempdir() + + # Copying the files into a temporary directory is fairly wasteful and, as + # long as the packet is unpacked already, we could read the files from the + # archive or file store directly. Nevertheless this makes accessing the files + # very straightforward, and covers the case where the file only exists + # remotely transparent. + + orderly_copy_files(x$target$id, dest = path_target, files = files, root = root) + orderly_copy_files(x$current$id, dest = path_current, files = files, root = root) + + binary_files <- is_binary_file(file.path(path_target, files)) || + is_binary_file(file.path(path_current, files)) + + for (f in files[!binary_files]) { + cli::cli_verbatim(diffobj::diffChr( + read_file_lossy(file.path(path_target, f)), + read_file_lossy(file.path(path_current, f)), + tar.banner = file.path(x$target$id, f), + cur.banner = file.path(x$current$id, f), + rds = FALSE, + mode = "unified")) } - idx <- files$status == "modified" - files$diff[idx] <- compare_files(target, current, files[idx, ]$path, - search_options = search_options, - root = root) + if (any(binary_files)) { + cli::cli_alert_warning("The following files differ across packets, but could not be compared as their content is binary:") + cli::cli_ul(files[binary_files]) + } +} - orderly_packet_diff$new(what, target, current, metadata, files) +compare_metadata_attribute <- function(x, attribute, verbose, name) { + if (attribute == "files") { + cli::cli_alert_info("Comparing files") + compare_files(x, verbose, name) + } else { + cli::cli_alert_info("Comparing attribute {.code {attribute}}") + cli::cli_verbatim(as.character(diffobj::diffPrint( + x$target[[attribute]], + x$current[[attribute]], + tar.banner = sprintf("%s$%s", x$target$id, attribute), + cur.banner = sprintf("%s$%s", x$current$id, attribute), + rds = FALSE, + mode = "unified" + ))) + } } +compare_metadata_attribute_list <- function(target, current) { + target_names <- setdiff(names(target), c("id", "time")) + current_names <- setdiff(names(current), c("id", "time")) -orderly_packet_diff <- R6::R6Class( - "orderly_packet_diff", - private = list( - what = NULL, - target = NULL, - current = NULL, - metadata = NULL, - files = NULL, + intersected <- intersect(target_names, current_names) + differences <- vlapply(intersected, function(n) { + !identical(target[[n]], current[[n]]) + }) - print_metadata = function() { - if (!is.null(private$metadata)) { - cli::cli_alert_warning("Packet metadata differs:") - cli::cli_div(theme = list(div = list("margin-left" = 2))) - cli::cli_verbatim(as.character(private$metadata)) - cli::cli_end() - } - }, + target_only <- setdiff(target_names, current_names) + current_only <- setdiff(current_names, target_names) - print_files = function(verbose) { - name <- if ("artefacts" %in% private$what) { - "artefacts" - } else { - "files" - } + list(differences = intersected[differences], + target_only = target_only, + current_only = current_only) +} - removed <- private$files[private$files$status == "removed", ] - if (nrow(removed) > 0) { - cli::cli_alert_warning( - "The following {name} only exist in packet {private$current}:") - cli::cli_ul(removed$path) - } +##' @export +format.orderly_comparison <- function(x, ..., name = deparse(substitute(x))) { + cli::cli_format_method({ + cli::cli_alert_info("Comparing packets {x$target$id} and {x$current$id}...") + cli::cli_div(theme = list(div = list("margin-left" = 2))) - added <- private$files[private$files$status == "added", ] - if (nrow(added) > 0) { - cli::cli_alert_warning( - "The following {name} only exist in packet {private$target}:") - cli::cli_ul(added$path) - } + diff <- compare_metadata_attribute_list(x$target, x$current) - modified <- private$files[private$files$status == "modified", ] - if (nrow(modified) > 0) { - binary <- sapply(modified$diff, is.null) + if (length(diff$differences) > 0) { + cli::cli_alert_info("The following attributes are different across the two packets:") + cli::cli_ul(diff$differences) + } - cli::cli_alert_warning( - paste("The following {name} exist in both packets but have", - "different contents:")) + if (length(diff$target_only) > 0) { + cli::cli_alert_info("The following attributes only exist in packet {x$target$only}:") + cli::cli_ul(diff$target_only) + } - cli::cli_ul() - for (i in seq_len(nrow(modified))) { - cli::cli_li("{modified$path[[i]]}") - if (verbose) { - if (!binary[[i]]) { - cli::cli_div(theme = list(div = list("margin-left" = 2))) - cli::cli_verbatim(as.character(modified$diff[[i]])) - cli::cli_end() - } - } - } - if (verbose && any(binary)) { - cli::cli_alert_warning( - "Contents of binary file{?s} {modified$path[binary]} were omitted") - } - if (!verbose) { - cli::cli_alert_info(paste( - "Print the comparison with {.code verbose = TRUE} to display the", - "differences in the {name}' contents")) - } - cli::cli_end() - } + if (length(diff$current_only) > 0) { + cli::cli_alert_info("The following attributes only exist in packet {x$current$only}:") + cli::cli_ul(diff$current_only) } - ), - public = list( - initialize = function(what, target, current, metadata, files) { - private$what <- what - private$target <- target - private$current <- current - private$metadata <- metadata - private$files <- files - }, + if (length(diff$differences) > 0 || + length(diff$target_only) > 0 || + length(diff$current_only) > 0) { + cli::cli_alert_info("Use {.code orderly_comparison_explain({name})} to examine the differences in detail.") + } else { + cli::cli_alert_success("The two packets are identical, up to trivial differences.") + } + }) +} - is_equal = function() { - is.null(private$metadata) && all(private$files$status == "unchanged") - }, +##' @export +print.orderly_comparison <- function(x, ..., name = deparse(substitute(x))) { + cat(format(x, ..., name = name), sep = "\n") +} - format = function(verbose = FALSE, ...) { - target <- private$target - current <- private$current +orderly_comparison_explain <- function(x, attributes = NULL, ..., verbose = FALSE, name = deparse(substitute(x))) { + rlang::check_dots_empty() - cli::cli_format_method({ - if (self$is_equal()) { - msg <- if (setequal(private$what, c("metadata", "files"))) { - "Packets {target} and {current} are identical" - } else if (identical(private$what, "metadata")) { - "Metadata of packets {target} and {current} is identical" - } else if (identical(private$what, "files")) { - "Files of packets {target} and {current} are identical" - } else if (identical(private$what, "artefacts")) { - "Artefacts of packets {target} and {current} are identical" - } else if (setequal(private$what, c("metadata", "artefacts"))) { - paste("Metadata and artefacts of packets {target} and {current}", - "are identical") - } else { - stop("Unhandled combination of `what`") - } - cli::cli_alert_success(msg) - } else { - cli::cli_alert_info( - "Comparing packets {private$target} and {private$current}") + cli::cli_alert_info("Comparing packets {x$target$id} and {x$current$id}...") - private$print_metadata() - private$print_files(verbose = verbose) - } - }) + if (is.null(attributes)) { + diff <- compare_metadata_attribute_list(x$target, x$current) + attributes <- union(diff$differences, + union(diff$target_only, diff$current_only)) + if (length(attributes) == 0) { + cli::cli_alert_success("The two packets are identical, up to trivial differences.") + return() } - ) -) + } + + for (n in attributes) { + compare_metadata_attribute( + x, attribute = n, verbose = verbose, name = name) + } +} diff --git a/man/orderly_compare_packets.Rd b/man/orderly_compare_packets.Rd index 84194223..8d7e71e7 100644 --- a/man/orderly_compare_packets.Rd +++ b/man/orderly_compare_packets.Rd @@ -7,10 +7,10 @@ orderly_compare_packets( target, current, - search_options = NULL, root = NULL, - locate = TRUE, - what = c("metadata", "files") + location = NULL, + allow_remote = NULL, + pull_metadata = NULL ) } \arguments{ @@ -18,26 +18,20 @@ orderly_compare_packets( \item{current}{The id of the other packet against which to compare.} -\item{search_options}{Options for locating packet files. If there are no -copies of the files locally, they can be downloaded automatically from a -remote location on-demand if \code{allow_remote} is \code{TRUE}.} - \item{root}{The path to the root directory, or \code{NULL} (the -default) to search for one from the current working directory if -\code{locate} is \code{TRUE}. This function does not require that the -directory is configured for orderly, and can be any \code{outpack} -root (see \link{orderly_init} for details).} - -\item{locate}{Logical, indicating if the root should be searched -for. If \code{TRUE}, then we looks in the directory given for \code{root} -(or the working directory if \code{NULL}) and then up through its -parents until it finds an \code{.outpack} directory or -\code{orderly_config.yml}} +default) to search for one from the current working +directory. This function does not require that the directory is +configured for orderly, and can be any \code{outpack} root (see +\link{orderly_init} for details).} \item{what}{One or more of "metadata", "files" and "artefacts", retricting what components of the packet to compare. This is useful when it is known for example that the source code of a report what changed, and one is only interested in the effect on its output.} + +\item{search_options}{Options for locating packet files. If there are no +copies of the files locally, they can be downloaded automatically from a +remote location on-demand if \code{allow_remote} is \code{TRUE}.} } \value{ An R6 object of class \code{orderly_packet_diff} is returned. Printing