diff --git a/DESCRIPTION b/DESCRIPTION index 8ffc3210..5270b0e8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,6 +17,7 @@ BugReports: https://github.com/mrc-ide/orderly2/issues Imports: R6, cli, + diffobj, fs, gert (>= 1.9.3), jsonlite, @@ -35,6 +36,7 @@ Suggests: pkgload, processx, rmarkdown, + stringr, testthat (>= 3.0.0) Config/testthat/edition: 3 Remotes: diff --git a/NAMESPACE b/NAMESPACE index 4c96b240..00a3f725 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,11 +1,14 @@ # Generated by roxygen2: do not edit by hand +S3method(format,orderly_compare_packets) S3method(format,orderly_query) S3method(print,orderly_cleanup_status) +S3method(print,orderly_compare_packets) S3method(print,orderly_query_explain) export(orderly_artefact) export(orderly_cleanup) export(orderly_cleanup_status) +export(orderly_compare_packets) export(orderly_config) export(orderly_config_set) export(orderly_copy_files) diff --git a/R/compare.R b/R/compare.R new file mode 100644 index 00000000..e8b671e7 --- /dev/null +++ b/R/compare.R @@ -0,0 +1,211 @@ +is_binary_file <- function(path, n = 1024) { + # This is a pretty crude heuristic, but it seems good enough. + # It is actually similar to what the `diff` tool does. + data <- readBin(path, "raw", n) + as.raw(0) %in% data +} + +compare_metadata <- function(target, current) { + # id and time will almost always differ, but not in any interesting way. + # files may differ (especially the hashes), but we compare the files in + # detail seperately. + exclude <- c("id", "time", "files") + + target_filtered <- target[!(names(target) %in% exclude)] + current_filtered <- current[!(names(current) %in% exclude)] + if (!identical(target_filtered, current_filtered)) { + diffobj::diffPrint( + target_filtered, + current_filtered, + tar.banner = target$id, + cur.banner = current$id, + rds = FALSE, + mode = "unified", + style = list(pad = FALSE, wrap = FALSE), + interactive = FALSE) + } else { + NULL + } +} + +compare_filesets <- function(target, current) { + files <- merge(x = target, y = current, by = "path", all = TRUE) + status <- ifelse( + is.na(files$hash.x), "added", + ifelse(is.na(files$hash.y), "removed", + ifelse(files$hash.x == files$hash.y, "unchanged", "modified"))) + data.frame(path = files$path, status = status) +} + +compare_files <- function(target, current, files, root, search_options) { + if (is.null(files)) { + return(NULL) + } + + path_target <- withr::local_tempdir() + path_current <- withr::local_tempdir() + + # Copying the files into a temporary directory is fairly wasteful and, as + # long as the packet is unpacked already, we could read the files from the + # archive or file store directly. Nevertheless this makes accessing the files + # very straightforward, and covers the case where the file only exists + # remotely transparent. + + orderly_copy_files(target, dest = path_target, files = files, + options = search_options, root = root) + + orderly_copy_files(current, dest = path_current, files = files, + options = search_options, root = root) + + ret <- lapply(files, function(p) { + if (is_binary_file(file.path(path_target, p)) || + is_binary_file(file.path(path_current, p))) { + NULL + } else { + diffobj::diffFile( + file.path(path_target, p), + file.path(path_current, p), + tar.banner = file.path(target, p), + cur.banner = file.path(current, p), + rds = FALSE, + mode = "unified", + style = list(pad = FALSE, wrap = FALSE), + interactive = FALSE) + } + }) + names(ret) <- files + + ret +} + +##' Compare the metadata and contents of two packets. +##' +##' Insignificant differences in the metadata (eg. different dates and packet +##' IDs) are excluded from the comparison. +##' +##' If either packet is not unpacked, but `search_options`'s `allow_remote` is +##' `TRUE`, we will try to request files from remote locations, as necessary. +##' +##' @title Compare two packets +##' +##' @param target The id of the packet to use in the comparison. +##' @param current The id of the other packet against which to compare. +##' @param what One of "everything" (the default), "metadata", "files" or +##' "artefacts", retricting what components of the packet to compare. This is +##' useful when it is known for example that the source code of a report what +##' changed, and one is only interested in the effect on its output. +##' +##' @inheritParams orderly_metadata +##' +##' @return If the packets have identical contents, TRUE is returned. Otherwise +##' an object detailing the differences is returned. While the object can be +##' inspected, its contents is subject to change. In both cases, the returned +##' value has class `orderly_compare_packets`, allowing a user friendly +##' display of the result. +##' +##' @export +orderly_compare_packets <- function( + target, current, what = c("everything", "metadata", "files", "artefacts"), + search_options = NULL, root = NULL, locate = TRUE) { + validate_outpack_id(target, call = environment()) + validate_outpack_id(current, call = environment()) + what <- rlang::arg_match(what) + + root <- root_open(root, locate = locate, require_orderly = FALSE, + call = environment()) + + meta_target <- orderly_metadata(target, root = root) + meta_current <- orderly_metadata(current, root = root) + if (what %in% c("everything", "metadata")) { + metadata_diff <- compare_metadata(meta_target, meta_current) + } else { + metadata_diff <- NULL + } + + if (what == "artefacts") { + if (is.null(meta_target$custom$orderly) || + is.null(meta_current$custom$orderly)) { + cli::cli_abort("Cannot compare artefacts of non-orderly packets") + } + + artefacts_target <- unlist(meta_target$custom$orderly$artefacts$paths) + artefacts_current <- unlist(meta_current$custom$orderly$artefacts$paths) + files <- compare_filesets( + meta_target$files[meta_target$files$path %in% artefacts_target, ], + meta_current$files[meta_current$files$path %in% artefacts_current, ]) + } else if (what %in% c("everything", "files")) { + files <- compare_filesets(meta_target$files, meta_current$files) + } else { + files <- data.frame(path = NULL, status = NULL) + } + + if (is.null(metadata_diff) && all(files$status == "unchanged")) { + ret <- TRUE + } else { + idx <- files$status == "modified" + files$diff[idx] <- compare_files(target, current, files[idx, ]$path, + search_options = search_options, + root = root) + + ret <- list(packets = c(target = target, current = current), + metadata_diff = metadata_diff, + files = files) + } + + class(ret) <- "orderly_compare_packets" + + ret +} + + +#' @export +format.orderly_compare_packets <- function(x) { + cli::cli_format_method({ + if (isTRUE(x)) { + cli::cli_alert_success("Packets are identical") + } else { + target <- x$packets[[1]] + current <- x$packets[[2]] + + cli::cli_alert_info("Comparing packets {target} and {current}") + + if (!is.null(x$metadata_diff)) { + cli::cli_alert_warning("Packet metadata differs:") + cli::cli_div(theme = list(div = list("margin-left" = 2))) + cli::cli_verbatim(as.character(x$metadata_diff)) + cli::cli_end() + } + + removed <- x$files[x$files$status == "removed", ] + if (nrow(removed) > 0) { + cli::cli_alert_warning("The following files only exist in packet {current}:") + cli::cli_ul(removed$path) + } + + added <- x$files[x$files$status == "added", ] + if (nrow(added) > 0) { + cli::cli_alert_warning("The following files only exist in packet {target}:") + cli::cli_ul(added$path) + } + + modified <- x$files[x$files$status == "modified", ] + if (nrow(modified) > 0) { + cli::cli_alert_warning("The following files exist in both packets but have different contents:") + cli::cli_ul() + for (i in seq(nrow(modified))) { + cli::cli_li("{modified$path[[i]]}") + if (!is.null(modified$diff[[i]])) { + cli::cli_verbatim(as.character(modified$diff[[i]])) + } + } + cli::cli_end() + } + } + }) +} + + +#' @export +print.orderly_compare_packets <- function(x, ...) { + cat(format(x, ...), sep = "\n") +} diff --git a/R/util.R b/R/util.R index ee7e8f24..e0928989 100644 --- a/R/util.R +++ b/R/util.R @@ -646,6 +646,7 @@ is_testing <- function() { } #' Given a character vector, missing names are filled using the value. +#' @noRd fill_missing_names <- function(x) { if (is.null(names(x))) { names(x) <- x diff --git a/man/orderly_compare_packets.Rd b/man/orderly_compare_packets.Rd new file mode 100644 index 00000000..a03de707 --- /dev/null +++ b/man/orderly_compare_packets.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/compare.R +\name{orderly_compare_packets} +\alias{orderly_compare_packets} +\title{Compare two packets} +\usage{ +orderly_compare_packets( + target, + current, + what = c("everything", "metadata", "files", "artefacts"), + search_options = NULL, + root = NULL, + locate = TRUE +) +} +\arguments{ +\item{target}{The id of the packet to use in the comparison.} + +\item{current}{The id of the other packet against which to compare.} + +\item{what}{One of "everything" (the default), "metadata", "files" or +"artefacts", retricting what components of the packet to compare. This is +useful when it is known for example that the source code of a report what +changed, and one is only interested in the effect on its output.} + +\item{root}{The path to the root directory, or \code{NULL} (the +default) to search for one from the current working directory if +\code{locate} is \code{TRUE}. This function does not require that the +directory is configured for orderly, and can be any \code{outpack} +root (see \link{orderly_init} for details).} + +\item{locate}{Logical, indicating if the root should be searched +for. If \code{TRUE}, then we looks in the directory given for \code{root} +(or the working directory if \code{NULL}) and then up through its +parents until it finds an \code{.outpack} directory or +\code{orderly_config.yml}} +} +\value{ +If the packets have identical contents, TRUE is returned. Otherwise +an object detailing the differences is returned. While the object can be +inspected, its contents is subject to change. In both cases, the returned +value has class \code{orderly_compare_packets}, allowing a user friendly +display of the result. +} +\description{ +Compare the metadata and contents of two packets. +} +\details{ +Insignificant differences in the metadata (eg. different dates and packet +IDs) are excluded from the comparison. + +If either packet is not unpacked, but \code{search_options}'s \code{allow_remote} is +\code{TRUE}, we will try to request files from remote locations, as necessary. +} diff --git a/man/orderly_shared_resource.Rd b/man/orderly_shared_resource.Rd index a30d15b0..167b4724 100644 --- a/man/orderly_shared_resource.Rd +++ b/man/orderly_shared_resource.Rd @@ -7,9 +7,16 @@ orderly_shared_resource(...) } \arguments{ -\item{...}{Named arguments corresponding to shared resources to -copy. The name will be the destination filename, while the value -is the filename within the shared resource directory.} +\item{...}{The shared resources to copy. If arguments are named, the name +will be the destination file while the value is the filename within the +shared resource directory. + +You can use a limited form of string interpolation in the names of +this argument; using \verb{$\{variable\}} will pick up values from +\code{envir} and substitute them into your string. This is similar +to the interpolation you might be familiar with from +\code{glue::glue} or similar, but much simpler with no concatenation +or other fancy features supported.} } \value{ Invisibly, a data.frame with columns \code{here} (the fileames diff --git a/tests/testthat/_snaps/compare.md b/tests/testthat/_snaps/compare.md new file mode 100644 index 00000000..68689446 --- /dev/null +++ b/tests/testthat/_snaps/compare.md @@ -0,0 +1,83 @@ +# Comparing a packet to itself returns TRUE + + Code + print(result) + Output + v Packets are identical + +# Comparing packets ignores ID and time differences + + Code + print(result) + Output + v Packets are identical + +# Can compare packets with different metadata + + Code + print(all) + Output + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 + ! Packet metadata differs: + < 19700101-000000-00000001 + > 19700101-000000-00000002 + @@ 7,5 / 7,5 @@ + $parameters + $parameters$A + < [1] "foo" + > [1] "bar" + + + +# Can compare packets with different file contents + + Code + print(all) + Output + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 + ! The following files exist in both packets but have different contents: + * data.R + < 19700101-000000-00000001/data.R + > 19700101-000000-00000002/data.R + @@ 1 / 1 @@ + < writeLines("Hello", "data.txt") + > writeLines("World", "data.txt") + * data.txt + < 19700101-000000-00000001/data.txt + > 19700101-000000-00000002/data.txt + @@ 1 / 1 @@ + < Hello + > World + +# Can compare artefacts only + + Code + print(files) + Output + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 + ! The following files exist in both packets but have different contents: + * data.R + < 19700101-000000-00000001/data.R + > 19700101-000000-00000002/data.R + @@ 1,4 / 1,4 @@ + { + orderly_artefact("Output", "output.txt") + < writeLines(toString(2 + 1), "output.txt") + > writeLines(toString(1 + 2), "output.txt") + } + +# Can compare packets with binary contents + + Code + print(result) + Output + i Comparing packets 19700101-000000-00000001 and 19700101-000000-00000002 + ! The following files exist in both packets but have different contents: + * data.rds + * data.txt + < 19700101-000000-00000001/data.txt + > 19700101-000000-00000002/data.txt + @@ 1 / 1 @@ + < Hello + > World + diff --git a/tests/testthat/helper-orderly.R b/tests/testthat/helper-orderly.R index 373786c6..0040e582 100644 --- a/tests/testthat/helper-orderly.R +++ b/tests/testthat/helper-orderly.R @@ -125,3 +125,10 @@ orderly_init_quietly <- function(...) { orderly_run_quietly <- function(..., echo = FALSE) { suppressMessages(orderly_run(..., echo = echo)) } + +orderly_run_snippet <- function(root, name, expr, ...) { + fs::dir_create(file.path(root$path, "src", name)) + writeLines(deparse(substitute(expr)), + file.path(root$path, "src", name, sprintf("%s.R", name))) + orderly_run_quietly(name, root = root, ...) +} diff --git a/tests/testthat/helper-outpack.R b/tests/testthat/helper-outpack.R index c6c5e9cc..d7aebccb 100644 --- a/tests/testthat/helper-outpack.R +++ b/tests/testthat/helper-outpack.R @@ -185,3 +185,18 @@ forcibly_truncate_file <- function(path) { file.create(path) fs::file_chmod(path, permissions) } + +#' Scrub packets from an output. +#' +#' This returns a transformation suitable to be passed to `expect_snapshot`. +#' The specified packet IDs are removed from the output, and replaced with +#' stable strings of the same length. +#' +#' @param ... the list of packet IDs to remove from the output. +#' @noRd +scrub_packets <- function(...) { + ids <- c(...) + replacements <- sprintf("19700101-000000-%08x", seq_along(ids)) + names(replacements) <- ids + function(x) stringr::str_replace_all(x, replacements) +} diff --git a/tests/testthat/test-compare.R b/tests/testthat/test-compare.R new file mode 100644 index 00000000..8575acd6 --- /dev/null +++ b/tests/testthat/test-compare.R @@ -0,0 +1,161 @@ +test_that("Comparing a packet to itself returns TRUE", { + root <- create_temporary_root() + id <- create_random_packet(root) + + result <- orderly_compare_packets(id, id, root = root) + expect_true(result) + expect_snapshot(print(result), transform = scrub_packets(id)) +}) + + +test_that("Comparing packets ignores ID and time differences", { + root <- create_temporary_root() + id1 <- create_deterministic_packet(root) + id2 <- create_deterministic_packet(root) + + meta1 <- orderly_metadata(id1, root = root) + meta2 <- orderly_metadata(id2, root = root) + expect_false(isTRUE(all.equal(meta1$id, meta2$id))) + expect_false(isTRUE(all.equal(meta1$time, meta2$time))) + + result <- orderly_compare_packets(id1, id2, root = root) + expect_true(result) + expect_snapshot(print(result), transform = scrub_packets(id1, id2)) +}) + + +test_that("Can compare packets with different metadata", { + root <- create_temporary_root() + + p1 <- create_deterministic_packet(root, "data", list(A = "foo")) + p2 <- create_deterministic_packet(root, "data", list(A = "bar")) + + all <- orderly_compare_packets(p1, p2, root = root) + metadata <- orderly_compare_packets(p1, p2, what = "metadata", root = root) + files <- orderly_compare_packets(p1, p2, what = "files", root = root) + + expect_false(isTRUE(all)) + expect_false(isTRUE(metadata)) + expect_true(files) + + expect_snapshot(print(all), transform = scrub_packets(p1, p2)) +}) + + +test_that("Can compare packets with different file contents", { + root <- create_temporary_root() + + p1 <- orderly_run_snippet(root, "data", writeLines("Hello", "data.txt")) + p2 <- orderly_run_snippet(root, "data", writeLines("World", "data.txt")) + + all <- orderly_compare_packets(p1, p2, root = root) + metadata <- orderly_compare_packets(p1, p2, what = "metadata", root = root) + files <- orderly_compare_packets(p1, p2, what = "files", root = root) + + expect_false(isTRUE(all)) + expect_true(metadata) + expect_false(isTRUE(files)) + + expect_snapshot(print(all), transform = scrub_packets(p1, p2)) +}) + + +test_that("Can compare artefacts only", { + root <- create_temporary_root() + + p1 <- orderly_run_snippet(root, "data", { + orderly_artefact("Output", "output.txt") + writeLines(toString(2 + 1), "output.txt") + }) + + p2 <- orderly_run_snippet(root, "data", { + orderly_artefact("Output", "output.txt") + writeLines(toString(1 + 2), "output.txt") + }) + + files <- orderly_compare_packets(p1, p2, what = "files", root = root) + artefacts <- orderly_compare_packets(p1, p2, what = "artefacts", root = root) + + # The packet files, in particular the source code, are different. However the + # different snippets produce identical artefacts. + expect_false(isTRUE(files)) + expect_true(artefacts) + + expect_snapshot(print(files), transform = scrub_packets(p1, p2)) +}) + + +test_that("Can detect newly declared artefact", { + root <- create_temporary_root() + + p1 <- orderly_run_snippet(root, "data", { + writeLines("Hello", "hello.txt") + }) + + p2 <- orderly_run_snippet(root, "data", { + orderly_artefact("Output", "hello.txt") + writeLines("Hello", "hello.txt") + }) + + files <- orderly_compare_packets(p1, p2, what = "files", root = root) + artefacts <- orderly_compare_packets(p1, p2, what = "artefacts", root = root) + + f <- files$files[files$files$path == "hello.txt", ] + expect_equal(f$status, "unchanged") + + # When comparing artefacts, the file is reported as "added", because while it + # exists in the original packet, it was not an artefact. + f <- artefacts$files[artefacts$files$path == "hello.txt", ] + expect_equal(f$status, "added") +}) + + +test_that("Can compare packets with binary contents", { + root <- create_temporary_root() + + p1 <- orderly_run_snippet(root, "data", { + orderly_artefact("Outputs", c("data.txt", "data.rds")) + writeLines("Hello", "data.txt") + saveRDS(1:10, "data.rds") + }) + + p2 <- orderly_run_snippet(root, "data", { + orderly_artefact("Outputs", c("data.txt", "data.rds")) + writeLines("World", "data.txt") + saveRDS(11:20, "data.rds") + }) + + result <- orderly_compare_packets(p1, p2, what = "artefacts", root = root) + expect_false(isTRUE(result)) + + # We can't render a diff for binary files, so these are set to NULL, unlike + # the text file. + text <- result$files[result$files$path == "data.txt", ] + binary <- result$files[result$files$path == "data.rds", ] + expect_false(is.null(text$diff[[1]])) + expect_true(is.null(binary$diff[[1]])) + + expect_snapshot(print(result), transform = scrub_packets(p1, p2)) +}) + + +test_that("Cannot compare artefacts of non-orderly packets", { + root <- create_temporary_root() + + src <- withr::local_tempdir() + writeLines("World", file.path(src, "data.txt")) + p <- outpack_packet_start_quietly(src, "data", root = root) + outpack_packet_end_quietly(p) + + # Only orderly packets have the right metadata needed to infer what is or + # isn't an artefact. + expect_error( + orderly_compare_packets(p$id, p$id, what = "artefacts", root = root), + "Cannot compare artefacts of non-orderly packets") + + # We can still do full packet comparison, even without the orderly-specific + # metadata. + expect_no_error(orderly_compare_packets(p$id, p$id, root = root)) + expect_no_error( + orderly_compare_packets(p$id, p$id, what = "files", root = root)) +})