diff --git a/DESCRIPTION b/DESCRIPTION index 1ece516..a6bef21 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ Imports: R6, redux, rrq (>= 0.7.15), + stringr, withr Suggests: fs, diff --git a/R/api.R b/R/api.R index 4b89b24..dcfdea8 100644 --- a/R/api.R +++ b/R/api.R @@ -93,25 +93,13 @@ repository_branches <- function(repositories_base_path, url) { ##' query ref :: string ##' state root :: root report_list <- function(root, ref) { - contents <- gert::git_ls(root, ref = ref) - re <- "^src/([^/]+)/(\\1|orderly)\\.R$" - nms <- sub(re, "\\1", - grep(re, contents$path, value = TRUE, perl = TRUE), - perl = TRUE - ) - last_changed <- function(nm) { - max(contents$modified[startsWith(contents$path, sprintf("src/%s", nm))]) - } - updatedTime <- vnapply(nms, last_changed, USE.NAMES = FALSE) - modified_sources <- git_get_modified(ref, relative_dir = "src/", repo = root) - modified_reports <- unique(first_dirname(modified_sources)) - hasModifications <- vlapply(nms, function(report_name) { - report_name %in% modified_reports - }, USE.NAMES = FALSE) + base <- git_remote_default_branch_ref(root) + reports <- get_reports(root = root, ref = ref, base = base) + data.frame( - name = nms, - updatedTime = updatedTime, - hasModifications = hasModifications + name = reports$name, + updatedTime = as.numeric(reports$updated_at), + hasModifications = reports$has_changes ) } diff --git a/R/git.R b/R/git.R index 750e656..154dc33 100644 --- a/R/git.R +++ b/R/git.R @@ -43,13 +43,13 @@ git_remote_list_branches <- function(repo) { branches } -git_run <- function(args, repo = NULL, check = FALSE) { +git_run <- function(args, repo = NULL) { git <- sys_which("git") if (!is.null(repo)) { args <- c("-C", repo, args) } res <- system3(git, args) - if (check && !res$success) { + if (!res$success) { stop(sprintf("Error code %d running command:\n%s", res$code, paste0(" > ", res$output, collapse = "\n"))) } @@ -76,21 +76,37 @@ git_remote_default_branch_name <- function(repo) { } -git_get_modified <- function(ref, base = NULL, - relative_dir = NULL, repo = NULL) { - if (is.null(base)) { - base <- git_remote_default_branch_ref(repo) - } - if (is.null(relative_dir)) { - relative <- "" - additional_args <- "" - } else { - relative <- sprintf("--relative=%s", relative_dir) - additional_args <- sprintf("-- %s", relative_dir) - } - git_run( - c("diff", "--name-only", relative, - sprintf("%s...%s", base, gert::git_commit_id(ref, repo = repo)), - additional_args), - repo = repo, check = TRUE)$output +#' Get the last commit to have modified the given path. +#' +#' If the path is a directory, any modification to files contained within +#' it are considered. +#' +#' @param path path to the file of directory to search for +#' @param ref the Git commit from which to start the search. Only ancestors of +#' that commit will be considered. +#' @param repo the path to the Git repo to use. +git_get_latest_commit <- function(path, ref, repo = NULL) { + # libgit2 (and thus gert) doesn't really have an interface for this. + # See https://github.com/libgit2/libgit2/issues/495 + git_run(c("rev-list", "--max-count=1", ref, "--", path), + repo = repo)$output +} + + +#' Get the difference between two tree-ish +#' +#' @param left the tree-ish to use as the base for the comparison. +#' @param right the tree-ish to compare to the base. +#' @param repo the path to the Git repo to use. +#' @return a dataframe for each differing entry in the trees, with columns +#' `mode1`, `mode2`, `hash1`, `hash2`, `status`, `score`, `src` and `dst`. +git_diff_tree <- function(left, right, repo = NULL) { + output <- git_run(c("diff-tree", left, right), repo = repo)$output + + # See https://git-scm.com/docs/git-diff-tree#_raw_output_format for a + # description of the format. + re <- paste0( + "^:(?\\d+) (?\\d+) (?[0-9a-f]+) (?[0-9a-f]+)", + " (?[A-Z])(?\\d+)?\\t(?[^\\t]*)(?:\\t(?[^\\t]*))?$") + as.data.frame(stringr::str_match(output, re)[, -1, drop = FALSE]) } diff --git a/R/reports.R b/R/reports.R index 00bf0a3..c916886 100644 --- a/R/reports.R +++ b/R/reports.R @@ -1,8 +1,40 @@ +extract_report_names <- function(paths) { + re <- "^src/([^/]+)/(\\1|orderly)\\.R$" + sub(re, "\\1", grep(re, paths, value = TRUE, perl = TRUE)) +} + +get_reports <- function(ref, root, base = NULL) { + contents <- gert::git_ls(repo = root, ref = ref) + names <- extract_report_names(contents$path) + + paths <- paste0("src/", names) + times <- as.POSIXct(vnapply(paths, function(p) { + commit <- git_get_latest_commit(p, ref, repo = root) + gert::git_commit_info(commit, repo = root)$time + })) + + result <- data.frame( + name = names, + updated_at = times, + row.names = NULL + ) + + if (!is.null(base)) { + diff <- git_diff_tree(paste0(base, ":src"), + paste0(ref, ":src"), + repo = root) + result$has_changes <- names %in% diff$src + } + + result +} + + get_report_parameters <- function(name, ref, root) { path <- get_orderly_script_path(name, ref, root) sha <- gert::git_commit_id(ref, repo = root) contents <- git_run( - c("show", sprintf("%s:%s", sha, path)), repo = root, check = TRUE + c("show", sprintf("%s:%s", sha, path)), repo = root )$output exprs <- parse(text = contents) orderly2::orderly_parse_expr(exprs, filename = basename(path))$parameters diff --git a/docker/Dockerfile b/docker/Dockerfile index ff696d0..e39a6af 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,27 +1,15 @@ -FROM rocker/r-ver:4.1 +FROM rocker/r-ver:4.4 -RUN apt-get update && apt-get install -y --no-install-recommends \ - git \ - libcurl4-openssl-dev \ - libhiredis-dev \ - libssl-dev \ - zlib1g-dev \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* +RUN install2.r pak -# Without this, we are unable to pick up more recent packages -COPY docker/Rprofile.site /usr/local/lib/R/etc/Rprofile.site - -COPY DESCRIPTION /tmp/DESCRIPTION - -RUN install2.r --error remotes && \ - Rscript -e 'remotes::install_deps("/tmp")' +COPY DESCRIPTION /src/DESCRIPTION +RUN Rscript -e "pak::local_install_deps('/src')" COPY . /src -RUN R CMD INSTALL --install-tests /src && rm -rf /src +RUN Rscript -e "pak::local_install('/src')" COPY docker/bin /usr/local/bin/ - + RUN git config --global --add safe.directory "*" RUN echo ".packit" > /.gitignore RUN git config --global core.excludesFile "/.gitignore" diff --git a/docker/Rprofile.site b/docker/Rprofile.site deleted file mode 100644 index 22b4c2f..0000000 --- a/docker/Rprofile.site +++ /dev/null @@ -1,4 +0,0 @@ -options(repos = c(CRAN = 'https://packagemanager.rstudio.com/all/__linux__/focal/latest'), download.file.method = 'libcurl') -options(HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), - paste(getRversion(), R.version$platform, - R.version$arch, R.version$os))) diff --git a/man/git_diff_tree.Rd b/man/git_diff_tree.Rd new file mode 100644 index 0000000..45e3743 --- /dev/null +++ b/man/git_diff_tree.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/git.R +\name{git_diff_tree} +\alias{git_diff_tree} +\title{Get the difference between two tree-ish} +\usage{ +git_diff_tree(left, right, repo = NULL) +} +\arguments{ +\item{left}{the tree-ish to use as the base for the comparison.} + +\item{right}{the tree-ish to compare to the base.} + +\item{repo}{the path to the Git repo to use.} +} +\value{ +a dataframe for each differing entry in the trees, with columns +\code{mode1}, \code{mode2}, \code{hash1}, \code{hash2}, \code{status}, \code{score}, \code{src} and \code{dst}. +} +\description{ +Get the difference between two tree-ish +} diff --git a/man/git_get_latest_commit.Rd b/man/git_get_latest_commit.Rd new file mode 100644 index 0000000..3bd5c4c --- /dev/null +++ b/man/git_get_latest_commit.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/git.R +\name{git_get_latest_commit} +\alias{git_get_latest_commit} +\title{Get the last commit to have modified the given path.} +\usage{ +git_get_latest_commit(path, ref, repo = NULL) +} +\arguments{ +\item{path}{path to the file of directory to search for} + +\item{ref}{the Git commit from which to start the search. Only ancestors of +that commit will be considered.} + +\item{repo}{the path to the Git repo to use.} +} +\description{ +If the path is a directory, any modification to files contained within +it are considered. +} diff --git a/tests/testthat/helper-orderly-runner.R b/tests/testthat/helper-orderly-runner.R index c42ece6..eca4c4c 100644 --- a/tests/testthat/helper-orderly-runner.R +++ b/tests/testthat/helper-orderly-runner.R @@ -165,8 +165,15 @@ git_add_and_commit <- function(path, add = ".", message = "new commit") { } +git_add_and_commit <- function(path, add = ".", message = "new commit") { + gert::git_add(add, repo = path) + user <- "author " + gert::git_commit(message, author = user, committer = user, repo = path) +} + + create_new_commit <- function(path, new_file = "new", add = ".", ...) { - writeLines("new file", file.path(path, new_file)) + writeLines(ids::random_id(), file.path(path, new_file)) git_add_and_commit(path, add, ...) } diff --git a/tests/testthat/test-git.R b/tests/testthat/test-git.R index b65bc2b..7641701 100644 --- a/tests/testthat/test-git.R +++ b/tests/testthat/test-git.R @@ -1,11 +1,9 @@ test_that("handle failure", { testthat::skip_on_cran() repo <- initialise_git_repo() - r <- git_run("unknown-command", repo = repo$path) - expect_false(r$success) expect_error( - git_run("unknown-command", repo = repo$path, check = TRUE), - r$output, fixed = TRUE) + git_run("unknown-command", repo = repo$path), + "'unknown-command' is not a git command", fixed = TRUE) }) @@ -28,29 +26,61 @@ test_that("can get default branch when remote origin is set", { "main") }) -test_that("can get files which have been modified", { - testthat::skip_on_cran() - repo <- test_prepare_orderly_remote_example("data") - copy_examples("parameters", repo$local) - git_add_and_commit(repo$local) - - log <- gert::git_log(repo = repo$local) - expect_equal(git_get_modified(log$commit[[2]], repo = repo$local), - character(0)) - expect_equal(git_get_modified(log$commit[[1]], repo = repo$local), - "src/parameters/parameters.R") - expect_equal(git_get_modified(log$commit[[1]], relative = "src/", - repo = repo$local), - "parameters/parameters.R") - expect_equal(git_get_modified(log$commit[[1]], base = log$commit[[2]], - repo = repo$local), - "src/parameters/parameters.R") - expect_equal(git_get_modified(log$commit[[2]], base = log$commit[[1]], - repo = repo$local), - character(0)) - expect_equal(git_get_modified(log$commit[[2]], base = log$commit[[2]], - repo = repo$local), - character(0)) +test_that("can get last commit for a path", { + repo <- initialise_git_repo() + c1 <- create_new_commit(repo$path, "hello.txt") + c2 <- create_new_commit(repo$path, "world.txt") + c3 <- create_new_commit(repo$path, "hello.txt") + c4 <- create_new_commit(repo$path, "world.txt") + + expect_equal(git_get_latest_commit("hello.txt", "HEAD", repo$path), c3) + expect_equal(git_get_latest_commit("world.txt", "HEAD", repo$path), c4) + + # If we start at c2, only it and its ancestors (ie. c1) are considered. + expect_equal(git_get_latest_commit("hello.txt", c2, repo$path), c1) + expect_equal(git_get_latest_commit("world.txt", c2, repo$path), c2) +}) + + +test_that("can diff trees", { + repo <- test_prepare_orderly_remote_example("data") + copy_examples("parameters", repo$local) + git_add_and_commit(repo$local) + + result <- git_diff_tree("HEAD^:src", "HEAD:src", repo = repo$local) + expect_equal(nrow(result), 1) + + expect_equal(result$mode1, "000000") + expect_equal(result$mode2, "040000") + expect_match(result$hash1, "[0-9a-f]{20}") + expect_match(result$hash2, "[0-9a-f]{20}") + expect_equal(result$status, "A") + expect_equal(result$src, "parameters") + + create_new_commit(repo$local, "src/parameters/hello.txt") + + result <- git_diff_tree("HEAD^:src", "HEAD:src", repo = repo$local) + expect_equal(nrow(result), 1) + + expect_equal(result$mode1, "040000") + expect_equal(result$mode2, "040000") + expect_match(result$hash1, "[0-9a-f]{20}") + expect_match(result$hash2, "[0-9a-f]{20}") + expect_equal(result$status, "M") + expect_equal(result$src, "parameters") + + fs::file_move(file.path(repo$local, "src", "parameters"), + file.path(repo$local, "src", "zparameters")) + git_add_and_commit(repo$local) + + # diff-tree never detects renames or copies. They are instead represented as + # a add and delete. + result <- git_diff_tree("HEAD^:src", "HEAD:src", repo = repo$local) + expect_equal(nrow(result), 2) + expect_equal(result$status[[1]], "D") + expect_equal(result$src[[1]], "parameters") + expect_equal(result$status[[2]], "A") + expect_equal(result$src[[2]], "zparameters") }) diff --git a/tests/testthat/test-reports.R b/tests/testthat/test-reports.R index afe6486..e72bb81 100644 --- a/tests/testthat/test-reports.R +++ b/tests/testthat/test-reports.R @@ -1,3 +1,55 @@ +test_that("can get list of reports", { + root <- test_prepare_orderly_example(c("data", "parameters")) + + reports <- get_reports(root = root, ref = "HEAD") + expect_setequal(reports$name, c("data", "parameters")) +}) + + +test_that("report list includes last modification time", { + root <- test_prepare_orderly_example(c("data", "parameters")) + + writeLines("Hello", file.path(root, "src/data/hello.txt")) + writeLines("World", file.path(root, "src/parameters/world.txt")) + + t1 <- as.POSIXct("2000-01-01T00:00:00") + t2 <- as.POSIXct("2010-01-01T00:00:00") + + gert::git_add("src/data/hello.txt", repo = root) + gert::git_commit( + "hello", + author = gert::git_signature("author", "email", t1), + repo = root + ) + + gert::git_add("src/parameters/world.txt", repo = root) + gert::git_commit( + "world", + author = gert::git_signature("author", "email", t2), + repo = root + ) + + reports <- get_reports(root = root, ref = "HEAD") + expect_setequal(reports$name, c("data", "parameters")) + expect_equal(reports[reports$name == "data",]$updated_at, t1) + expect_equal(reports[reports$name == "parameters",]$updated_at, t2) +}) + + +test_that("report list includes modification status", { + root <- test_prepare_orderly_example(c("data", "parameters")) + + writeLines("Hello", file.path(root, "src/data/hello.txt")) + git_add_and_commit(root) + + reports <- get_reports(root = root, ref = "HEAD", base = "HEAD^") + expect_setequal(reports$name, c("data", "parameters")) + expect_true(reports[reports$name == "data",]$has_changes) + expect_true(!reports[reports$name == "parameters",]$has_changes) + +}) + + test_that("can get orderly script name", { root <- test_prepare_orderly_example(c("data", "parameters")) expect_equal(get_orderly_script_path("data", "HEAD", root), "src/data/data.R")