From 18e054678ed8a71622211ff05ea90abd49204fe6 Mon Sep 17 00:00:00 2001 From: Paul Lietar Date: Fri, 10 Jan 2025 15:40:41 +0000 Subject: [PATCH 1/3] Fix the reported modification time of reports. The `/report/list` endpoint uses the `git_ls` function to enumerate reports, and supposedly includes the modification time is the response. Unfortunately the modification time returned by `git_ls` is pretty useless, and actually refers to the modification times in the local worktree, which has nothing to do with commit times. The proper way to do this is to identify the last commit to have touched the file, and then get the time of that commit. Unfortunately gert does not have an easy way to list commits on just one file (and in fact neither does libgit2), so we fallback to using the `git rev-list` command. Additionally I have re-written the code that was used to compare reports against the default branch to use Git plumbing commands instead of the high-level porcelain ones. --- DESCRIPTION | 1 + R/api.R | 24 ++------ R/git.R | 54 +++++++++++------ R/reports.R | 34 ++++++++++- man/git_diff_tree.Rd | 22 +++++++ man/git_get_latest_commit.Rd | 20 ++++++ tests/testthat/helper-orderly-runner.R | 13 +++- tests/testthat/test-git.R | 84 +++++++++++++++++--------- tests/testthat/test-reports.R | 55 +++++++++++++++++ 9 files changed, 239 insertions(+), 68 deletions(-) create mode 100644 man/git_diff_tree.Rd create mode 100644 man/git_get_latest_commit.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 1ece516..a6bef21 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,6 +26,7 @@ Imports: R6, redux, rrq (>= 0.7.15), + stringr, withr Suggests: fs, diff --git a/R/api.R b/R/api.R index 4b89b24..215cb06 100644 --- a/R/api.R +++ b/R/api.R @@ -93,25 +93,13 @@ repository_branches <- function(repositories_base_path, url) { ##' query ref :: string ##' state root :: root report_list <- function(root, ref) { - contents <- gert::git_ls(root, ref = ref) - re <- "^src/([^/]+)/(\\1|orderly)\\.R$" - nms <- sub(re, "\\1", - grep(re, contents$path, value = TRUE, perl = TRUE), - perl = TRUE - ) - last_changed <- function(nm) { - max(contents$modified[startsWith(contents$path, sprintf("src/%s", nm))]) - } - updatedTime <- vnapply(nms, last_changed, USE.NAMES = FALSE) - modified_sources <- git_get_modified(ref, relative_dir = "src/", repo = root) - modified_reports <- unique(first_dirname(modified_sources)) - hasModifications <- vlapply(nms, function(report_name) { - report_name %in% modified_reports - }, USE.NAMES = FALSE) + base <- git_get_default_branch(root) + reports <- get_reports(root = root, ref = ref, base = base) + data.frame( - name = nms, - updatedTime = updatedTime, - hasModifications = hasModifications + name = reports$name, + updatedTime = as.numeric(reports$updated_at), + hasModifications = reports$has_changes ) } diff --git a/R/git.R b/R/git.R index 750e656..154dc33 100644 --- a/R/git.R +++ b/R/git.R @@ -43,13 +43,13 @@ git_remote_list_branches <- function(repo) { branches } -git_run <- function(args, repo = NULL, check = FALSE) { +git_run <- function(args, repo = NULL) { git <- sys_which("git") if (!is.null(repo)) { args <- c("-C", repo, args) } res <- system3(git, args) - if (check && !res$success) { + if (!res$success) { stop(sprintf("Error code %d running command:\n%s", res$code, paste0(" > ", res$output, collapse = "\n"))) } @@ -76,21 +76,37 @@ git_remote_default_branch_name <- function(repo) { } -git_get_modified <- function(ref, base = NULL, - relative_dir = NULL, repo = NULL) { - if (is.null(base)) { - base <- git_remote_default_branch_ref(repo) - } - if (is.null(relative_dir)) { - relative <- "" - additional_args <- "" - } else { - relative <- sprintf("--relative=%s", relative_dir) - additional_args <- sprintf("-- %s", relative_dir) - } - git_run( - c("diff", "--name-only", relative, - sprintf("%s...%s", base, gert::git_commit_id(ref, repo = repo)), - additional_args), - repo = repo, check = TRUE)$output +#' Get the last commit to have modified the given path. +#' +#' If the path is a directory, any modification to files contained within +#' it are considered. +#' +#' @param path path to the file of directory to search for +#' @param ref the Git commit from which to start the search. Only ancestors of +#' that commit will be considered. +#' @param repo the path to the Git repo to use. +git_get_latest_commit <- function(path, ref, repo = NULL) { + # libgit2 (and thus gert) doesn't really have an interface for this. + # See https://github.com/libgit2/libgit2/issues/495 + git_run(c("rev-list", "--max-count=1", ref, "--", path), + repo = repo)$output +} + + +#' Get the difference between two tree-ish +#' +#' @param left the tree-ish to use as the base for the comparison. +#' @param right the tree-ish to compare to the base. +#' @param repo the path to the Git repo to use. +#' @return a dataframe for each differing entry in the trees, with columns +#' `mode1`, `mode2`, `hash1`, `hash2`, `status`, `score`, `src` and `dst`. +git_diff_tree <- function(left, right, repo = NULL) { + output <- git_run(c("diff-tree", left, right), repo = repo)$output + + # See https://git-scm.com/docs/git-diff-tree#_raw_output_format for a + # description of the format. + re <- paste0( + "^:(?\\d+) (?\\d+) (?[0-9a-f]+) (?[0-9a-f]+)", + " (?[A-Z])(?\\d+)?\\t(?[^\\t]*)(?:\\t(?[^\\t]*))?$") + as.data.frame(stringr::str_match(output, re)[, -1, drop = FALSE]) } diff --git a/R/reports.R b/R/reports.R index 00bf0a3..c916886 100644 --- a/R/reports.R +++ b/R/reports.R @@ -1,8 +1,40 @@ +extract_report_names <- function(paths) { + re <- "^src/([^/]+)/(\\1|orderly)\\.R$" + sub(re, "\\1", grep(re, paths, value = TRUE, perl = TRUE)) +} + +get_reports <- function(ref, root, base = NULL) { + contents <- gert::git_ls(repo = root, ref = ref) + names <- extract_report_names(contents$path) + + paths <- paste0("src/", names) + times <- as.POSIXct(vnapply(paths, function(p) { + commit <- git_get_latest_commit(p, ref, repo = root) + gert::git_commit_info(commit, repo = root)$time + })) + + result <- data.frame( + name = names, + updated_at = times, + row.names = NULL + ) + + if (!is.null(base)) { + diff <- git_diff_tree(paste0(base, ":src"), + paste0(ref, ":src"), + repo = root) + result$has_changes <- names %in% diff$src + } + + result +} + + get_report_parameters <- function(name, ref, root) { path <- get_orderly_script_path(name, ref, root) sha <- gert::git_commit_id(ref, repo = root) contents <- git_run( - c("show", sprintf("%s:%s", sha, path)), repo = root, check = TRUE + c("show", sprintf("%s:%s", sha, path)), repo = root )$output exprs <- parse(text = contents) orderly2::orderly_parse_expr(exprs, filename = basename(path))$parameters diff --git a/man/git_diff_tree.Rd b/man/git_diff_tree.Rd new file mode 100644 index 0000000..45e3743 --- /dev/null +++ b/man/git_diff_tree.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/git.R +\name{git_diff_tree} +\alias{git_diff_tree} +\title{Get the difference between two tree-ish} +\usage{ +git_diff_tree(left, right, repo = NULL) +} +\arguments{ +\item{left}{the tree-ish to use as the base for the comparison.} + +\item{right}{the tree-ish to compare to the base.} + +\item{repo}{the path to the Git repo to use.} +} +\value{ +a dataframe for each differing entry in the trees, with columns +\code{mode1}, \code{mode2}, \code{hash1}, \code{hash2}, \code{status}, \code{score}, \code{src} and \code{dst}. +} +\description{ +Get the difference between two tree-ish +} diff --git a/man/git_get_latest_commit.Rd b/man/git_get_latest_commit.Rd new file mode 100644 index 0000000..3bd5c4c --- /dev/null +++ b/man/git_get_latest_commit.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/git.R +\name{git_get_latest_commit} +\alias{git_get_latest_commit} +\title{Get the last commit to have modified the given path.} +\usage{ +git_get_latest_commit(path, ref, repo = NULL) +} +\arguments{ +\item{path}{path to the file of directory to search for} + +\item{ref}{the Git commit from which to start the search. Only ancestors of +that commit will be considered.} + +\item{repo}{the path to the Git repo to use.} +} +\description{ +If the path is a directory, any modification to files contained within +it are considered. +} diff --git a/tests/testthat/helper-orderly-runner.R b/tests/testthat/helper-orderly-runner.R index c42ece6..8435791 100644 --- a/tests/testthat/helper-orderly-runner.R +++ b/tests/testthat/helper-orderly-runner.R @@ -165,9 +165,16 @@ git_add_and_commit <- function(path, add = ".", message = "new commit") { } -create_new_commit <- function(path, new_file = "new", add = ".", ...) { - writeLines("new file", file.path(path, new_file)) - git_add_and_commit(path, add, ...) +git_add_and_commit <- function(path, add = ".") { + gert::git_add(add, repo = path) + user <- "author " + gert::git_commit("new commit", author = user, committer = user, repo = path) +} + + +create_new_commit <- function(path, new_file = "new", add = ".") { + writeLines(ids::random_id(), file.path(path, new_file)) + git_add_and_commit(path, add) } diff --git a/tests/testthat/test-git.R b/tests/testthat/test-git.R index b65bc2b..7641701 100644 --- a/tests/testthat/test-git.R +++ b/tests/testthat/test-git.R @@ -1,11 +1,9 @@ test_that("handle failure", { testthat::skip_on_cran() repo <- initialise_git_repo() - r <- git_run("unknown-command", repo = repo$path) - expect_false(r$success) expect_error( - git_run("unknown-command", repo = repo$path, check = TRUE), - r$output, fixed = TRUE) + git_run("unknown-command", repo = repo$path), + "'unknown-command' is not a git command", fixed = TRUE) }) @@ -28,29 +26,61 @@ test_that("can get default branch when remote origin is set", { "main") }) -test_that("can get files which have been modified", { - testthat::skip_on_cran() - repo <- test_prepare_orderly_remote_example("data") - copy_examples("parameters", repo$local) - git_add_and_commit(repo$local) - - log <- gert::git_log(repo = repo$local) - expect_equal(git_get_modified(log$commit[[2]], repo = repo$local), - character(0)) - expect_equal(git_get_modified(log$commit[[1]], repo = repo$local), - "src/parameters/parameters.R") - expect_equal(git_get_modified(log$commit[[1]], relative = "src/", - repo = repo$local), - "parameters/parameters.R") - expect_equal(git_get_modified(log$commit[[1]], base = log$commit[[2]], - repo = repo$local), - "src/parameters/parameters.R") - expect_equal(git_get_modified(log$commit[[2]], base = log$commit[[1]], - repo = repo$local), - character(0)) - expect_equal(git_get_modified(log$commit[[2]], base = log$commit[[2]], - repo = repo$local), - character(0)) +test_that("can get last commit for a path", { + repo <- initialise_git_repo() + c1 <- create_new_commit(repo$path, "hello.txt") + c2 <- create_new_commit(repo$path, "world.txt") + c3 <- create_new_commit(repo$path, "hello.txt") + c4 <- create_new_commit(repo$path, "world.txt") + + expect_equal(git_get_latest_commit("hello.txt", "HEAD", repo$path), c3) + expect_equal(git_get_latest_commit("world.txt", "HEAD", repo$path), c4) + + # If we start at c2, only it and its ancestors (ie. c1) are considered. + expect_equal(git_get_latest_commit("hello.txt", c2, repo$path), c1) + expect_equal(git_get_latest_commit("world.txt", c2, repo$path), c2) +}) + + +test_that("can diff trees", { + repo <- test_prepare_orderly_remote_example("data") + copy_examples("parameters", repo$local) + git_add_and_commit(repo$local) + + result <- git_diff_tree("HEAD^:src", "HEAD:src", repo = repo$local) + expect_equal(nrow(result), 1) + + expect_equal(result$mode1, "000000") + expect_equal(result$mode2, "040000") + expect_match(result$hash1, "[0-9a-f]{20}") + expect_match(result$hash2, "[0-9a-f]{20}") + expect_equal(result$status, "A") + expect_equal(result$src, "parameters") + + create_new_commit(repo$local, "src/parameters/hello.txt") + + result <- git_diff_tree("HEAD^:src", "HEAD:src", repo = repo$local) + expect_equal(nrow(result), 1) + + expect_equal(result$mode1, "040000") + expect_equal(result$mode2, "040000") + expect_match(result$hash1, "[0-9a-f]{20}") + expect_match(result$hash2, "[0-9a-f]{20}") + expect_equal(result$status, "M") + expect_equal(result$src, "parameters") + + fs::file_move(file.path(repo$local, "src", "parameters"), + file.path(repo$local, "src", "zparameters")) + git_add_and_commit(repo$local) + + # diff-tree never detects renames or copies. They are instead represented as + # a add and delete. + result <- git_diff_tree("HEAD^:src", "HEAD:src", repo = repo$local) + expect_equal(nrow(result), 2) + expect_equal(result$status[[1]], "D") + expect_equal(result$src[[1]], "parameters") + expect_equal(result$status[[2]], "A") + expect_equal(result$src[[2]], "zparameters") }) diff --git a/tests/testthat/test-reports.R b/tests/testthat/test-reports.R index afe6486..69e6a02 100644 --- a/tests/testthat/test-reports.R +++ b/tests/testthat/test-reports.R @@ -1,3 +1,58 @@ +test_that("can get list of reports", { + root <- test_prepare_orderly_example(c("data", "parameters")) + helper_add_git(root) + + reports <- get_reports(root = root, ref = "HEAD") + expect_setequal(reports$name, c("data", "parameters")) +}) + + +test_that("report list includes last modification time", { + root <- test_prepare_orderly_example(c("data", "parameters")) + helper_add_git(root) + + writeLines("Hello", file.path(root, "src/data/hello.txt")) + writeLines("World", file.path(root, "src/parameters/world.txt")) + + t1 <- as.POSIXct("2000-01-01T00:00:00") + t2 <- as.POSIXct("2010-01-01T00:00:00") + + gert::git_add("src/data/hello.txt", repo = root) + gert::git_commit( + "hello", + author = gert::git_signature("author", "email", t1), + repo = root + ) + + gert::git_add("src/parameters/world.txt", repo = root) + gert::git_commit( + "world", + author = gert::git_signature("author", "email", t2), + repo = root + ) + + reports <- get_reports(root = root, ref = "HEAD") + expect_setequal(reports$name, c("data", "parameters")) + expect_equal(reports[reports$name == "data",]$updated_at, t1) + expect_equal(reports[reports$name == "parameters",]$updated_at, t2) +}) + + +test_that("report list includes modification status", { + root <- test_prepare_orderly_example(c("data", "parameters")) + helper_add_git(root) + + writeLines("Hello", file.path(root, "src/data/hello.txt")) + git_add_and_commit(root) + + reports <- get_reports(root = root, ref = "HEAD", base = "HEAD^") + expect_setequal(reports$name, c("data", "parameters")) + expect_true(reports[reports$name == "data",]$has_changes) + expect_true(!reports[reports$name == "parameters",]$has_changes) + +}) + + test_that("can get orderly script name", { root <- test_prepare_orderly_example(c("data", "parameters")) expect_equal(get_orderly_script_path("data", "HEAD", root), "src/data/data.R") From 6b3e83a2fb4f8101fc3d50902549201016507689 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paul=20Li=C3=A9tar?= Date: Tue, 14 Jan 2025 17:37:59 +0000 Subject: [PATCH 2/3] Update base R image. The `as.POSIXct.numeric` function was relaxed a bit in R 4.3.0 and made its "origin" parameter optional, and we need that feature. For some reason the dependency installation was failing. Using `pak` seems to fix it. pak is generally a bit faster and more reliable anyway. --- docker/Dockerfile | 24 ++++++------------------ docker/Rprofile.site | 4 ---- 2 files changed, 6 insertions(+), 22 deletions(-) delete mode 100644 docker/Rprofile.site diff --git a/docker/Dockerfile b/docker/Dockerfile index ff696d0..e39a6af 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,27 +1,15 @@ -FROM rocker/r-ver:4.1 +FROM rocker/r-ver:4.4 -RUN apt-get update && apt-get install -y --no-install-recommends \ - git \ - libcurl4-openssl-dev \ - libhiredis-dev \ - libssl-dev \ - zlib1g-dev \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* +RUN install2.r pak -# Without this, we are unable to pick up more recent packages -COPY docker/Rprofile.site /usr/local/lib/R/etc/Rprofile.site - -COPY DESCRIPTION /tmp/DESCRIPTION - -RUN install2.r --error remotes && \ - Rscript -e 'remotes::install_deps("/tmp")' +COPY DESCRIPTION /src/DESCRIPTION +RUN Rscript -e "pak::local_install_deps('/src')" COPY . /src -RUN R CMD INSTALL --install-tests /src && rm -rf /src +RUN Rscript -e "pak::local_install('/src')" COPY docker/bin /usr/local/bin/ - + RUN git config --global --add safe.directory "*" RUN echo ".packit" > /.gitignore RUN git config --global core.excludesFile "/.gitignore" diff --git a/docker/Rprofile.site b/docker/Rprofile.site deleted file mode 100644 index 22b4c2f..0000000 --- a/docker/Rprofile.site +++ /dev/null @@ -1,4 +0,0 @@ -options(repos = c(CRAN = 'https://packagemanager.rstudio.com/all/__linux__/focal/latest'), download.file.method = 'libcurl') -options(HTTPUserAgent = sprintf("R/%s R (%s)", getRversion(), - paste(getRversion(), R.version$platform, - R.version$arch, R.version$os))) From 2d078a4bd903143670355a0d2fc75e3b0d44191f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Paul=20Li=C3=A9tar?= Date: Tue, 14 Jan 2025 18:11:41 +0000 Subject: [PATCH 3/3] fixes --- R/api.R | 2 +- tests/testthat/helper-orderly-runner.R | 8 ++++---- tests/testthat/test-reports.R | 3 --- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/R/api.R b/R/api.R index 215cb06..dcfdea8 100644 --- a/R/api.R +++ b/R/api.R @@ -93,7 +93,7 @@ repository_branches <- function(repositories_base_path, url) { ##' query ref :: string ##' state root :: root report_list <- function(root, ref) { - base <- git_get_default_branch(root) + base <- git_remote_default_branch_ref(root) reports <- get_reports(root = root, ref = ref, base = base) data.frame( diff --git a/tests/testthat/helper-orderly-runner.R b/tests/testthat/helper-orderly-runner.R index 8435791..eca4c4c 100644 --- a/tests/testthat/helper-orderly-runner.R +++ b/tests/testthat/helper-orderly-runner.R @@ -165,16 +165,16 @@ git_add_and_commit <- function(path, add = ".", message = "new commit") { } -git_add_and_commit <- function(path, add = ".") { +git_add_and_commit <- function(path, add = ".", message = "new commit") { gert::git_add(add, repo = path) user <- "author " - gert::git_commit("new commit", author = user, committer = user, repo = path) + gert::git_commit(message, author = user, committer = user, repo = path) } -create_new_commit <- function(path, new_file = "new", add = ".") { +create_new_commit <- function(path, new_file = "new", add = ".", ...) { writeLines(ids::random_id(), file.path(path, new_file)) - git_add_and_commit(path, add) + git_add_and_commit(path, add, ...) } diff --git a/tests/testthat/test-reports.R b/tests/testthat/test-reports.R index 69e6a02..e72bb81 100644 --- a/tests/testthat/test-reports.R +++ b/tests/testthat/test-reports.R @@ -1,6 +1,5 @@ test_that("can get list of reports", { root <- test_prepare_orderly_example(c("data", "parameters")) - helper_add_git(root) reports <- get_reports(root = root, ref = "HEAD") expect_setequal(reports$name, c("data", "parameters")) @@ -9,7 +8,6 @@ test_that("can get list of reports", { test_that("report list includes last modification time", { root <- test_prepare_orderly_example(c("data", "parameters")) - helper_add_git(root) writeLines("Hello", file.path(root, "src/data/hello.txt")) writeLines("World", file.path(root, "src/parameters/world.txt")) @@ -40,7 +38,6 @@ test_that("report list includes last modification time", { test_that("report list includes modification status", { root <- test_prepare_orderly_example(c("data", "parameters")) - helper_add_git(root) writeLines("Hello", file.path(root, "src/data/hello.txt")) git_add_and_commit(root)