Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor directory expansion. #179

Merged
merged 1 commit into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: orderly2
Title: Orderly Next Generation
Version: 1.99.36
Version: 1.99.37
Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"),
email = "[email protected]"),
person("Robert", "Ashton", role = "aut"),
Expand Down
26 changes: 6 additions & 20 deletions R/metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -391,28 +391,14 @@ copy_shared_resource <- function(path_root, path_dest, config, files, call) {
shared_dir))
}

here <- files$here
there <- files$there

assert_file_exists_relative(there, workdir = shared_path,
assert_file_exists_relative(files$there, workdir = shared_path,
name = "Shared resource file", call = call)
src <- file.path(shared_path, there)
dst <- file.path(path_dest, here)

is_dir <- is_directory(file.path(shared_path, there))
fs::dir_create(file.path(path_dest, dirname(here)))
if (any(is_dir)) {
fs::dir_copy(src[is_dir], dst[is_dir])
## Update the names that will be used in the metadata:
files <- lapply(src[is_dir], dir)
here <- replace_ragged(here, is_dir, Map(file.path, here[is_dir], files))
there <- replace_ragged(there, is_dir, Map(file.path, there[is_dir], files))
}
if (any(!is_dir)) {
copy_files(src[!is_dir], dst[!is_dir], overwrite = TRUE)
}

data_frame(here = here, there = there)
files_expanded <- expand_dirs(files, shared_path)
copy_files(fs::path(shared_path, files_expanded$there),
fs::path(path_dest, files_expanded$here),
overwrite = TRUE)
files_expanded
}


Expand Down
28 changes: 10 additions & 18 deletions R/outpack_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ orderly_copy_files <- function(..., files, dest, overwrite = TRUE,
}
}

plan <- plan_copy_files(root, id, files$there, files$here, environment())
plan <- plan_copy_files(root, id, files, environment())
name <- outpack_metadata_core(id, root)$name

tryCatch(
Expand Down Expand Up @@ -149,25 +149,17 @@ orderly_copy_files <- function(..., files, dest, overwrite = TRUE,
}


plan_copy_files <- function(root, id, there, here, call = NULL) {
assert_relative_path(there, name = "File", workdir = id, call = call)
validate_packet_has_file(root, id, there, call)
is_dir <- grepl("/$", there)
if (any(is_dir)) {
meta <- outpack_metadata_core(id, root)
files <- meta$files$path
expanded <- lapply(which(is_dir), function(i) {
p <- there[[i]]
j <- string_starts_with(p, files)
nms <- file.path(sub("/+$", "", here[[i]]),
string_drop_prefix(p, files[j]))
set_names(files[j], nms)
})
plan_copy_files <- function(root, id, files, call = NULL) {
assert_relative_path(files$there, name = "File", workdir = id, call = call)
validate_packet_has_file(root, id, files$there, call)

there <- replace_ragged(there, is_dir, lapply(expanded, unname))
here <- replace_ragged(here, is_dir, lapply(expanded, names))
meta <- outpack_metadata_core(id, root)
is_dir <- function(p) grepl("/$", p)
list_files <- function(p) {
j <- string_starts_with(p, meta$files$path)
string_drop_prefix(p, meta$files$path[j])
}
data_frame(there, here)
expand_dirs_virtual(files, is_dir, list_files)
}


Expand Down
57 changes: 45 additions & 12 deletions R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,20 +134,53 @@ vcapply <- function(X, FUN, ...) { # nolint
}


## TODO: also replace copy_shared with this
expand_dirs <- function(paths, workdir) {
if (length(paths) == 0) {
return(character())
#' Expand directories into their content lists.
#'
#' This function does not access the filesystem directly and instead calls the
#' given `is_dir` and `list_files` callback. This allows using the function with
#' files that do not exist on disk yet, such as those listed in a packet's
#' metadata.
#'
#' @param files either a character vector or a dataframe with columns `there`
#' and `here`.
#' @param is_dir a function from a character vector to a logical vector,
#' indicating whether each path is a directory needing expansion or not.
#' @param list_files a function from a character scalar to a character vector,
#' enumerating the contents of the directory. The return values must *not*
#' include the directory path as a prefix.
#' @return a modified version of `files`, where directories have been replaced
#' by their contents. If `files` was a data_frame, both the `there` and `here`
#' columns are modified.
#' @noRd
expand_dirs_virtual <- function(files, is_dir, list_files) {
if (is.character(files)) {
dirs <- is_dir(files)
expanded <- lapply(files[dirs], list_files)
replace_ragged(files, dirs, Map(fs::path, files[dirs], expanded))
} else {
dirs <- is_dir(files$there)
expanded <- lapply(files$there[dirs], list_files)

there <- replace_ragged(files$there, dirs,
Map(fs::path, files$there[dirs], expanded))
here <- replace_ragged(files$here, dirs,
Map(fs::path, files$here[dirs], expanded))

data_frame(here, there)
}
withr::local_dir(workdir)
i <- is_directory(paths)
if (any(i)) {
contents <- lapply(paths[i], function(p) {
as.character(fs::dir_ls(p, all = TRUE, type = "file", recurse = TRUE))
})
paths <- replace_ragged(paths, i, contents)
}


expand_dirs <- function(files, workdir) {
assert_scalar_character(workdir)

is_dir <- function(p) is_directory(fs::path(workdir, p))
list_files <- function(p) {
full_path <- fs::path(workdir, p)
files <- fs::dir_ls(full_path, all = TRUE, type = "file", recurse = TRUE)
fs::path_rel(files, full_path)
}
paths
expand_dirs_virtual(files, is_dir, list_files)
}


Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-outpack-packet.R
Original file line number Diff line number Diff line change
Expand Up @@ -583,15 +583,15 @@ test_that("can depend based on a simple query", {
p$depends[[1]],
list(packet = id$b[[3]],
query = "latest()",
files = data.frame(there = "data.rds", here = "1.rds")))
files = data.frame(here = "1.rds", there = "data.rds")))

query <- orderly_query("latest(parameter:i < 3)", name = "a")
outpack_packet_use_dependency(p, query, c("2.rds" = "data.rds"))
expect_mapequal(
p$depends[[2]],
list(packet = id$a[[2]],
query = 'latest(parameter:i < 3 && name == "a")',
files = data.frame(there = "data.rds", here = "2.rds")))
files = data.frame(here = "2.rds", there = "data.rds")))
})


Expand Down Expand Up @@ -790,8 +790,8 @@ test_that("can pull in directories", {
p2 <- outpack_packet_start_quietly(path_src2, "b", root = root)
outpack_packet_use_dependency(p2, 'latest(name == "a")', c(d = "data/"))
expect_equal(p2$depends[[1]]$files,
data_frame(there = file.path("data", letters[1:6]),
here = file.path("d", letters[1:6])))
data_frame(here = file.path("d", letters[1:6]),
there = file.path("data", letters[1:6])))
})


Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-run.R
Original file line number Diff line number Diff line change
Expand Up @@ -1347,7 +1347,7 @@ test_that("can read about dependencies", {
r <- readRDS(file.path(path, "archive", "depends", id2, "depends.rds"))
expect_equal(r$id, id1)
expect_equal(r$name, "data")
expect_equal(r$files, data_frame(there = "data.rds", here = "input.rds"))
expect_equal(r$files, data_frame(here = "input.rds", there = "data.rds"))

res <- withr::with_dir(
path_src,
Expand Down
98 changes: 98 additions & 0 deletions tests/testthat/test-util.R
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,104 @@ test_that("read_string strips newlines", {
expect_equal(result, "12345678")
})

describe("expand_dirs_virtual", {
files <- list(
"d1" = c("f2", "f3"),
"d2" = c("f4", "d3/f5"),
"d2/d3" = c("f5"))
is_dir <- function(p) p %in% c("d1", "d2", "d2/d3")
list_files <- function(p) files[[p]]

check <- function(object, expected) {
expect_equal(expand_dirs_virtual(object, is_dir, list_files), expected)
}

it("accepts a character vector", {
check("f1", "f1")
check("d1", c("d1/f2", "d1/f3"))
check(c("f1", "d1"), c("f1", "d1/f2", "d1/f3"))
check("d2", c("d2/f4", "d2/d3/f5"))
})

it("accepts a dataframe", {
check(
data_frame(here = "g1", there = "f1"),
data_frame(here = "g1", there = "f1"))

check(
data_frame(here = "dest", there = "d1"),
data_frame(here = c("dest/f2", "dest/f3"),
there = c("d1/f2", "d1/f3")))

check(
data_frame(here = c("g1", "dest"), there = c("f1", "d1")),
data_frame(here = c("g1", "dest/f2", "dest/f3"),
there = c("f1", "d1/f2", "d1/f3")))

check(
data_frame(here = "dest", there = "d2"),
data_frame(here = c("dest/f4", "dest/d3/f5"),
there = c("d2/f4", "d2/d3/f5")))

check(
data_frame(here = c("foo", "bar"), there = c("d2", "d2")),
data_frame(here = c("foo/f4", "foo/d3/f5", "bar/f4", "bar/d3/f5"),
there = c("d2/f4", "d2/d3/f5", "d2/f4", "d2/d3/f5")))
})
})

describe("expand_dirs", {
p <- withr::local_tempdir()
files <- c("f1", "d1/f2", "d1/f3", "d2/f4", "d2/d3/f5")
fs::dir_create(fs::path(p, unique(dirname(files))))
fs::file_create(fs::path(p, files))

it("accepts a character vector", {
check <- function(object, expected) {
expect_setequal(expand_dirs(object, p), expected)
}

check("f1", "f1")
check("d1", c("d1/f2", "d1/f3"))
check(c("f1", "d1"), c("f1", "d1/f2", "d1/f3"))
check("d2", c("d2/f4", "d2/d3/f5"))
})

it("accepts a dataframe", {
check <- function(object, expected) {
result <- expand_dirs(object, p)
# This compares the dataframes ignoring the order, which is not
# deterministic
expect_setequal(unname(split(result, seq_len(nrow(result)))),
unname(split(expected, seq_len(nrow(expected)))))
}

check(
data_frame(here = "g1", there = "f1"),
data_frame(here = "g1", there = "f1"))

check(
data_frame(here = "dest", there = "d1"),
data_frame(here = c("dest/f2", "dest/f3"), there = c("d1/f2", "d1/f3")))

check(
data_frame(here = c("g1", "dest"), there = c("f1", "d1")),
data_frame(here = c("g1", "dest/f2", "dest/f3"),
there = c("f1", "d1/f2", "d1/f3")))

check(
data_frame(here = "dest", there = "d2"),
data_frame(here = c("dest/f4", "dest/d3/f5"),
there = c("d2/f4", "d2/d3/f5")))

check(
data_frame(here = c("foo", "bar"), there = c("d2", "d2")),
data_frame(here = c("foo/f4", "foo/d3/f5", "bar/f4", "bar/d3/f5"),
there = c("d2/f4", "d2/d3/f5", "d2/f4", "d2/d3/f5")))
})
})


test_that("fill_missing_names works", {
expect_equal(fill_missing_names(NULL), NULL)
expect_equal(fill_missing_names(c("a", "b")), c(a = "a", b = "b"))
Expand Down
Loading