Skip to content

Commit

Permalink
Add an outpack.json at the root of the export.
Browse files Browse the repository at this point in the history
  • Loading branch information
plietar committed Mar 19, 2024
1 parent 5338daf commit 35036c1
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 47 deletions.
105 changes: 58 additions & 47 deletions R/export.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ orderly_export_zip <- function(path, packets, root = NULL, locate = TRUE) {
root <- root_open(root, locate = locate, require_orderly = FALSE,
call = environment())

metadata <- root$index$data()$metadata
packets <- find_all_dependencies(packets, metadata)
files <- find_all_files(packets, metadata)
index <- root$index$data()
packets <- find_all_dependencies(packets, index$metadata)
files <- find_all_files(packets, index$metadata)

dest <- withr::local_tempfile()
fs::dir_create(dest)
Expand All @@ -36,7 +36,16 @@ orderly_export_zip <- function(path, packets, root = NULL, locate = TRUE) {
store$put(find_file_by_hash(root, hash), hash)
}

zip::zip(fs::path_abs(path), root = dest, files = c("metadata", "files"))
packet_list <- index$location[
match(index$location$packet, packets), c("packet", "hash")]
contents <- list(packets=packet_list)

writeLines(to_json(contents, "orderly/export.json"),
file.path(dest, "outpack.json"))

zip::zip(fs::path_abs(path), root = dest,
files = c("outpack.json", "metadata", "files"))

invisible()
}

Expand All @@ -51,60 +60,38 @@ orderly_export_zip <- function(path, packets, root = NULL, locate = TRUE) {
orderly_import_zip <- function(path, root = NULL, locate = TRUE) {
root <- root_open(root, locate = locate, require_orderly = FALSE,
call = environment())
index <- root$index$data()

hash_algorithm <- root$config$core$hash_algorithm

src <- withr::local_tempfile()
zip::unzip(path, exdir = src)
store <- file_store$new(file.path(src, "files"))

ids <- dir(file.path(src, "metadata"))
contents <- jsonlite::read_json(file.path(src, "outpack.json"),
simplifyVector=TRUE)

# TODO: is using the root's hash algorithm correct? What if the origin had
# used a different hash, now there are two hashes for the same packet. We
# don't record the hash algorithm anywhere in the zip files, maybe we should.
metadata_hashes <- hash_metadata_files(
file.path(src, "metadata", ids), hash_algorithm)
import_zip_metadata(root, src, contents$packets, call = environment())
import_zip_packets(root, src, contents$packets)

known_packets <- ids %in% names(index$metadata)
missing_packets <- !(ids %in% index$unpacked)
invisible(contents$packets$packet)
}

import_check_hashes(src, ids[known_packets], metadata_hashes[known_packets],
root, call = environment())
import_zip_metadata <- function(root, src, packets, call) {
index <- root$index$data()
new_packets <- !(packets$packet %in% names(index$metadata))

fs::file_copy(
file.path(src, "metadata", ids[!known_packets]),
file.path(root$path, ".outpack", "metadata", ids[!known_packets]))
ids <- packets$packet[new_packets]
src_paths <- file.path(src, "metadata", ids)
dst_paths <- file.path(root$path, ".outpack", "metadata", ids)
expected_hash <- packets$hash[new_packets]

if (root$config$core$use_file_store) {
# The index needs reloading to take into account the new metadata we just
# pulled.
index <- root$index$data()
files <- find_all_files(ids, index$metadata)
files <- files[!root$files$exists(files)]
for (hash in files) {
file_path <- store$get(hash, root$files$tmp(), overwrite = FALSE)
root$files$put(file_path, hash, move = TRUE)
}
for (i in seq_along(src_paths)) {
metadata <- read_string(src_paths[[i]])
hash_validate_data(metadata, expected_hash[[i]], sprintf("metadata for '%s'", id), call = call)
writeLines(metadata, dst_paths[[i]])
}

for (i in which(missing_packets)) {
if (!is.null(root$config$core$path_archive)) {
location_pull_files_archive(ids[[i]], store, root)
}
mark_packet_known(ids[[i]], local, metadata_hashes[[i]], Sys.time(), root)
}

invisible(ids)
}

import_check_hashes <- function(src, ids, hashes, root, call) {
index <- root$index$data()
hash_algorithm <- root$config$core$hash_algorithm

hash_here <- index$location$hash[match(ids, index$location$packet)]
err <- hashes != hash_here
seen_before <- intersect(packets$packet, index$location$packet)
hash_there <- packets$hash[match(seen_before, packets$packet)]
hash_here <- index$location$hash[match(seen_before, index$location$packet)]
err <- hash_there != hash_here
if (any(err)) {
cli::cli_abort(
c("Imported file has conflicting metadata",
Expand All @@ -117,5 +104,29 @@ import_check_hashes <- function(src, ids, hashes, root, call) {
i = "We would be interested in this case, please let us know"),
call = call)
}

invisible()
}

import_zip_packets <- function(root, src, packets) {
store <- file_store$new(file.path(src, "files"))
index <- root$index$data()
missing_packets <- packets[!(packets$packet %in% index$unpacked),]

if (root$config$core$use_file_store) {
files <- find_all_files(missing_packets$packet, index$metadata)
files <- files[!root$files$exists(files)]
for (hash in files) {
file_path <- store$get(hash, root$files$tmp(), overwrite = FALSE)
root$files$put(file_path, hash, move = TRUE)
}
}

for (i in seq_along(missing_packets$packet)) {
if (!is.null(root$config$core$path_archive)) {
location_pull_files_archive(missing_packets$packet[[i]], store, root)
}
mark_packet_known(missing_packets$packet[[i]], local,
missing_packets$hash[[i]], Sys.time(), root)
}
}
21 changes: 21 additions & 0 deletions inst/schema/orderly/export.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "index of a exported zip file",
"version": "0.0.1",

"type": "object",
"properties": {
"packets": {
"type": "array",
"items": {
"packet": {
"$ref": "../outpack/packet-id.json"
},

"hash": {
"$ref": "../outpack/hash.json"
}
}
}
}
}

0 comments on commit 35036c1

Please sign in to comment.