Skip to content

Commit

Permalink
Don't add a trailing newline to metadata files.
Browse files Browse the repository at this point in the history
By default, the base writeLines function adds a trailing newline to the
end of the file, even when the input data did not contain any. While
this is generally an acceptable behaviour, in the case of metadata
files, we also hash the data and embed it in location files. Because the
hashing happens over the in-memory data, it does not include the
trailing newline. The hash of the on-disk file therefore ends up being
different from the hash stored in the location file.

It hasn't caused any practical issues because we trim leading and
trailing whitespaces when reading the files back, getting the original
serialized metadata that had been hashed. Similarly, when pushing a
metadata file to a remote server, the client trims whitespaces from the
metadata file. The behaviour is nevertheless confusing and it would be
better to have consitent contents and hashes all around.

The old behaviour of trimming whitespaces on read is preserved, as we
must maintain compatibility with old files that do contain the newline.
  • Loading branch information
plietar committed Mar 25, 2024
1 parent 9222c8f commit 1cd3d07
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 3 deletions.
2 changes: 1 addition & 1 deletion R/location.R
Original file line number Diff line number Diff line change
Expand Up @@ -474,7 +474,7 @@ location_pull_metadata <- function(location_name, root, call) {
"Please let us know how this might have happened."),
i = hint_remove),
call)
writeLines(metadata[[i]], filename[[i]])
writeLines(metadata[[i]], filename[[i]], sep="")
}
}

Expand Down
2 changes: 1 addition & 1 deletion R/location_path.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ location_path_import_metadata <- function(str, hash, root) {
root$files$get(meta$files$hash, dst, TRUE)
}

writeLines(str, file.path(root$path, ".outpack", "metadata", id))
writeLines(str, file.path(root$path, ".outpack", "metadata", id), sep="")
time <- Sys.time()
mark_packet_known(id, local, hash, time, root)
}
Expand Down
2 changes: 1 addition & 1 deletion R/outpack_insert.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ outpack_insert_packet <- function(path, json, root = NULL) {
}

path_meta <- file.path(root$path, ".outpack", "metadata", id)
writeLines(json, path_meta)
writeLines(json, path_meta, sep="")

## TODO: once we get more flexible remotes, this will get moved into
## its own thing.
Expand Down
30 changes: 30 additions & 0 deletions tests/testthat/test-location-path.R
Original file line number Diff line number Diff line change
Expand Up @@ -319,3 +319,33 @@ test_that("Push single packet", {
files_used <- lapply(id, function(id) client$index$metadata(id)$files$hash)
expect_setequal(plan$files, unique(unlist(files_used, FALSE, FALSE)))
})


test_that("Can read metadata files with a trailing newline", {
# Past versions of orderly2 wrote metadata files with a trailing newline
# character, despite the fact that the newline was not included when hashing.
#
# This has been fixed by not writing the newline anymore, but for
# compatibility we need to ensure we can still read those metadata files and
# get a correct hash.

root <- create_temporary_root()
id <- create_random_packet(root)
path <- file.path(root$path, ".outpack", "metadata", id)

# Calling writeLines adds the trailing newline and mimicks the old orderly2
# behaviour.
old_size <- file.info(path)$size
writeLines(read_string(path), path)
expect_equal(file.info(path)$size, old_size + 1)

# Reading the metadata from a location at that path correctly strips the
# newline and hashes correctly.
loc <- orderly_location_path$new(root$path)
packets <- loc$list()
data <- loc$metadata(id)
expect_equal(nchar(data), old_size, ignore_attr = TRUE)

expected_hash <- packets[packets$packet == id]$hash
expect_no_error(hash_validate_data(data, expected_hash))
})
12 changes: 12 additions & 0 deletions tests/testthat/test-outpack-packet.R
Original file line number Diff line number Diff line change
Expand Up @@ -823,3 +823,15 @@ test_that("can overwrite dependencies", {
hash_file(file.path(path_src, "data.rds")),
hash_file(file.path(root$path, "archive", "data", id, "data.rds")))
})


test_that("metadata files match their hash", {
root <- create_temporary_root()
id <- create_random_packet(root)

location <- root$index$location(local)
expected_hash <- location[location$packet == id]$hash

path <- file.path(root$path, ".outpack", "metadata", id)
expect_no_error(hash_validate_file(expected_hash, path))
})
12 changes: 12 additions & 0 deletions tests/testthat/test-util.R
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,15 @@ test_that("can gracefully cope with rds save failure", {
"some error")
expect_equal(dir(tmp), character())
})


test_that("read_string strips newlines", {
path <- tempfile()
writeLines(c("", "12345678"), path)

# 8 characters, a leading newline and a trailing one
expect_equal(file.info(path)$size, 10)

result <- expect_silent(read_string(path))
expect_equal(result, "12345678")
})

0 comments on commit 1cd3d07

Please sign in to comment.