Skip to content

Commit

Permalink
Remove unnecessary rownames from the mcols when staging.
Browse files Browse the repository at this point in the history
These will get overridden by the names on the Vector object itself
anyway, so there's no need to waste a column storing them again.
  • Loading branch information
LTLA committed Nov 13, 2023
1 parent ab48df1 commit 2aac638
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 10 deletions.
23 changes: 16 additions & 7 deletions R/processMetadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
#' @details
#' If \code{mcols(x)} has no columns, nothing is saved by \code{.processMcols}.
#' Similarly, if \code{metadata(x)} is an empty list, nothing is saved by \code{.processMetadata}.
#'
#' If \code{mcols(x)} has non-\code{NULL} row names, these are removed prior to staging.
#' These names are redundant with the names attached to \code{x} itself.
#'
#' @seealso
#' \code{.restoreMetadata}, which does the loading.
Expand All @@ -43,14 +46,20 @@ processMetadata <- function(x, dir, path, meta.name) {
#' @rdname processMetadata
#' @importFrom S4Vectors mcols metadata
processMcols <- function(x, dir, path, mcols.name) {
if (!is.null(mcols.name) && !is.null(mcols(x)) && ncol(mcols(x))) {
tryCatch({
meta <- altStageObject(mcols(x), dir, paste0(path, "/", mcols.name), child=TRUE)
list(resource=writeMetadata(meta, dir=dir))
}, error=function(e) stop("failed to stage 'mcols(<", class(x)[1], ">)'\n - ", e$message))
} else {
NULL
output <- NULL

if (!is.null(mcols.name)) {
mc <- mcols(x, use.names=FALSE)
if (!is.null(mc) && ncol(mc)) {
rownames(mc) <- NULL # stripping out unnecessary row names.
output <- tryCatch({
meta <- altStageObject(mc, dir, paste0(path, "/", mcols.name), child=TRUE)
list(resource=writeMetadata(meta, dir=dir))
}, error=function(e) stop("failed to stage 'mcols(<", class(x)[1], ">)'\n - ", e$message))
}
}

return(output)
}

# Soft-deprecated back-compatibility fixes
Expand Down
28 changes: 25 additions & 3 deletions tests/testthat/test-DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -370,23 +370,45 @@ test_that("loaders work correctly from HDF5 with non-default placeholders", {
})

test_that("stageObject works with extra mcols", {
tmp <- tempfile()
dir.create(tmp)

df <- DataFrame(A=sample(3, 100, replace=TRUE), B=sample(letters[1:3], 100, replace=TRUE))
out <- stageObject(df, tmp, "raw_thing")
expect_null(out$data_frame$column_data)
expect_null(out$data_frame$other_data)

# Ignores it when the mcols have no columns.
mcols(df) <- make_zero_col_DFrame(2)
out <- stageObject(df, tmp, "raw_thing2")
expect_null(out$data_frame$column_data)
expect_null(out$data_frame$other_data)

# Alright, adding some mcols.
mcols(df)$stuff <- runif(ncol(df))
mcols(df)$foo <- sample(LETTERS, ncol(df), replace=TRUE)
metadata(df) <- list(WHEE="foo")

tmp <- tempfile()
dir.create(tmp)
out <- stageObject(df, tmp, "thing")
expect_false(is.null(out$data_frame$column_data))
expect_false(is.null(out$data_frame$other_data))

# Should write without errors.
resource <- writeMetadata(out, tmp)
expect_true(file.exists(file.path(tmp, resource$path)))
df2 <- loadDataFrame(out, tmp)
expect_equal(df, df2)

# Eliminates redundant row names.
mc <- mcols(df)
rownames(mc) <- c("C", "D")
mcols(df, use.names=FALSE) <- mc
out <- stageObject(df, tmp, "thing2")
resource <- writeMetadata(out, tmp)

df2 <- loadDataFrame(out, tmp)
expect_equal(df, df2)
mc <- mcols(df2, use.names=FALSE)
expect_null(rownames(mc))
})

test_that("DF staging preserves odd colnames", {
Expand Down

0 comments on commit 2aac638

Please sign in to comment.