Skip to content

Commit

Permalink
Added a saveObject,data.frame-method to avoid unexpected saving as a …
Browse files Browse the repository at this point in the history
…list.
  • Loading branch information
LTLA committed Oct 17, 2024
1 parent 9021de7 commit 98ba698
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 9 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: alabaster.base
Title: Save Bioconductor Objects To File
Version: 1.5.9
Date: 2024-09-22
Version: 1.5.10
Date: 2024-10-17
Authors@R: person("Aaron", "Lun", role=c("aut", "cre"), email="[email protected]")
License: MIT + file LICENSE
Description:
Expand Down
25 changes: 20 additions & 5 deletions R/saveDataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
#'
#' Stage a DataFrame by saving it to an HDF5 file.
#'
#' @param x A \linkS4class{DataFrame}.
#' @param x A \linkS4class{DataFrame} or data.frame.
#' @inheritParams saveObject
#'
#' @return
#' A named list containing the metadata for \code{x}.
#' \code{x} itself is written to a CSV or HDF5 file inside \code{path}.
#' \code{x} itself is written to an HDF5 file inside \code{path}.
#' Additional files may also be created inside \code{path} and referenced from the metadata.
#'
#' @details
Expand All @@ -21,6 +21,9 @@
#' If \code{\link{metadata}} or \code{\link{mcols}} are present,
#' they are saved to the \code{other_annotations} and \code{column_annotations} subdirectories, respectively, via \code{\link{saveObject}}.
#'
#' In the on-disk representation, no distinction is made between \linkS4class{DataFrame} and data.frame instances of \code{x}.
#' Calling \code{readDataFrame} will always produce a \linkS4class{DFrame} regardless of the class of \code{x}.
#'
#' @author Aaron Lun
#'
#' @examples
Expand Down Expand Up @@ -48,7 +51,7 @@ setMethod("saveObject", "DataFrame", function(x, path, ...) {
})

#' @importFrom rhdf5 h5write h5createGroup h5createFile H5Gopen H5Gclose H5Acreate H5Aclose H5Awrite H5Fopen H5Fclose H5Dopen H5Dclose
.write_hdf5_new <- function(x, path, ...) {
.write_hdf5_new <- function(x, path, row.names=rownames(x), ...) {
subpath <- "basic_columns.h5"
ofile <- paste0(path, "/", subpath)

Expand Down Expand Up @@ -133,11 +136,23 @@ setMethod("saveObject", "DataFrame", function(x, path, ...) {
}

h5_write_vector(ghandle, "column_names", colnames(x))
if (!is.null(rownames(x))) {
h5_write_vector(ghandle, "row_names", rownames(x))
if (!is.null(row.names)) {
h5_write_vector(ghandle, "row_names", row.names)
}
}

#' @export
#' @rdname stageDataFrame
setMethod("saveObject", "data.frame", function(x, path, ...) {
    # Make sure the destination directory exists; an already-existing
    # directory is not reported as a warning.
    dir.create(path, showWarnings=FALSE)

    # Row names stored as integers are treated as absent, so that only
    # non-integer row names are handed to the HDF5 writer.
    stored_names <- attr(x, "row.names")
    row_labels <- if (is.integer(stored_names)) NULL else stored_names

    # Write the columns (and row names, if any) into 'path'.
    .write_hdf5_new(x, path, row.names=row_labels, ...)

    # Record the object type and format version alongside the data.
    object_info <- list(data_frame=list(version="1.0"))
    saveObjectFile(path, "data_frame", object_info)
})

#######################################
########### OLD STUFF HERE ############
#######################################
Expand Down
2 changes: 2 additions & 0 deletions inst/NEWS.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ to allow developers to declare that custom subclasses satisfy an interface or ha

\item Updated \code{validateDirectory()} so that it works with a directory of objects saved via \code{saveObject()}.
Objects saved under the old regime (i.e., \code{stageObject()}) are auto-detected but can also be explicitly validated by setting \code{legacy=FALSE}.

\item Added a data.frame method for \code{saveObject()}, to avoid fallback to the list method.
}}

\section{Version 1.4.0}{\itemize{
Expand Down
10 changes: 8 additions & 2 deletions man/stageDataFrame.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 24 additions & 0 deletions tests/testthat/test-DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -677,3 +677,27 @@ test_that("staging of arrays continues to work with character matrices", {
roundtrip$Z <- as.matrix(roundtrip$Z)
expect_identical(roundtrip, input)
})

test_that("saving works for base data.frames", {
    # Save 'input' into a fresh temporary location and read it back.
    save_and_reload <- function(input) {
        destination <- tempfile()
        saveObject(input, destination)
        readObject(destination)
    }

    n <- 123
    original <- data.frame(
        stuff = rep(LETTERS[1:3], length.out=n),
        foo = seq_len(n),
        whee = as.numeric(10 + seq_len(n))
    )
    # Factor columns: one with extra/reordered levels, one ordered.
    original$blah <- factor(original$stuff, LETTERS[10:1])
    original$rabbit <- factor(original$stuff, LETTERS[1:3], ordered=TRUE)

    # Without explicit row names, none should come back after reloading.
    reloaded <- save_and_reload(original)
    expect_null(rownames(reloaded))
    expect_identical(as.data.frame(reloaded), original)

    # Respects row names.
    rownames(original) <- sprintf("GENE_%i", seq_len(n))
    reloaded <- save_and_reload(original)
    expect_identical(as.data.frame(reloaded), original)
})

0 comments on commit 98ba698

Please sign in to comment.