diff --git a/DESCRIPTION b/DESCRIPTION index 2a637a8..cbcab13 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: alabaster.base Title: Save Bioconductor Objects To File -Version: 1.5.9 -Date: 2024-09-22 +Version: 1.5.10 +Date: 2024-10-17 Authors@R: person("Aaron", "Lun", role=c("aut", "cre"), email="infinite.monkeys.with.keyboards@gmail.com") License: MIT + file LICENSE Description: diff --git a/R/saveDataFrame.R b/R/saveDataFrame.R index 84f2bf8..65bf9ff 100644 --- a/R/saveDataFrame.R +++ b/R/saveDataFrame.R @@ -2,12 +2,12 @@ #' #' Stage a DataFrame by saving it to a HDF5 file. #' -#' @param x A \linkS4class{DataFrame}. +#' @param x A \linkS4class{DataFrame} or data.frame. #' @inheritParams saveObject #' #' @return #' A named list containing the metadata for \code{x}. -#' \code{x} itself is written to a CSV or HDF5 file inside \code{path}. +#' \code{x} itself is written to a HDF5 file inside \code{path}. #' Additional files may also be created inside \code{path} and referenced from the metadata. #' #' @details @@ -21,6 +21,9 @@ #' If \code{\link{metadata}} or \code{\link{mcols}} are present, #' they are saved to the \code{other_annotations} and \code{column_annotations} subdirectories, respectively, via \code{\link{saveObject}}. #' +#' In the on-disk representation, no distinction is made between \linkS4class{DataFrame} and data.frame instances of \code{x}. +#' Calling \code{readDataFrame} will always produce a \linkS4class{DFrame} regardless of the class of \code{x}. +#' #' @author Aaron Lun #' #' @examples @@ -48,7 +51,7 @@ setMethod("saveObject", "DataFrame", function(x, path, ...) { }) #' @importFrom rhdf5 h5write h5createGroup h5createFile H5Gopen H5Gclose H5Acreate H5Aclose H5Awrite H5Fopen H5Fclose H5Dopen H5Dclose -.write_hdf5_new <- function(x, path, ...) { +.write_hdf5_new <- function(x, path, row.names=rownames(x), ...) { subpath <- "basic_columns.h5" ofile <- paste0(path, "/", subpath) @@ -133,11 +136,23 @@ setMethod("saveObject", "DataFrame", function(x, path, ...) { } h5_write_vector(ghandle, "column_names", colnames(x)) - if (!is.null(rownames(x))) { - h5_write_vector(ghandle, "row_names", rownames(x)) + if (!is.null(row.names)) { + h5_write_vector(ghandle, "row_names", row.names) } } +#' @export +#' @rdname stageDataFrame +setMethod("saveObject", "data.frame", function(x, path, ...) { + dir.create(path, showWarnings=FALSE) + rn <- attr(x, "row.names") + if (is.integer(rn)) { + rn <- NULL + } + .write_hdf5_new(x, path, row.names=rn, ...) + saveObjectFile(path, "data_frame", list(data_frame=list(version="1.0"))) +}) + ####################################### ########### OLD STUFF HERE ############ ####################################### diff --git a/inst/NEWS.Rd b/inst/NEWS.Rd index 8469cce..b0491ae 100644 --- a/inst/NEWS.Rd +++ b/inst/NEWS.Rd @@ -20,6 +20,8 @@ to allow developers to declare that custom subclasses satisfy an interface or ha \item Updated \code{validateDirectory()} so that it works with a directory of objects saved via \code{saveObject()}. Objects saved under the old regime (i.e., \code{stageObject()}) are auto-detected but can also be explicitly validated by setting \code{legacy=FALSE}. + +\item Added a data.frame method for \code{saveObject()}, to avoid fallback to the list method. }} \section{Version 1.4.0}{\itemize{ diff --git a/man/stageDataFrame.Rd b/man/stageDataFrame.Rd index afa8c20..18a12c6 100644 --- a/man/stageDataFrame.Rd +++ b/man/stageDataFrame.Rd @@ -3,12 +3,15 @@ \name{saveObject,DataFrame-method} \alias{saveObject,DataFrame-method} \alias{stageObject,DataFrame-method} +\alias{saveObject,data.frame-method} \title{Save a DataFrame to disk} \usage{ \S4method{saveObject}{DataFrame}(x, path, ...) + +\S4method{saveObject}{data.frame}(x, path, ...) } \arguments{ -\item{x}{A \linkS4class{DataFrame}.} +\item{x}{A \linkS4class{DataFrame} or data.frame.} \item{path}{String containing the path to a directory in which to save \code{x}.} @@ -16,7 +19,7 @@ } \value{ A named list containing the metadata for \code{x}. -\code{x} itself is written to a CSV or HDF5 file inside \code{path}. +\code{x} itself is written to a HDF5 file inside \code{path}. Additional files may also be created inside \code{path} and referenced from the metadata. } \description{ @@ -32,6 +35,9 @@ named after its zero-based positional index within \code{x}. If \code{\link{metadata}} or \code{\link{mcols}} are present, they are saved to the \code{other_annotations} and \code{column_annotations} subdirectories, respectively, via \code{\link{saveObject}}. + +In the on-disk representation, no distinction is made between \linkS4class{DataFrame} and data.frame instances of \code{x}. +Calling \code{readDataFrame} will always produce a \linkS4class{DFrame} regardless of the class of \code{x}. } \examples{ library(S4Vectors) diff --git a/tests/testthat/test-DataFrame.R b/tests/testthat/test-DataFrame.R index 50e3b54..9c1fde2 100644 --- a/tests/testthat/test-DataFrame.R +++ b/tests/testthat/test-DataFrame.R @@ -677,3 +677,27 @@ test_that("staging of arrays continues to work with character matrices", { roundtrip$Z <- as.matrix(roundtrip$Z) expect_identical(roundtrip, input) }) + +test_that("saving works for base data.frames", { + nrows <- 123 + df <- data.frame( + stuff = rep(LETTERS[1:3], length.out=nrows), + foo = seq_len(nrows), + whee = as.numeric(10 + seq_len(nrows)) + ) + df$blah <- factor(df$stuff, LETTERS[10:1]) + df$rabbit <- factor(df$stuff, LETTERS[1:3], ordered=TRUE) + + tmp <- tempfile() + saveObject(df, tmp) + roundtrip <- readObject(tmp) + expect_null(rownames(roundtrip)) + expect_identical(as.data.frame(roundtrip), df) + + # Respects row names. + rownames(df) <- sprintf("GENE_%i", seq_len(nrows)) + tmp <- tempfile() + saveObject(df, tmp) + roundtrip <- readObject(tmp) + expect_identical(as.data.frame(roundtrip), df) +})