From 6f444a2e54457cf302011978246c858cfd9e5500 Mon Sep 17 00:00:00 2001 From: Ethan Bass Date: Sat, 23 Nov 2024 21:00:02 -0500 Subject: [PATCH] fix: waters_raw, lowercase filenames, metadata, directory detection, v0.7.1 --- .gitignore | 3 +- NAMESPACE | 1 + NEWS.md | 5 ++- R/attach_metadata.R | 28 ++++++++++++++++ R/read_waters_raw.R | 55 +++++++++++++++++++++---------- tests/testthat/test-read_chroms.R | 4 +-- 6 files changed, 74 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index a8f2a9a..a275bc9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .Rproj.user docs -chromConverter.Rcheck \ No newline at end of file +chromConverter.Rcheck +.Rhistory diff --git a/NAMESPACE b/NAMESPACE index cc0b946..211e6de 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,6 +48,7 @@ importFrom(data.table,setorder) importFrom(purrr,partial) importFrom(readxl,read_xls) importFrom(stats,reshape) +importFrom(stats,setNames) importFrom(stringr,str_split_fixed) importFrom(utils,file_test) importFrom(utils,head) diff --git a/NEWS.md b/NEWS.md index e152329..c9d1542 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,9 @@ ## chromConverter 0.7.1 -* Fix automatic file detection for directories (e.g., Waters `.raw` and Agilent `.D`) +* Fixed automatic file detection for directories (e.g., Waters `.raw` and Agilent `.D`) +* Fixed bug preventing extraction of `Waters` chromatograms with lowercase filenames. +* Added support for extracting metadata from 'Waters' `.raw` header files. +* Added support for extraction of detector units from 'Waters' chromatograms. ## chromConverter 0.7.0 diff --git a/R/attach_metadata.R b/R/attach_metadata.R index 10c8e8d..b493599 100644 --- a/R/attach_metadata.R +++ b/R/attach_metadata.R @@ -174,6 +174,34 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, data_format = data_format, parser = "chromconverter", format_out = format_out) + }, "waters_raw" = { + structure(x, instrument = get_metadata_field(meta, "Instrument"), + detector = NA, + software = NA, + method = NA, + batch = NA, + operator = get_metadata_field(meta, "User_Name"), + run_datetime = as.POSIXct(paste(meta$Acquired_Date, meta$Acquired_Time, + collapse = " "), + format = "%d-%b-%Y %I:%M:%S", + tz = "UTC"), + sample_name = ifelse(is.null(meta$`Acquired Name`), + fs::path_ext_remove(basename(source_file)), + meta$`Acquired Name`), + sample_id = NA, + sample_injection_volume = NA, + sample_amount = NA, + time_range = NA, + time_interval = NA, + time_unit = NA, + detector_range = NA, + detector_y_unit = get_metadata_field(meta, "Detector_Unit"), + source_file = source_file, + source_file_format = source_file_format, + source_sha1 = NA, + data_format = data_format, + parser = "chromconverter", + format_out = format_out) }, "shimadzu_dad" = { structure(x, instrument = get_metadata_field(meta, "Instrument Name"), diff --git a/R/read_waters_raw.R b/R/read_waters_raw.R index 08b9822..f120d16 100644 --- a/R/read_waters_raw.R +++ b/R/read_waters_raw.R @@ -2,6 +2,7 @@ #' #' Parser for reading 'Waters MassLynx (.raw) files into R. #' +#' @importFrom stats setNames #' @param path Path to \code{.raw} file. #' @param format_out Class of output. Either \code{matrix}, \code{data.frame}, #' or \code{data.table}. @@ -23,23 +24,49 @@ read_waters_raw <- function(path, format_out = c("matrix", "data.frame", "data.t format_out <- check_format_out(format_out) data_format <- match.arg(data_format, c("wide", "long")) - metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + metadata_format <- match.arg(tolower(metadata_format), + c("chromconverter", "raw")) + metadata_format <- switch(metadata_format, + chromconverter = "waters_raw", raw = "raw") + uv_paths <- list.files(path, pattern="_CHRO", full.names = TRUE, ignore.case = TRUE) + meta_path <- grep("\\.INF$", uv_paths, value = TRUE, ignore.case = TRUE) + uv_paths <- grep("\\.INF$", uv_paths, invert = TRUE, value = TRUE, ignore.case = TRUE) - uv_paths <- list.files(path, pattern="_CHRO", full.names = TRUE) - meta_path <- grep("\\.INF", uv_paths, value = TRUE) - uv_paths <- grep("\\.INF", uv_paths, invert = TRUE, value = TRUE) + if (read_metadata){ + hdr_path <- list.files(path, pattern="_HEADER.TXT", + full.names = TRUE, ignore.case = TRUE) + hdr <- readLines(hdr_path) + hdr <- gsub("\\$\\$ ", "", hdr) + hdr <- stringr::str_split_fixed(hdr, ":", n = 2) + hdr[,2] <- gsub("^ ", "", hdr[,2]) + hdr[hdr[,2] == "", 2] <- NA + hdr[,1] <- gsub(" ", "_", hdr[,1]) + hdr <- as.list(setNames(hdr[,2], hdr[,1])) + } dat <- lapply(uv_paths, read_waters_chro, format_out = format_out, - data_format = data_format, read_metadata = read_metadata, - metadata_format = metadata_format) + data_format = data_format) meta <- readLines(meta_path, skipNul = TRUE, warn = FALSE, encoding = "Latin-1") meta <- iconv(meta, sub = "") - meta <- strsplit(meta,"\\([0-9]\\)")[[1]][-1] - meta <- gsub("^ |\\$CC\\$", "", sapply(strsplit(meta, ","), function(x) x[1])) + meta <- strsplit(meta, "\001")[[1]][-c(1:3)] + nms <- gsub("^ |\\$CC\\$", "", sapply(strsplit(meta, ","), `[`, 1)) - names(dat) <- meta + if (read_metadata){ + detector_unit <- sapply(strsplit(meta, ","), `[`, 6) + dat <- lapply(seq_along(dat), function(i){ + attach_metadata(x = dat[[i]], meta = c(hdr, Detector_Unit = detector_unit[i]), + format_in = metadata_format, + format_out = format_out, + data_format = data_format, + parser = "chromconverter", + source_file = path, + source_file_format = "waters_raw", + scale = FALSE) + }) + } + names(dat) <- gsub("^\\([0-9]+\\)\\s*", "", nms) dat } @@ -51,9 +78,6 @@ read_waters_raw <- function(path, format_out = c("matrix", "data.frame", "data.t #' @param path Path to \code{.dat} file. #' @param format_out Matrix or data.frame. #' @param data_format Either \code{wide} (default) or \code{long}. -#' @param read_metadata Logical. Whether to attach metadata. -#' @param metadata_format Format to output metadata. Either \code{chromconverter} -#' or \code{raw}. #' @return A chromatogram in the format specified by \code{format_out} #' (retention time x wavelength). #' @author Ethan Bass @@ -62,14 +86,9 @@ read_waters_raw <- function(path, format_out = c("matrix", "data.frame", "data.t #magic 80000100 08000200 read_waters_chro <- function(path, format_out = "data.frame", - data_format = c("wide", "long"), - read_metadata = TRUE, - metadata_format = c("chromconverter", "raw")){ + data_format = c("wide", "long")){ data_format <- match.arg(data_format, c("wide", "long")) - metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) - # metadata_format <- switch(metadata_format, - # chromconverter = "waters_uv", raw = "raw") f <- file(path, "rb") on.exit(close(f)) diff --git a/tests/testthat/test-read_chroms.R b/tests/testthat/test-read_chroms.R index 8d8fdd1..5e7f32d 100644 --- a/tests/testthat/test-read_chroms.R +++ b/tests/testthat/test-read_chroms.R @@ -11,8 +11,8 @@ test_that("chromConverter can read `Agilent Chemstation` .csv file", { tolerance = .0001, ignore_attr = TRUE) expect_equal(head(rownames(x), n = 3), c("0.002", "0.0086666666667", "0.0153333333333")) - x1 <- read_chroms(path_csv, format_in="chemstation_csv", - format_out="data.table", progress_bar = FALSE)[[1]] + x1 <- read_chroms(path_csv, format_in = "chemstation_csv", + format_out = "data.table", progress_bar = FALSE)[[1]] expect_s3_class(x1, "data.table") x2 <- read_chroms(path_csv, format_in="chemstation_csv",