Skip to content

Commit

Permalink
fix: waters_raw, lowercase filenames, metadata, directory detection, …
Browse files Browse the repository at this point in the history
…v0.7.1
  • Loading branch information
ethanbass committed Nov 24, 2024
1 parent fc0e9da commit 6f444a2
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 22 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.Rproj.user
docs
chromConverter.Rcheck
chromConverter.Rcheck
.Rhistory
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ importFrom(data.table,setorder)
importFrom(purrr,partial)
importFrom(readxl,read_xls)
importFrom(stats,reshape)
importFrom(stats,setNames)
importFrom(stringr,str_split_fixed)
importFrom(utils,file_test)
importFrom(utils,head)
Expand Down
5 changes: 4 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
## chromConverter 0.7.1

* Fix automatic file detection for directories (e.g., Waters `.raw` and Agilent `.D`)
* Fixed automatic file detection for directories (e.g., Waters `.raw` and Agilent `.D`)
* Fixed bug preventing extraction of 'Waters' chromatograms with lowercase filenames.
* Added support for extracting metadata from 'Waters' `.raw` header files.
* Added support for extraction of detector units from 'Waters' chromatograms.

## chromConverter 0.7.0

Expand Down
28 changes: 28 additions & 0 deletions R/attach_metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,34 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format,
data_format = data_format,
parser = "chromconverter",
format_out = format_out)
}, "waters_raw" = {
structure(x, instrument = get_metadata_field(meta, "Instrument"),
detector = NA,
software = NA,
method = NA,
batch = NA,
operator = get_metadata_field(meta, "User_Name"),
run_datetime = as.POSIXct(paste(meta$Acquired_Date, meta$Acquired_Time,
collapse = " "),
format = "%d-%b-%Y %I:%M:%S",
tz = "UTC"),
sample_name = ifelse(is.null(meta$`Acquired Name`),
fs::path_ext_remove(basename(source_file)),
meta$`Acquired Name`),
sample_id = NA,
sample_injection_volume = NA,
sample_amount = NA,
time_range = NA,
time_interval = NA,
time_unit = NA,
detector_range = NA,
detector_y_unit = get_metadata_field(meta, "Detector_Unit"),
source_file = source_file,
source_file_format = source_file_format,
source_sha1 = NA,
data_format = data_format,
parser = "chromconverter",
format_out = format_out)
}, "shimadzu_dad" = {
structure(x,
instrument = get_metadata_field(meta, "Instrument Name"),
Expand Down
55 changes: 37 additions & 18 deletions R/read_waters_raw.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#'
#' Parser for reading 'Waters MassLynx' (.raw) files into R.
#'
#' @importFrom stats setNames
#' @param path Path to \code{.raw} file.
#' @param format_out Class of output. Either \code{matrix}, \code{data.frame},
#' or \code{data.table}.
Expand All @@ -23,23 +24,49 @@ read_waters_raw <- function(path, format_out = c("matrix", "data.frame", "data.t

format_out <- check_format_out(format_out)
data_format <- match.arg(data_format, c("wide", "long"))
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
metadata_format <- match.arg(tolower(metadata_format),
c("chromconverter", "raw"))
metadata_format <- switch(metadata_format,
chromconverter = "waters_raw", raw = "raw")
uv_paths <- list.files(path, pattern="_CHRO", full.names = TRUE, ignore.case = TRUE)
meta_path <- grep("\\.INF$", uv_paths, value = TRUE, ignore.case = TRUE)
uv_paths <- grep("\\.INF$", uv_paths, invert = TRUE, value = TRUE, ignore.case = TRUE)

uv_paths <- list.files(path, pattern="_CHRO", full.names = TRUE)
meta_path <- grep("\\.INF", uv_paths, value = TRUE)
uv_paths <- grep("\\.INF", uv_paths, invert = TRUE, value = TRUE)
if (read_metadata){
hdr_path <- list.files(path, pattern="_HEADER.TXT",
full.names = TRUE, ignore.case = TRUE)
hdr <- readLines(hdr_path)
hdr <- gsub("\\$\\$ ", "", hdr)
hdr <- stringr::str_split_fixed(hdr, ":", n = 2)
hdr[,2] <- gsub("^ ", "", hdr[,2])
hdr[hdr[,2] == "", 2] <- NA
hdr[,1] <- gsub(" ", "_", hdr[,1])
hdr <- as.list(setNames(hdr[,2], hdr[,1]))
}

dat <- lapply(uv_paths, read_waters_chro, format_out = format_out,
data_format = data_format, read_metadata = read_metadata,
metadata_format = metadata_format)
data_format = data_format)

meta <- readLines(meta_path, skipNul = TRUE, warn = FALSE,
encoding = "Latin-1")
meta <- iconv(meta, sub = "")
meta <- strsplit(meta,"\\([0-9]\\)")[[1]][-1]
meta <- gsub("^ |\\$CC\\$", "", sapply(strsplit(meta, ","), function(x) x[1]))
meta <- strsplit(meta, "\001")[[1]][-c(1:3)]
nms <- gsub("^ |\\$CC\\$", "", sapply(strsplit(meta, ","), `[`, 1))

names(dat) <- meta
if (read_metadata){
detector_unit <- sapply(strsplit(meta, ","), `[`, 6)
dat <- lapply(seq_along(dat), function(i){
attach_metadata(x = dat[[i]], meta = c(hdr, Detector_Unit = detector_unit[i]),
format_in = metadata_format,
format_out = format_out,
data_format = data_format,
parser = "chromconverter",
source_file = path,
source_file_format = "waters_raw",
scale = FALSE)
})
}
names(dat) <- gsub("^\\([0-9]+\\)\\s*", "", nms)
dat
}

Expand All @@ -51,9 +78,6 @@ read_waters_raw <- function(path, format_out = c("matrix", "data.frame", "data.t
#' @param path Path to \code{.dat} file.
#' @param format_out Matrix or data.frame.
#' @param data_format Either \code{wide} (default) or \code{long}.
#' @param read_metadata Logical. Whether to attach metadata.
#' @param metadata_format Format to output metadata. Either \code{chromconverter}
#' or \code{raw}.
#' @return A chromatogram in the format specified by \code{format_out}
#' (retention time x wavelength).
#' @author Ethan Bass
Expand All @@ -62,14 +86,9 @@ read_waters_raw <- function(path, format_out = c("matrix", "data.frame", "data.t
#magic 80000100 08000200

read_waters_chro <- function(path, format_out = "data.frame",
data_format = c("wide", "long"),
read_metadata = TRUE,
metadata_format = c("chromconverter", "raw")){
data_format = c("wide", "long")){

data_format <- match.arg(data_format, c("wide", "long"))
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
# metadata_format <- switch(metadata_format,
# chromconverter = "waters_uv", raw = "raw")

f <- file(path, "rb")
on.exit(close(f))
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-read_chroms.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ test_that("chromConverter can read `Agilent Chemstation` .csv file", {
tolerance = .0001, ignore_attr = TRUE)
expect_equal(head(rownames(x), n = 3), c("0.002", "0.0086666666667",
"0.0153333333333"))
x1 <- read_chroms(path_csv, format_in="chemstation_csv",
format_out="data.table", progress_bar = FALSE)[[1]]
x1 <- read_chroms(path_csv, format_in = "chemstation_csv",
format_out = "data.table", progress_bar = FALSE)[[1]]
expect_s3_class(x1, "data.table")

x2 <- read_chroms(path_csv, format_in="chemstation_csv",
Expand Down

0 comments on commit 6f444a2

Please sign in to comment.