From 6f444a2e54457cf302011978246c858cfd9e5500 Mon Sep 17 00:00:00 2001
From: Ethan Bass <ethanbass@gmail.com>
Date: Sat, 23 Nov 2024 21:00:02 -0500
Subject: [PATCH] fix: waters_raw, lowercase filenames, metadata, directory
 detection, v0.7.1

---
 .gitignore                        |  3 +-
 NAMESPACE                         |  1 +
 NEWS.md                           |  5 ++-
 R/attach_metadata.R               | 28 ++++++++++++++++
 R/read_waters_raw.R               | 55 +++++++++++++++++++++----------
 tests/testthat/test-read_chroms.R |  4 +--
 6 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/.gitignore b/.gitignore
index a8f2a9a..a275bc9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 .Rproj.user
 docs
-chromConverter.Rcheck
\ No newline at end of file
+chromConverter.Rcheck
+.Rhistory
diff --git a/NAMESPACE b/NAMESPACE
index cc0b946..211e6de 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -48,6 +48,7 @@ importFrom(data.table,setorder)
 importFrom(purrr,partial)
 importFrom(readxl,read_xls)
 importFrom(stats,reshape)
+importFrom(stats,setNames)
 importFrom(stringr,str_split_fixed)
 importFrom(utils,file_test)
 importFrom(utils,head)
diff --git a/NEWS.md b/NEWS.md
index e152329..c9d1542 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,9 @@
 ## chromConverter 0.7.1
 
-* Fix automatic file detection for directories (e.g., Waters `.raw` and Agilent `.D`)
+* Fixed automatic file detection for directories (e.g., Waters `.raw` and Agilent `.D`)
+* Fixed bug preventing extraction of `Waters` chromatograms with lowercase filenames.
+* Added support for extracting metadata from 'Waters' `.raw` header files.
+* Added support for extraction of detector units from 'Waters' chromatograms.
 
 ## chromConverter 0.7.0
 
diff --git a/R/attach_metadata.R b/R/attach_metadata.R
index 10c8e8d..b493599 100644
--- a/R/attach_metadata.R
+++ b/R/attach_metadata.R
@@ -174,6 +174,34 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format,
                 data_format = data_format,
                 parser = "chromconverter",
                 format_out = format_out)
+  }, "waters_raw" = {
+    structure(x, instrument = get_metadata_field(meta, "Instrument"),
+              detector = NA,
+              software = NA,
+              method = NA,
+              batch = NA,
+              operator = get_metadata_field(meta, "User_Name"),
+              run_datetime = as.POSIXct(paste(meta$Acquired_Date, meta$Acquired_Time,
+                                              collapse = " "),
+                                        format = "%d-%b-%Y %I:%M:%S",
+                                        tz = "UTC"),
+              sample_name = ifelse(is.null(meta$`Acquired Name`),
+                                   fs::path_ext_remove(basename(source_file)),
+                                   meta$`Acquired Name`),
+              sample_id = NA,
+              sample_injection_volume = NA,
+              sample_amount = NA,
+              time_range = NA,
+              time_interval = NA,
+              time_unit = NA,
+              detector_range = NA,
+              detector_y_unit = get_metadata_field(meta, "Detector_Unit"),
+              source_file = source_file,
+              source_file_format = source_file_format,
+              source_sha1 = NA,
+              data_format = data_format,
+              parser = "chromconverter",
+              format_out = format_out)
   }, "shimadzu_dad" = {
     structure(x,
               instrument = get_metadata_field(meta, "Instrument Name"),
diff --git a/R/read_waters_raw.R b/R/read_waters_raw.R
index 08b9822..f120d16 100644
--- a/R/read_waters_raw.R
+++ b/R/read_waters_raw.R
@@ -2,6 +2,7 @@
 #'
 #' Parser for reading 'Waters MassLynx (.raw) files into R.
 #'
+#' @importFrom stats setNames
 #' @param path Path to \code{.raw} file.
 #' @param format_out Class of output. Either \code{matrix}, \code{data.frame},
 #' or \code{data.table}.
@@ -23,23 +24,49 @@ read_waters_raw <- function(path, format_out = c("matrix", "data.frame", "data.t
 
   format_out <- check_format_out(format_out)
   data_format <- match.arg(data_format, c("wide", "long"))
-  metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
+  metadata_format <- match.arg(tolower(metadata_format),
+                               c("chromconverter", "raw"))
+  metadata_format <- switch(metadata_format,
+                            chromconverter = "waters_raw", raw = "raw")
+  uv_paths <- list.files(path, pattern="_CHRO", full.names = TRUE, ignore.case = TRUE)
+  meta_path <- grep("\\.INF$", uv_paths, value = TRUE, ignore.case = TRUE)
+  uv_paths <- grep("\\.INF$", uv_paths, invert = TRUE, value = TRUE, ignore.case = TRUE)
 
-  uv_paths <- list.files(path, pattern="_CHRO", full.names = TRUE)
-  meta_path <- grep("\\.INF", uv_paths, value = TRUE)
-  uv_paths <- grep("\\.INF", uv_paths, invert = TRUE, value = TRUE)
+  if (read_metadata){
+    hdr_path <- list.files(path, pattern="_HEADER.TXT",
+                           full.names = TRUE, ignore.case = TRUE)
+    hdr <- readLines(hdr_path)
+    hdr <- gsub("\\$\\$ ", "", hdr)
+    hdr <- stringr::str_split_fixed(hdr, ":", n = 2)
+    hdr[,2] <- gsub("^ ", "", hdr[,2])
+    hdr[hdr[,2] == "", 2] <- NA
+    hdr[,1] <- gsub(" ", "_", hdr[,1])
+    hdr <- as.list(setNames(hdr[,2], hdr[,1]))
+  }
 
   dat <- lapply(uv_paths, read_waters_chro, format_out = format_out,
-                data_format = data_format, read_metadata = read_metadata,
-                metadata_format = metadata_format)
+                data_format = data_format)
 
   meta <- readLines(meta_path, skipNul = TRUE, warn = FALSE,
                     encoding = "Latin-1")
   meta <- iconv(meta, sub = "")
-  meta <- strsplit(meta,"\\([0-9]\\)")[[1]][-1]
-  meta <- gsub("^ |\\$CC\\$", "", sapply(strsplit(meta, ","), function(x) x[1]))
+  meta <- strsplit(meta, "\001")[[1]][-c(1:3)]
+  nms <- gsub("^ |\\$CC\\$", "", sapply(strsplit(meta, ","), `[`, 1))
 
-  names(dat) <- meta
+  if (read_metadata){
+    detector_unit <- sapply(strsplit(meta, ","), `[`, 6)
+    dat <- lapply(seq_along(dat), function(i){
+      attach_metadata(x = dat[[i]], meta = c(hdr, Detector_Unit = detector_unit[i]),
+                       format_in = metadata_format,
+                       format_out = format_out,
+                       data_format = data_format,
+                       parser = "chromconverter",
+                       source_file = path,
+                       source_file_format = "waters_raw",
+                       scale = FALSE)
+    })
+  }
+  names(dat) <- gsub("^\\([0-9]+\\)\\s*", "", nms)
   dat
 }
 
@@ -51,9 +78,6 @@ read_waters_raw <- function(path, format_out = c("matrix", "data.frame", "data.t
 #' @param path Path to \code{.dat} file.
 #' @param format_out Matrix or data.frame.
 #' @param data_format Either \code{wide} (default) or \code{long}.
-#' @param read_metadata Logical. Whether to attach metadata.
-#' @param metadata_format Format to output metadata. Either \code{chromconverter}
-#' or \code{raw}.
 #' @return A chromatogram in the format specified by \code{format_out}
 #' (retention time x wavelength).
 #' @author Ethan Bass
@@ -62,14 +86,9 @@ read_waters_raw <- function(path, format_out = c("matrix", "data.frame", "data.t
 #magic 80000100 08000200
 
 read_waters_chro <- function(path, format_out = "data.frame",
-                            data_format = c("wide", "long"),
-                            read_metadata = TRUE,
-                            metadata_format = c("chromconverter", "raw")){
+                            data_format = c("wide", "long")){
 
   data_format <- match.arg(data_format, c("wide", "long"))
-  metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
-  # metadata_format <- switch(metadata_format,
-  #                           chromconverter = "waters_uv", raw = "raw")
 
   f <- file(path, "rb")
   on.exit(close(f))
diff --git a/tests/testthat/test-read_chroms.R b/tests/testthat/test-read_chroms.R
index 8d8fdd1..5e7f32d 100644
--- a/tests/testthat/test-read_chroms.R
+++ b/tests/testthat/test-read_chroms.R
@@ -11,8 +11,8 @@ test_that("chromConverter can read `Agilent Chemstation` .csv file", {
                tolerance = .0001, ignore_attr = TRUE)
   expect_equal(head(rownames(x), n = 3), c("0.002", "0.0086666666667",
                                            "0.0153333333333"))
-  x1 <- read_chroms(path_csv, format_in="chemstation_csv",
-                    format_out="data.table", progress_bar = FALSE)[[1]]
+  x1 <- read_chroms(path_csv, format_in = "chemstation_csv",
+                    format_out = "data.table", progress_bar = FALSE)[[1]]
   expect_s3_class(x1, "data.table")
 
   x2 <- read_chroms(path_csv, format_in="chemstation_csv",