diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index c4daa77..d8407d7 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -47,11 +47,12 @@ jobs: - name: Install dependencies run: | install.packages(c("remotes", "rcmdcheck")) - install.packages(c("ncdf4")) remotes::install_deps(dependencies = TRUE) + install.packages(c("ncdf4")) + install.packages("chromConverterExtraTests", repos = "https://ethanbass.github.io/drat/") reticulate::install_miniconda() reticulate::conda_create('r-reticulate', packages = c('python==3.9', 'numpy', 'scipy', 'pandas')) - reticulate::conda_install('r-reticulate', packages = c('aston'), pip=TRUE) + reticulate::conda_install('r-reticulate', packages = c('aston', "olefile"), pip=TRUE) shell: Rscript {0} - if: runner.os == 'macOS' diff --git a/DESCRIPTION b/DESCRIPTION index 5c3fed9..c98d9d3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: chromConverter Title: Chromatographic File Converter -Version: 0.4.3 +Version: 0.5.0 Authors@R: c( person(given = "Ethan", family = "Bass", email = "ethanbass@gmail.com", role = c("aut", "cre"), @@ -32,15 +32,17 @@ Imports: xml2 Suggests: entab, - mzR, ncdf4, pbapply, - testthat (>= 3.0.0) -Config/reticulate: list( packages = list( list(package = "scipy"), + testthat (>= 3.0.0), + mzR, + chromConverterExtraTests +Config/reticulate: list( packages = list(list(package = "scipy"), list(package="numpy"), list(package = "pandas"), list(package="aston", pip = TRUE), - list(package="rainbow-api", pip = TRUE)) ) + list(package="rainbow-api", pip = TRUE), + list(package = "olefile", pip = TRUE)) ) Encoding: UTF-8 Language: en-US Roxygen: list(markdown = TRUE) diff --git a/NAMESPACE b/NAMESPACE index 4e30146..d4c2cab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,18 +4,23 @@ export(call_entab) export(call_openchrom) export(call_rainbow) export(configure_aston) +export(configure_olefile) export(configure_openchrom) export(configure_rainbow) export(extract_metadata) 
+export(read_agilent_dx) export(read_cdf) export(read_chemstation_ch) export(read_chemstation_csv) +export(read_chemstation_reports) export(read_chemstation_uv) export(read_chromeleon) export(read_chroms) export(read_mdf) export(read_mzml) +export(read_peaklist) export(read_shimadzu) +export(read_shimadzu_lcd) export(read_thermoraw) export(read_varian_peaklist) export(read_waters_arw) @@ -31,12 +36,12 @@ importFrom(purrr,partial) importFrom(readxl,read_xls) importFrom(stats,reshape) importFrom(stringr,str_split_fixed) -importFrom(tidyr,pivot_wider) importFrom(utils,file_test) importFrom(utils,head) importFrom(utils,read.csv) importFrom(utils,read.table) importFrom(utils,tail) +importFrom(utils,unzip) importFrom(utils,write.csv) importFrom(utils,write.table) importFrom(xml2,read_xml) diff --git a/NEWS.md b/NEWS.md index 9873bc1..a11e7d7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,36 @@ +## chromConverter 0.5.0 + +### New features + +* Added support for parallel processing through `pbapply` package. (**Note**: The `pbapply` package must be manually installed to enable parallel processing). +* Added internal parser for 'Agilent Chemstation' version 31 files (through `read_chemstation_uv` function). +* Added support for 'Agilent OpenLab' version 131 files through internal parser. +* Added preliminary support for reading 'Agilent' (`.dx`) files (through `read_agilent_dx` function). +* Added support for reading 'Chemstation' REPORT files. +* Added parser for Shimadzu `.lcd` files through the `read_shimadzu_lcd` function. Only the PDA stream (not MS) is currently supported. +* Added `read_peaklist` function for reading peak lists. Currently 'Agilent Chemstation' and 'Shimadzu ASCII' formats are supported. +* Added `verbose` argument to control console output for external parsers ('OpenChrom' and 'ThermoRawFileParser'). + +### Other Improvements + +* Improved automatic filetype detection by `read_chroms`. +* Refactored `read_thermoraw` function to simplify paths. 
+* The `thermoraw` and `openchrom` parsers now use a proper temp directory if an export directory is not specified through the `path_out` argument. +* Re-factored `reshape_chroms`, speeding up conversion from wide to long format. +* Added additional tests, attaining 82% test coverage. +* Changed default `openchrom` export format to `mzml`. +* Minor changes to some metadata fields to better standardize results across different file formats and parsers. + +### Bug fixes + +* Corrected 'Shimadzu' DAD parser so it reads wavelengths from the file instead of inferring them. +* Fixed bug causing failure of 'Shimadzu' ascii parser (when `what == "peak_table"` and `read_metadata == TRUE`). +* Fixed bug causing 'MDF' files to export as data.frames when `format_out == "matrix"`. +* Fixed misleading `data_format` attributes in 'Waters ARW' and 'Chromeleon' parsers. + ## chromConverter 0.4.3 -* Fixed bug in `chemstation_ch` parser ([#17](https://github.com/ethanbass/chromConverter/issues/17)) +* Fixed bug in `chemstation_ch` parser (version 130) ([#17](https://github.com/ethanbass/chromConverter/issues/17)) ## chromConverter 0.4.2 diff --git a/R/aston_parsers.R b/R/aston_parsers.R index 8a59dd4..e76e98e 100644 --- a/R/aston_parsers.R +++ b/R/aston_parsers.R @@ -11,28 +11,35 @@ #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Logical. Whether to read metadata and attach it to the #' chromatogram. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. #' @return A chromatogram in \code{data.frame} format (retention time x wavelength). 
#' @import reticulate #' @export sp_converter sp_converter <- function(file, format_out = c("matrix", "data.frame"), data_format = c("wide","long"), - read_metadata = TRUE){ + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ check_aston_configuration() format_out <- match.arg(format_out, c("matrix","data.frame")) data_format <- match.arg(data_format, c("wide","long")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + metadata_format <- switch(metadata_format, + chromconverter = "masshunter_dad", raw = "raw") + x <- trace_file$agilent_uv$AgilentDAD(file) x <- pd$DataFrame(x$data$values, columns = x$data$columns, index = x$data$index) if (data_format == "long"){ - x <- reshape_chrom(x) + x <- reshape_chrom(x, data_format = "long") } if (format_out == "matrix"){ x <- as.matrix(x) } if (read_metadata){ meta <- read_masshunter_metadata(file) - x <- attach_metadata(x, meta, format_in = "masshunter_dad", + x <- attach_metadata(x, meta, format_in = metadata_format, format_out = format_out, data_format = "wide", parser = "aston", source_file = file) } @@ -54,22 +61,28 @@ sp_converter <- function(file, format_out = c("matrix", "data.frame"), #' TRUE. #' @param read_metadata Logical. Whether to read metadata and attach it to the #' chromatogram. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. #' @return A chromatogram in \code{data.frame} format (retention time x wavelength). 
#' @import reticulate #' @export uv_converter uv_converter <- function(file, format_out = c("matrix","data.frame"), data_format = c("wide","long"), - correction=TRUE, read_metadata = TRUE){ + correction = TRUE, read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ check_aston_configuration() format_out <- match.arg(format_out, c("matrix","data.frame")) data_format <- match.arg(data_format, c("wide","long")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + metadata_format <- switch(metadata_format, + chromconverter = "chemstation_uv", raw = "raw") trace_file <- reticulate::import("aston.tracefile") pd <- reticulate::import("pandas") x <- trace_file$TraceFile(file) x <- pd$DataFrame(x$data$values, columns=x$data$columns, index=x$data$index) if (data_format == "long"){ - x <- reshape_chrom(x) + x <- reshape_chrom(x, data_format = "long") } if (format_out == "matrix"){ x <- as.matrix(x) @@ -81,7 +94,7 @@ uv_converter <- function(file, format_out = c("matrix","data.frame"), } if (read_metadata){ meta <- read_chemstation_metadata(file) - x <- attach_metadata(x, meta, format_in = "chemstation_uv", + x <- attach_metadata(x, meta, format_in = metadata_format, format_out = format_out, data_format = "wide", parser = "Aston", source_file = file) } @@ -100,17 +113,17 @@ uv_converter <- function(file, format_out = c("matrix","data.frame"), #' @import reticulate #' @noRd trace_converter <- function(file, format_out = c("matrix", "data.frame"), - data_format = c("wide","long")){ + data_format = c("wide", "long")){ check_aston_configuration() - format_out <- match.arg(format_out, c("matrix","data.frame")) - data_format <- match.arg(data_format, c("wide","long")) + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("wide", "long")) trace_file <- reticulate::import("aston.tracefile") pd <- reticulate::import("pandas") x <- trace_file$TraceFile(file) - x <- pd$DataFrame(x$data$values, 
columns=x$data$columns, - index=x$data$index) + x <- pd$DataFrame(x$data$values, columns = x$data$columns, + index = x$data$index) if (data_format == "long"){ - x <- reshape_chrom(x) + x <- reshape_chrom(x, data_format = "long") } if (format_out == "matrix"){ x <- as.matrix(x) diff --git a/R/attach_metadata.R b/R/attach_metadata.R index f654135..650a896 100644 --- a/R/attach_metadata.R +++ b/R/attach_metadata.R @@ -13,7 +13,15 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser = NULL, source_file){ + if (grepl("chemstation", format_in)){ + format_in <- "chemstation" + } + switch(format_in, + "raw" = { + structure(x, metadata = meta, data_format = data_format, parser = parser, + source_file = source_file) + }, "waters_arw" = { structure(x, instrument = NA, detector = get_metadata_field(meta, "Channel Type"), @@ -36,8 +44,8 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser ), detector_unit = get_metadata_field(meta, "Det. Units"), source_file = source_file, - data_format = "long", - parser = "chromConverter", + data_format = data_format, + parser = "chromconverter", format_out = format_out) }, "shimadzu" = { structure(x, @@ -58,21 +66,21 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser # end_time = meta$`End Time(min)`, time_interval = meta$`Interval(msec)`, time_interval_unit = get_time_unit( - grep("Interval", names(meta), value=TRUE)[1], format_in = "shimadzu"), + grep("Interval", names(meta), value = TRUE)[1], format_in = "shimadzu"), time_unit = get_time_unit( grep("Start Time", names(meta), value=TRUE)[1], format_in = "shimadzu"), - detector_range = c(meta$`Start Wavelength(nm)`,meta$`End Wavelength(nm)`), + detector_range = c(meta$`Start Wavelength(nm)`, meta$`End Wavelength(nm)`), # detector_end = meta$`End Wavelength(nm)`, detector_unit = NA, source_file = source_file, data_format = data_format, - parser = "chromConverter", + parser = "chromconverter", 
format_out = format_out) }, "chromeleon" = { - datetime.idx <- unlist(sapply(c("Date$","Time$"), function(str) grep(str, names(meta)))) + datetime.idx <- unlist(sapply(c("Date$", "Time$"), function(str) grep(str, names(meta)))) datetime <- unlist(meta[datetime.idx]) if (length(datetime > 1)){ - datetime <- paste(datetime, collapse=" ") + datetime <- paste(datetime, collapse = " ") } datetime <- as.POSIXct(datetime, format = c("%m/%d/%Y %H:%M:%S", "%d.%m.%Y %H:%M:%S", "%m/%d/%Y %H:%M:%S %p %z")) @@ -107,8 +115,8 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser detector_unit = meta$`Signal Unit`, source_file = source_file, format_out = format_out, - data_format = "long", - parser = "chromConverter" + data_format = data_format, + parser = "chromconverter" ) # } else if (format_in == "entab"){ # structure(x, instrument = meta$instrument, @@ -127,18 +135,46 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser # format = data_format, # parser = "entab", # format_out = format_out) - }, "chemstation_uv" = { - structure(x, instrument = meta$AcqInstName, - detector = NA, - software = meta$Version, - method = meta$AcqMeth, + }, "chemstation" = { + datetime_formats <- c("%d-%b-%y, %H:%M:%S", "%m/%d/%Y %I:%M:%S %p", "%d/%m/%Y %I:%M:%S %p") + meta$date <- as.POSIXct(meta$date, tz = "UTC", tryFormats = datetime_formats) + structure(x, sample_name = iconv(meta$sample_name, sub = ""), + sample_id = meta$vial, + file_version = meta$version, + file_type = meta$file_type, + instrument = meta$AcqInstName, + detector = meta$`detector`, + detector_range = meta$signal, + detector_unit = meta$units, + software = meta$software, + software_version = meta$software_version, + software_revision = meta$software_revision, + # software = meta$Version, + method = meta$method, batch = meta$SeqPathAndFile, - operator = meta$AcqOp, - run_datetime = meta$InjDateTime, - sample_name = meta$SampleName, - sample_id = NA, + operator = 
meta$operator, + run_datetime = meta$date, sample_injection_volume = meta$InjVolume, sample_amount = meta$InjVolume, + time_range = meta$time_range, + time_interval = NA, + time_unit = "Minutes", + source_file = source_file, + data_format = data_format, + parser = parser, + format_out = format_out) + }, "chemstation_peaklist" = { + structure(x, instrument = meta$`Acq. Instrument`, + detector = NA, + software = NA, + method = meta$Method, + batch = NA, + operator = meta$`Acq. Operator`, + run_datetime = NA, + sample_name = meta$`Sample Name`, + sample_id = NA, + sample_injection_volume = meta$`Inj Volume`, + sample_amount = meta$`Inj Volume`, time_range = NA, time_interval = NA, time_unit = NA, @@ -194,7 +230,7 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser source_file = ifelse(missing(source_file), NA, source_file), format_out = ifelse(missing(format_out), NA, format_out), data_format = ifelse(missing(data_format), NA, data_format), - parser = "chromConverter") + parser = "chromconverter") }, "mdf" = { structure(x, instrument = meta[meta$Property == "Instrument","Value"], detector = "Variable Wavelength Detector", @@ -220,7 +256,28 @@ attach_metadata <- function(x, meta, format_in, format_out, data_format, parser source_file = ifelse(missing(source_file), NA, source_file), format_out = ifelse(missing(format_out), NA, format_out), data_format = ifelse(missing(data_format), NA, data_format), - parser = "chromConverter") + parser = "chromconverter") + }, "thermoraw" = { + structure(x, instrument = c(meta$`Instrument model`, meta$`Instrument name`, + meta$`Instrument serial number`), + detector = NA, + software = meta$`Software version`, + method = NA, + batch = NA, + operator = NA, + run_date = meta$`Creation date`, + sample_name = basename(meta$`RAW file path`), + sample_id = meta$`Sample id`, + vial = meta$`Sample vial`, + injection_volume = meta$`Sample injection volume`, + sample_dilution = meta$`Sample dilution factor`, + 
time_range = meta$`Time range`, + time_interval = meta$`Interval(msec)`, + source_file = ifelse(missing(source_file), NA, source_file), + format_out = ifelse(missing(format_out), NA, format_out), + data_format = "long", + parser = "ThermoRawFileParser" + ) }, "default" = { structure(x, instrument = meta$Instrument, detector = NA, @@ -254,10 +311,14 @@ get_metadata_field <- function(x, field){ #' @noRd get_time_unit <- function(string, format_in){ - if (format_in %in% c("chromeleon","shimadzu")){ - pattern <- "\\((.*?)\\)" - unit <- gsub("\\(|\\)", "", regmatches(string, regexpr(pattern, string))[[1]]) - switch(unit, "min" = "Minutes", "sec" = "Seconds") + if (length(string) == 0 || is.na(string)){ + NA + } else{ + if (format_in %in% c("chromeleon", "shimadzu")){ + pattern <- "\\((.*?)\\)" + unit <- gsub("\\(|\\)", "", regmatches(string, regexpr(pattern, string))[[1]]) + switch(unit, "min" = "Minutes", "sec" = "Seconds") + } else NA } } @@ -398,3 +459,14 @@ extract_metadata <- function(chrom_list, } metadata } + + +#' Transfer metadata +#'@noRd +transfer_metadata <- function (new_object, old_object, exclude = c("names", "row.names", + "class", "dim", "dimnames")){ + a <- attributes(old_object) + a[exclude] <- NULL + attributes(new_object) <- c(attributes(new_object), a) + new_object +} diff --git a/R/call_entab.R b/R/call_entab.R index fc5b6fb..4074f0a 100644 --- a/R/call_entab.R +++ b/R/call_entab.R @@ -5,45 +5,54 @@ #' @param format_in Format of input. #' @param format_out R format. Either \code{matrix} or \code{data.frame}. #' @param read_metadata Whether to read metadata from file. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. #' @return A chromatogram in the format specified by \code{format_out} #' (retention time x wavelength). 
-#' @importFrom tidyr pivot_wider #' @export -call_entab <- function(file, data_format = c("wide","long"), - format_in = "", +call_entab <- function(file, data_format = c("wide", "long"), + format_in = NULL, format_out = c("matrix", "data.frame"), - read_metadata = TRUE){ + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ if (!requireNamespace("entab", quietly = TRUE)){ stop("The entab R package must be installed to use entab parsers: install.packages('entab', repos='https://ethanbass.github.io/drat/')", call. = FALSE) } - format_out <- match.arg(format_out, c("matrix","data.frame")) - data_format <- match.arg(data_format, c("wide","long")) + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("wide", "long")) + + metadata_format <- match.arg(tolower(metadata_format), c("chromconverter", "raw")) + metadata_format <- switch(metadata_format, + chromconverter = format_in, raw = "raw") r <- entab::Reader(file) x <- entab::as.data.frame(r) + signal.idx <- grep("signal", colnames(x)) + if (length(signal.idx) == 1){ + colnames(x)[signal.idx] <- "wavelength" + } if (data_format == "wide"){ - times <- unique(x$time) - id <- names(x)[2] - x <- as.data.frame(pivot_wider(x, id_cols = "time", - names_from = {{id}}, - values_from = "intensity"), - row.names = "time") - rownames(x) <- times - x <- x[,-1] + x <- reshape_chrom_wide(x, time_var = "time", lambda_var = "wavelength", + value_var = "intensity") } if (format_out == "matrix"){ x <- as.matrix(x) } if (read_metadata){ meta <- r$metadata() - if (format_in == "chemstation_uv"){ + meta$run_date <- as.POSIXct(eval(meta$run_date)) + meta <- rename_list(meta, c("detector" = "instrument", "method" = "method", + "operator" = "operator", "date" = "run_date", + "sample_name" = "sample")) + + if (grepl("chemstation", format_in)){ metadata_from_file <- try(read_chemstation_metadata(file), silent = TRUE) - meta <- c(meta, metadata_from_file) - } - if (format_in 
== "masshunter_dad"){ + } else if (format_in == "masshunter_dad"){ metadata_from_file <- try(read_masshunter_metadata(file), silent = TRUE) + } + if (exists("metadata_from_file") && !inherits(metadata_from_file, "try-error")){ meta <- c(meta, metadata_from_file) } x <- attach_metadata(x, meta, format_in = format_in, format_out = format_out, diff --git a/R/call_openchrom.R b/R/call_openchrom.R index ec11bd3..557ad82 100644 --- a/R/call_openchrom.R +++ b/R/call_openchrom.R @@ -2,7 +2,10 @@ #' #' Writes `xml` batch-files and calls OpenChrom file parsers using a #' system call to the command-line interface. To use this function -#' [OpenChrom](https://lablicate.com/platform/openchrom) must be manually installed. +#' [OpenChrom](https://lablicate.com/platform/openchrom) (version 0.4.0) must be +#' manually installed. The command line interface is no longer supported in the +#' latest versions of OpenChrom (starting with version 0.5.0), so the function +#' will not work with these new versions. #' #' The \code{call_openchrom} works by creating an \code{xml} batchfile and #' feeding it to the OpenChrom command-line interface. OpenChrom batchfiles @@ -25,8 +28,12 @@ #' @param files files to parse #' @param path_out directory to export converted files. #' @param format_in Either `msd` for mass spectrometry data, `csd` for flame ionization data, or `wsd` for DAD/UV data. -#' @param export_format Either \code{csv}, \code{cdf}, \code{mzml}, \code{animl}. -#' @param return_paths Logical. If TRUE, the function will return a character vector of paths to the newly created files. +#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param export_format Either \code{mzml}, \code{csv}, \code{cdf}, \code{animl}. +#' Defaults to \code{mzml}. +#' @param return_paths Logical. If TRUE, the function will return a character +#' vector of paths to the newly created files. +#' @param verbose Logical. Whether to print output from OpenChrom to the console. 
#' @return If \code{return_paths} is TRUE, the function will return a vector of paths to the newly created files. #' If \code{return_paths} is FALSE and \code{export_format} is \code{csv}, the function will return a list #' of chromatograms in \code{data.frame} format. Otherwise, it will not return anything. @@ -40,27 +47,30 @@ #' \doi{10.1186/1471-2105-11-405}. #' @export -call_openchrom <- function(files, path_out, format_in, - export_format = c("csv", "cdf", "mzml", "animl"), - return_paths = FALSE){ +call_openchrom <- function(files, path_out = NULL, format_in, + format_out = c("matrix","data.frame"), + export_format = c("mzml", "csv", "cdf", "animl"), + return_paths = FALSE, verbose = getOption("verbose")){ + format_out <- match.arg(format_out, c("matrix","data.frame")) if (length(files) == 0){ stop("Files not found.") } if (missing(format_in)){ stop("Format must be specified. The options are `msd` for mass spectrometry, `csd` for flame ionization (FID), or `wsd` for DAD/UV data.")} - export_format <- match.arg(export_format, c("csv", "cdf", "mzml", "animl")) - if (missing(path_out)){ - path_out <- set_temp_directory() + export_format <- match.arg(export_format, c("mzml", "csv", "cdf", "animl")) + if (is.null(path_out)){ + path_out <- tempdir() } - if(!file.exists(path_out)){ - stop("'path_out' not found. Make sure directory exists.") + if(!dir.exists(path_out)){ + stop("Export directory not found. 
Please check `path_out` argument and try again.") } openchrom_path <- configure_openchrom() path_xml <- write_openchrom_batchfile(files = files, path_out = path_out, format_in = format_in, export_format = export_format) - system(paste0(openchrom_path, " -nosplash -cli -batchfile ", path_xml)) + system(paste0(openchrom_path, " -nosplash -cli -batchfile ", path_xml), + ignore.stdout = !verbose, ignore.stderr = !verbose) new_files <- fs::path(path_out, fs::path_ext_remove(fs::path_file(files)), ext = switch(export_format, "animl" = "animl", @@ -74,7 +84,13 @@ call_openchrom <- function(files, path_out, format_in, "cdf" = read_cdf, "animl" = warning("An animl parser is not currently available in chromConverter"), "mzml" = read_mzml) - lapply(new_files, file_reader) + lapply(new_files, function(x){ + xx <- file_reader(x) + if (export_format == "csv" && format_out == "matrix"){ + xx <- as.matrix(xx) + } + xx + }) } } @@ -153,7 +169,8 @@ configure_openchrom <- function(cli = c("null", "true", "false", "status"), path if (path_parser == "NULL"){ path_parser <- switch(.Platform$OS.type, unix = "/Applications/Eclipse.app/Contents/MacOS/openchrom", - windows = fs::path(fs::path_home(), "AppData/Local/Programs/OpenChrom/openchrom.exe"), + windows = fs::path(fs::path_home(), + "AppData/Local/Programs/OpenChrom/openchrom.exe"), linux = "/snap/bin/openchrom" ) } @@ -161,11 +178,12 @@ configure_openchrom <- function(cli = c("null", "true", "false", "status"), path path_parser <- path } writeLines(path_parser, - con = system.file('shell/path_to_openchrom_commandline.txt', package='chromConverter')) + con = system.file('shell/path_to_openchrom_commandline.txt', + package='chromConverter')) if (!file.exists(path_parser)){ warning("OpenChrom not found!", immediate. 
= TRUE) - path_parser <- readline(prompt="Please provide path to `OpenChrom` command line):") + path_parser <- readline(prompt = "Please provide path to `OpenChrom` command line (v0.4)):") if (.Platform$OS.type == "windows"){ path_parser <- gsub("/","\\\\", path_parser) } diff --git a/R/call_rainbow.R b/R/call_rainbow.R index e279503..d86562f 100644 --- a/R/call_rainbow.R +++ b/R/call_rainbow.R @@ -34,16 +34,16 @@ call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshun format_out <- match.arg(format_out, c("matrix","data.frame")) data_format <- match.arg(data_format, c("wide", "long")) # check_rb_dir(file) + if (grepl("chemstation", format_in)){ + format_in <- "chemstation" + } converter <- switch(format_in, "agilent_d" = rb_read$read, "waters_raw" = rb_read$read, "masshunter" = rb_read$read, "chemstation" = rb_parse_agilent$chemstation$parse_file, - "chemstation_uv" = rb_parse_agilent$chemstation$parse_file, - "chemstation_fid" = rb_parse_agilent$chemstation$parse_file, - "chemstation_ch" = rb_parse_agilent$chemstation$parse_file, "default" = rb_read$read) - if (format_in %in% c("chemstation", "chemstation_uv", "chemstation_fid")){ + if (format_in %in% c("chemstation")){ by <- "single" } x <- converter(file) @@ -80,24 +80,24 @@ call_rainbow <- function(file, format_in = c("agilent_d", "waters_raw", "masshun #' @noRd extract_rb_data <- function(xx, format_out = "matrix", - data_format = c("wide","long"), read_metadata = TRUE){ + data_format = c("wide","long"), + read_metadata = TRUE){ data_format <- match.arg(data_format, c("wide","long")) data <- xx$data - # rownames(data) <- xx$xlabels[seq_len(nrow(data))] try(rownames(data) <- xx$xlabels) colnames(data) <- xx$ylabels + if (data_format == "long"){ + data <- reshape_chrom(data, data_format = "long") + } + if (format_out == "data.frame"){ + data <- as.data.frame(data) + } if (read_metadata){ try(attr(data, "detector") <- xx$detector) try(attr(data, "metadata") <- xx$metadata) attr(data, 
"parser") <- "rainbow" attr(data, "data_format") <- data_format } - if (format_out == "data.frame"){ - data <- as.data.frame(data) - } - if (ncol(xx$data) > 1 && data_format == "long"){ - data <- reshape_chrom(data) - } data } diff --git a/R/parsers.R b/R/parsers.R deleted file mode 100644 index 98a5cca..0000000 --- a/R/parsers.R +++ /dev/null @@ -1,362 +0,0 @@ -#' Chromeleon ASCII reader -#' -#' @importFrom utils tail read.csv -#' @param file path to file -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. -#' @param data_format Whether to return data in \code{wide} or \code{long} format. -#' @param read_metadata Whether to read metadata from file. -#' @return A chromatogram in the format specified by \code{format_out}. -#' (retention time x wavelength). -#' @author Ethan Bass -#' @export - -read_chromeleon <- function(file, format_out = c("matrix","data.frame"), - data_format = c("wide","long"), - read_metadata = TRUE){ - format_out <- match.arg(format_out, c("matrix","data.frame")) - data_format <- match.arg(data_format, c("wide","long")) - xx <- readLines(file) - xx <- remove_unicode_chars(xx) - start <- tail(grep("Data:", xx), 1) - x <- read.csv(file, skip = start, sep="\t", row.names = NULL) - x <- x[,-2, drop = FALSE] - x <- x[,colSums(is.na(x)) < nrow(x)] - if (any(grepl(",",as.data.frame(x)[-1,2]))){ - decimal_separator <- "," - x <- apply(x, 2, function(x) gsub("\\.", "", x)) - x <- apply(x, 2, function(x) gsub(",", ".", x)) - } - x <- apply(x, 2, as.numeric) - colnames(x) <- c("RT","Intensity") - if (data_format == "wide"){ - rownames(x) <- x[,1] - x <- x[, 2, drop = FALSE] - } - if (format_out == "data.frame"){ - x <- as.data.frame(x) - } - if (read_metadata){ - meta <- try(read_chromeleon_metadata(xx)) - if (decimal_separator == ","){ - meta <- lapply(meta, function(x) gsub(",",".",x)) - } - if (!inherits(meta, "try-error")){ - x <- attach_metadata(x, meta, format_in = "chromeleon", - format_out = format_out, data_format = 
"wide", - parser = "chromConverter", source_file = file) - } - } - x -} - -#' Shimadzu ascii reader -#' -#' Reads 'Shimadzu' ascii files into R. These files are exported from -#' 'Lab Solutions' by right clicking on samples in the sample list and -#' selecting -#' -#' @name read_shimadzu -#' @importFrom utils tail read.csv -#' @importFrom stringr str_split_fixed -#' @param file path to file -#' @param format_in Format of files. \code{fid} or \code{dad}. -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. -#' @param data_format Whether to return data in \code{wide} or \code{long} format. -#' @param read_metadata Whether to read metadata from file. -#' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. -#' Accepts multiple arguments. -#' @return A chromatogram in the format specified by \code{format_out} -#' (retention time x wavelength). -#' @author Ethan Bass -#' @export - -read_shimadzu <- function(file, format_in, - format_out = c("matrix","data.frame"), - data_format = c("wide","long"), - what = "chromatogram", - read_metadata = TRUE){ - if (missing(format_in)) - stop("`format_in` must be specified. 
The options are `fid` or `dad`.") - format_out <- match.arg(format_out, c("matrix", "data.frame")) - data_format <- match.arg(data_format, c("wide", "long")) - what <- match.arg(what, c("chromatogram", "peak_table"), several.ok = TRUE) - x <- readLines(file) - sep <- substr(x[2], 17, 17) - headings <- grep("\\[*\\]", x) - peaktab.idx <- grep("\\[Peak Table", x) - chrom_heading <- switch(format_in, - "fid" = "\\[Chromatogram .*]", - "dad" = "\\[PDA 3D]") - chrom.idx <- grep(chrom_heading, x) - - if (any(what == "chromatogram")){ - if (length(chrom.idx) != 0){ - header <- try(extract_header(x = x, chrom.idx = chrom.idx, sep = sep)) - met <- header[[1]] - decimal_separator <- ifelse(grepl(",", met[2, 2]),",",".") - if (decimal_separator == ","){ - met[c(2:3), 2] <- gsub(",", ".", met[c(2:3), 2]) - } - - if (format_in == "fid"){ - xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses="numeric", - na.strings=c("[FractionCollectionReport]","#ofFractions"), - dec = decimal_separator) - xx <- as.matrix(xx[!is.na(xx[,1]),]) - rownames(xx) <- xx[,1] - xx <- xx[, 2, drop = FALSE] - colnames(xx) <- "Intensity" - if (data_format == "long"){ - xx <- cbind(RT = as.numeric(rownames(xx)), Intensity = as.numeric(xx[,1])) - } - } else if (format_in == "dad"){ - nrows <- as.numeric(met[grep("# of Time Axis Points", met[,1]),2]) - ncols <- as.numeric(met[grep("# of Wavelength Axis Points", met[,1]),2]) - xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses="numeric", - na.strings=c("[FractionCollectionReport]","#ofFractions"), - row.names = 1, nrows = nrows, dec = decimal_separator) - xx <- as.matrix(xx[!is.na(xx[,1]),]) - times <- round(seq(met[grep("Start Time", met[,1]),2], - met[grep("End Time", met[,1]),2], - length.out = nrows), 2) - wavelengths <- round(seq(met[grep("Start Wavelength", met[,1]), 2], - met[grep("End Wavelength", met[,1]), 2], - length.out = ncols), 2) - colnames(xx) <- wavelengths - if (data_format == "long"){ - xx <- reshape_chrom(xx) - } - } 
- if (format_out == "data.frame"){ - xx <- as.data.frame(xx) - } - } else{ - if (length(what) == 1){ - stop("Chromatogram not found.") - } else{ - warning("Chromatogram not found.") - what = "peak_table" - } - } - } - - ### extract peak_table - if (any(what == "peak_table")){ - if (length(peaktab.idx) == 0){ - if (length(what) == 1){ - stop("Peak table not found!") - } else{ - warning("Peak table not found!") - what <- "chromatogram" - } - } - peak_tab <- lapply(peaktab.idx, function(idx){ - nrows <- as.numeric(strsplit(x = x[idx+1], split = sep)[[1]][2]) - if (!is.na(nrows) && nrows > 0){ - time_column <- grep("R.Time", strsplit(x = x[[idx+2]], split = sep)[[1]]) - t1 <- strsplit(x = x[[idx+3]], split = sep)[[1]][time_column] - decimal_separator <- ifelse(grepl(".", t1), ".", ",") - - peak_tab <- read.csv(file, skip = (idx+1), sep = sep, nrows = nrows, - dec = decimal_separator) - } else{NA} - }) - names(peak_tab) <- gsub("\\[|\\]","", x[peaktab.idx]) - } - if ("peak_table" %in% what & "chromatogram" %in% what){ - what <- "both" - } - if (format_out == "data.frame"){ - xx <- as.data.frame(xx) - } - xx <- switch(what, "chromatogram" = xx, - "peak_table" = peak_tab, - "both" = list(chromatogram = xx, peak_table = peak_tab)) - if (read_metadata){ - idx <- which(x[headings] %in% - c("[Header]", "[File Information]", "[Sample Information]", - "[Original Files]", "[File Description]", "[Configuration]") - ) - meta_start <- headings[min(idx)] - meta_end <- headings[max(idx) + 1] - meta <- x[(meta_start+1):(meta_end-1)] - meta <- meta[meta!=""] - meta <- meta[-grep("\\[", meta)] - meta <- stringr::str_split_fixed(meta, pattern = sep, n = 2) - if (exists("met")){ - meta <- rbind(meta, met) - } - rownames(meta) <- meta[, 1] - meta <- as.list(meta[,2]) - # data_format <- switch(format_in, - # "fid" = "long", - # "dad" = "wide") - if (inherits(xx, "list")){ - xx <- lapply(xx, function(xxx){ - attach_metadata(xxx, meta, format_in = "shimadzu", - source_file = file, format_out 
= format_out, - data_format = data_format, parser = "chromConverter") - }) - } else{ - xx <- attach_metadata(xx, meta, format_in = "shimadzu", - source_file = file, format_out = format_out, - data_format = data_format, - parser = "chromConverter") - } - } - xx -} - -#' Waters ascii (.arw) reader -#' -#' Reads 'Waters ARW' files. -#' -#' For help exporting files from Empower, you can consult the official -#' documentation: [How_to_export_3D_raw_data_from_Empower](https://support.waters.com/KB_Inf/Empower_Breeze/WKB77571_How_to_export_3D_raw_data_from_Empower_to_a_Microsoft_Excel_spreadsheet). -#' -#' @name read_waters_arw -#' @importFrom utils tail read.csv -#' @param file path to file -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. -#' @param data_format Whether to return data in \code{wide} or \code{long} format. -#' @param read_metadata Whether to read metadata from file. -#' @return A chromatogram in the format specified by \code{format_out} -#' (retention time x wavelength). 
-#' @author Ethan Bass -#' @export - -read_waters_arw <- function(file, format_out = c("matrix","data.frame"), - data_format = c("wide","long"), - read_metadata = TRUE){ - format_out <- match.arg(format_out, c("matrix","data.frame")) - data_format <- match.arg(data_format, c("wide","long")) - x <- read.csv(file, sep="\t", skip = 2, header = FALSE, row.names = 1) - # PDA (3D) - if (rownames(x)[1] == "Wavelength"){ - colnames(x) <- x[1,] - rm <- 1 - if (rownames(x)[2] == "Time"){ - rm <- c(rm,2) - } - x <- x[-rm,] - if (data_format == "long"){ - x <- as.data.frame(reshape_chrom(x, data_format = "long")) - } - } - # 1D - if (ncol(x) == 1){ - colnames(x) <- "Intensity" - if (data_format == "long"){ - x <- data.frame(RT = rownames(x), Intensity = x[,1]) - } - } - if (format_out == "matrix"){ - x <- as.matrix(x) - } - if (read_metadata){ - meta <- try(read_waters_metadata(file)) - if (!inherits(meta, "try-error")){ - x <- attach_metadata(x, meta, format_in = "waters_arw", - format_out = format_out, - data_format = data_format, - parser = "chromConverter", - source_file = file) - } - } - x -} - -#' Chemstation CSV reader -#' -#' @name read_chemstation_csv -#' @importFrom utils tail read.csv -#' @param file path to file -#' @param format_out R format. Either \code{matrix} or \code{data.frame}. -#' @return A chromatogram in the format specified by \code{format_out} -#' (retention time x wavelength). -#' @author Ethan Bass -#' @export - -read_chemstation_csv <- function(file, format_out = c("matrix","data.frame")){ - format_out <- match.arg(format_out, c("matrix", "data.frame")) - x <- read.csv(file, row.names = 1, header = TRUE, - fileEncoding="utf-16",check.names = FALSE) - if (format_out == "matrix"){ - x <- as.matrix(x) - } - x -} - -#' Extract data from mzML files -#' -#' Extracts data from mzML files using parsers from either RaMS or mzR. The RaMS -#' parser (default) will only return data in tidy (long) format. mzR will return -#' data in wide format. 
Currently the mzR-based parser only returns DAD data. -#' -#' @name read_mzml -#' @importFrom RaMS grabMSdata -#' @param path path to file -#' @param format_out R format. Only applies if \code{mzR} is selected. -#' Either \code{matrix} or \code{data.frame}. \code{RaMS} will return -#' a list of data.tables regardless of what is selected here. -#' @param data_format Whether to return data in \code{wide} or \code{long} format. -#' @param parser What parser to use. Either \code{RaMS} or \code{mzR}. -#' @param what What types of data to return (argument to \code{\link[RaMS]{grabMSdata}}. -#' Options include \code{MS1}, \code{MS2}, \code{BPC}, \code{TIC}, \code{DAD}, -#' \code{chroms}, \code{metadata}, or \code{everything}). -#' @param verbose Argument to \code{\link[RaMS]{grabMSdata}} controlling \code{verbosity}. -#' @param ... Additional arguments to \code{\link[RaMS]{grabMSdata}}. -#' @return If \code{RaMS} is selected, the function will return a list of "tidy" -#' \code{data.table} objects. If \code{mzR} is selected, the function will return a -#' chromatogram in \code{matrix} or \code{data.frame} format according to the -#' value of \code{format_out}. 
-#' @author Ethan Bass -#' @export read_mzml - -read_mzml <- function(path, format_out = c("matrix", "data.frame"), - data_format = c("long","wide"), - parser=c("RaMS","mzR"), - what=c("MS1","MS2", "BPC", "TIC", "DAD", - "chroms", "metadata", "everything"), - verbose = FALSE, - ...){ - parser <- match.arg(parser, c("RaMS", "mzR")) - format_out <- match.arg(format_out, c("matrix", "data.frame")) - data_format <- match.arg(data_format, c("long","wide")) - what <- match.arg(what, c("MS1","MS2", "BPC", "TIC", "DAD", - "chroms", "metadata", "everything"), several.ok = TRUE) - if (all(c("MS1","MS2", "BPC", "TIC", "DAD", - "chroms", "metadata", "everything") %in% what)){ - what <- grep("everything",what, invert = TRUE,value = TRUE) - } - if (parser == "RaMS"){ - data <- RaMS::grabMSdata(path, grab_what = what, verbosity = verbose, ...) - if (data_format == "wide"){ - data <- reshape_chroms(data, data_format = "wide") - } - } else if (parser == "mzR"){ - if (!requireNamespace("mzR", quietly = TRUE)) { - stop( - "The `mzR` package is not installed. Please install it from Bioconductor: - BiocManager::install('mzR')", - call. 
= FALSE) - } - x <- mzR::openMSfile(path) - info <- mzR::header(x) - UV_scans <- which(info$msLevel==0) - rts <- info[UV_scans,"retentionTime"] - lambdas <- seq(info$scanWindowLowerLimit[UV_scans[1]], - info$scanWindowUpperLimit[UV_scans[1]]) - pks <- mzR::peaks(x) - data <- t(sapply(UV_scans, function(j) pks[[j]][,2])) - rownames(data) <- rts - colnames(data) <- lambdas - if (data_format == "long"){ - data <- reshape_chrom(data) - } - if (format_out == "data.frame"){ - data <- as.data.frame(data) - } - } - data -} diff --git a/R/read_cdf.R b/R/read_cdf.R index 85633be..bb85d0f 100644 --- a/R/read_cdf.R +++ b/R/read_cdf.R @@ -36,11 +36,15 @@ andi_ms_error <- function(...){ #' Read ANDI chrom file #' @noRd -read_andi_chrom <- function(file, format_out = c("matrix","data.frame"), - data_format = c("wide","long"), - what = "chromatogram", read_metadata = TRUE){ +read_andi_chrom <- function(file, format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + what = "chromatogram", read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ data_format <- match.arg(data_format, c("wide","long")) format_out <- match.arg(format_out, c("matrix","data.frame")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + metadata_format <- switch(metadata_format, + chromconverter = "cdf", raw = "raw") what <- match.arg(what, c("chromatogram", "peak_table"), several.ok = TRUE) nc <- ncdf4::nc_open(file) if (any(what == "chromatogram")){ @@ -87,7 +91,7 @@ read_andi_chrom <- function(file, format_out = c("matrix","data.frame"), parser = "chromconverter", source_file = file) }) } else{ - data <- attach_metadata(data, meta = meta, format_in = "cdf", + data <- attach_metadata(data, meta = meta, format_in = metadata_format, format_out = format_out, data_format = data_format, parser = "chromconverter", source_file = file) } diff --git a/R/read_chemstation_ch.R b/R/read_chemstation_ch.R index e75ee85..64ee883 100644 --- 
a/R/read_chemstation_ch.R +++ b/R/read_chemstation_ch.R @@ -4,6 +4,8 @@ #' @param format_out Matrix or data.frame. #' @param data_format Whether to return data in \code{wide} or \code{long} format. #' @param read_metadata Logical. Whether to attach metadata. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. #' @author Ethan Bass #' @return A chromatogram in the format specified by \code{format_out} #' (retention time x wavelength). @@ -12,11 +14,14 @@ #' ((c) James Dillon 2014). #' @export -read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"), - data_format = c("wide","long"), - read_metadata = TRUE){ - format_out <- match.arg(format_out, c("matrix","data.frame")) - data_format <- match.arg(data_format, c("wide","long")) +read_chemstation_ch <- function(path, format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("wide", "long")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + metadata_format <- switch(metadata_format, chromconverter = "chemstation", raw = "raw") f <- file(path, "rb") on.exit(close(f)) @@ -24,15 +29,22 @@ read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"), # HEADER seek(f, 1, "start") version <- readBin(f, "character", n = 1) - version <- match.arg(version, choices = c("8", "81", "30", "130", "179", "181")) + version <- match.arg(version, + choices = c("8", "81", "30", "130", "179", "181")) offsets <- get_agilent_offsets(version) + if (version == "179"){ + seek(f, 348) + filetype <- paste(readBin(f, "character", n = 2), collapse = "") + version <- paste(version, filetype, sep = "_") + } decoder <- switch(version, "8" = decode_delta, "81" = decode_double_delta, "30" = decode_delta, "130" = decode_delta, "181" = 
decode_double_delta, - "179" = decode_double_array) + "179_GC" = decode_double_array_gc, + "179_OL" = decode_double_array_ol) # Sample Info # offsets <- list(sample = 858, description = 1369, method = 2574, @@ -48,8 +60,10 @@ read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"), seek(f, where = 282, origin = "start") if (version %in% c("8", "30", "130")){ - xmin <- as.double(readBin(f, "integer", n = 1, size = 4, signed = TRUE, endian = "big")) / 60000 - xmax <- as.double(readBin(f, "integer", n = 1, size = 4, signed = TRUE, endian = "big")) / 60000 + xmin <- as.double(readBin(f, "integer", n = 1, size = 4, signed = TRUE, + endian = "big")) / 60000 + xmax <- as.double(readBin(f, "integer", n = 1, size = 4, signed = TRUE, + endian = "big")) / 60000 } else { xmin <- readBin(f, "numeric", n = 1, endian = "big", size = 4) / 60000 xmax <- readBin(f, "numeric", n = 1, endian = "big", size = 4) / 60000 @@ -76,37 +90,33 @@ read_chemstation_ch <- function(path, format_out = c("matrix","data.frame"), data <- as.matrix(data) } if (read_metadata){ - meta_slots <- switch(version, "8" = 9, - "81" = 9, - "30" = 11, - "130" = 12, - "181" = 9, - "179" = 9) - - meta <- lapply(offsets[seq_len(meta_slots)], function(offset){ - seek(f, where = offset, origin = "start") - n <- get_nchar(f) - if (version == "30"){ - readBin(f, what = "character") - } else{ - cc_collapse(readBin(f, "character", n = n)) - } - }) - if (read_metadata){ + meta_slots <- switch(version, "8" = 10, + "81" = 10, + "30" = 13, + "130" = 14, + "179_GC" = 10, + "179_OL" = 10, + "181" = 10) + + meta <- lapply(offsets[seq_len(meta_slots)], function(offset){ + seek(f, where = offset, origin = "start") + n <- get_nchar(f) + if (version == "30"){ + readBin(f, what = "character") + } else{ + cc_collapse(readBin(f, "character", n = n)) + } + }) + + metadata_from_file <- try(read_chemstation_metadata(path), silent = TRUE) + if (!inherits(metadata_from_file, "try-error")){ + meta <- c(meta, 
metadata_from_file) + } datetime_regex <- "(\\d{2}-[A-Za-z]{3}-\\d{2}, \\d{2}:\\d{2}:\\d{2})|(\\d{2}/\\d{2}/\\d{4} \\d{1,2}:\\d{2}:\\d{2} (?:AM|PM)?)" - datetime <- regmatches(meta$date, gregexpr(datetime_regex, meta$date))[[1]] - datetime_formats <- c("%d-%b-%y, %H:%M:%S", "%m/%d/%Y %I:%M:%S %p", "%d/%m/%Y %I:%M:%S %p") - datetime <- as.POSIXct(datetime, tz = "UTC", tryFormats = datetime_formats) - data <- structure(data, file_version = version, sample_name = meta$sample_name, - run_date = datetime, - instrument = meta$instrument, - method = meta$method, software_version = meta$software_version, - software = meta$software, software_rev = meta$software_revision, - signal = meta$signal, detector_unit = meta$unit, - time_range = c(xmin, xmax), time_interval = mean(diff(times)), - time_unit = "Minutes", source_file = path, - data_format = data_format, parser = "chromConverter") - } + meta$date <- regmatches(meta$date, gregexpr(datetime_regex, meta$date))[[1]] + data <- attach_metadata(data, meta, format_in = metadata_format, + data_format = data_format, format_out = format_out, + parser = "chromconverter", source_file = path) } data } @@ -126,7 +136,7 @@ cc_trim_str <- function(x, len=2){ get_chemstation_dir_name <- function(path){ dir <- gsub(basename(path), "", path) sp <- str_split_fixed(dir, "/", stringr::str_count(dir,"/")+1)[1,] - grep("\\.D|\\.d$", sp, ignore.case = TRUE,value = TRUE) + grep("\\.D|\\.d$", sp, ignore.case = TRUE, value = TRUE) } #' @noRd @@ -178,10 +188,10 @@ decode_double_delta <- function(file, offset) { #' @note This function was adapted from the #' \href{https://github.com/chemplexity/chromatography}{Chromatography Toolbox} #' ((c) James Dillon 2014). 
-decode_double_array <- function(file, offset) { +decode_double_array_gc <- function(file, offset) { seek(file, 0, 'end') fsize <- seek(file, NA, "current") - offset <- 0x1800 + offset <- 6144 # Read data seek(file, offset, "start") signal <- readBin(file, what = "double", size = 4, endian = "little", @@ -190,6 +200,19 @@ decode_double_array <- function(file, offset) { return(signal) } +#' Decode double array +#' @noRd +decode_double_array_ol <- function(file, offset) { + seek(file, 0, 'end') + fsize <- seek(file, NA, "current") + offset <- 6144 + # Read data + seek(file, offset, "start") + signal <- readBin(file, what = "double", size = 8, endian = "little", + n = (fsize - offset)) + return(signal) +} + #' Decode delta array #' @noRd #' @note This function was adapted from the @@ -215,12 +238,11 @@ decode_delta <- function(file, offset) { segment_length <- readBin(file, "integer", n = 1, size = 1, endian = "big") for (i in seq_len(segment_length)){ - # for (i in (1:bitwAnd(buffer[1], 4095L))) { buffer[3] <- readBin(file, "integer", n = 1, size = 2, endian = "big") if (buffer[3] != -32768L) { buffer[2] <- buffer[2] + buffer[3] } else { - buffer[2] <- readBin(file, "integer", n = 1, size =4 ,endian = "big") + buffer[2] <- readBin(file, "integer", n = 1, size = 4 ,endian = "big") } signal[index] <- buffer[2] @@ -235,8 +257,58 @@ decode_delta <- function(file, offset) { #' Get Agilent offsets #' @noRd get_agilent_offsets <- function(version){ - if (version %in% c("179","181")){ + if (version == "131_LC"){ + offsets <- list(version = 326, + file_type = 347, + sample_name = 858, + operator = 1880, + date = 2391, + detector = 2492, + method = 2574, + software = 3089, + units = 3093, + sample_id = 4055, + num_times = 278, #big-endian + rt_first = 282, + rt_last = 286, + scaling_factor = 3085, + data_start = 4096 + ) + } else if (version == "131_OL"){ + offsets <- list(version = 326, + file_type = 347, + sample_name = 858, + operator = 1880, + date = 2391, + # detector = 
2492, + method = 2574, + # software = 3089, + units = 3093, + sample_id = 4055, + num_times = 278, #big-endian + rt_first = 282, + rt_last = 286, + scaling_factor = 3085, + data_start = 4096 + ) + } else if (version == "31"){ + offsets <- list(version = 0, + file_type = 4, + sample_name = 24, + operator = 148, + date = 178, + detector = 208, + instrument = 218, + method = 228, + # unknown = 260, + num_times = 278, # big-endian + scaling_factor = 318, + units = 326, + data_start = 512 + ) + } else if (version %in% c("179","179_GC", "179_OL", "181")){ offsets <- list( + version = 326, file_type = 347, #0x15B sample_name = 858, #0x35A operator = 1880, #0x758 @@ -258,9 +330,9 @@ get_agilent_offsets <- function(version){ # sequence_line_or_injection = 252, #UINT16 # injection_or_sequence_line = 256, #UINT16 # data_offset = 264, # UINT32 - start_time = 282, + # start_time = 282, # end_time = 286, - # version_string = 326, # utf16 + version = 326, # utf16 file_type = 347, # utf16 sample_name = 858, # utf16 operator = 1880, # utf16 @@ -268,24 +340,26 @@ get_agilent_offsets <- function(version){ inlet = 2492, # utf16 instrument = 2533, # utf16' method = 2574, # utf16 - software_version = 3601, #utf16' software = 3089, # 'utf16' + software_version = 3601, #utf16' software_revision = 3802, #'utf16' + sample_id = 4054, units = 4172, # 'utf16' signal = 4213, # 'utf16' intercept = 4110, # INT32 scaling_factor = 4732) #ENDIAN + 'd' } else if (version == 30){ offsets <- list( + version = 0, file_type = 4, # utf16 sample_name = 24, # utf16 operator = 148, # utf16 date = 178, # utf16 - # inlet = 2492, # utf16 - instrument = 208, # utf16' + detector = 208, # utf16' + instrument = 218, method = 228, # utf16 - software_version = 355, #utf16' software = 322, # 'utf16' + software_version = 355, #utf16' software_revision = 405, #'utf16' units = 580, # 'utf16' signal = 596, # 'utf16' @@ -294,14 +368,15 @@ get_agilent_offsets <- function(version){ data_start = 1024 #ENDIAN + 'd' ) } else if 
(version %in% c("8","81")){ - offsets <- list(sample_name = 24, + offsets <- list(version = 0, + file_type = 4, + sample_name = 24, description = 86, operator = 148, date = 178, + detector = 208, instrument = 218, - inlet = 208, method = 228, - # software = 0xC11, unit = 580, num_times = 0x116, rt_first = 0x11A, @@ -312,3 +387,37 @@ get_agilent_offsets <- function(version){ } offsets } + +#' Parser for reading Agilent ('.dx') files into R +#' @importFrom utils unzip +#' @param path Path to \code{.dx} file. +#' @param path_out Path to directory to export unzipped files. +#' @param format_out Matrix or data.frame. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. +#' @param read_metadata Logical. Whether to attach metadata. +#' @author Ethan Bass +#' @return A chromatogram in the format specified by \code{format_out} +#' (retention time x wavelength). +#' @author Ethan Bass +#' @export +read_agilent_dx <- function(path, path_out = NULL, format_out = c("matrix","data.frame"), + data_format = c("wide","long"), + read_metadata = TRUE){ + format_out <- match.arg(format_out, c("matrix","data.frame")) + data_format <- match.arg(data_format, c("wide","long")) + files <- unzip(path, list = TRUE) + files.idx <- grep(".ch$", files$Name, ignore.case = TRUE) + # make temp directory + if (is.null(path_out)){ + path_out <- tempdir() + on.exit(unlink(path_out), add = TRUE) + } + # copy .dx file to directory + file.copy(path, path_out) + path <- fs::path(path_out, basename(path)) + # unzip .dx file + unzip(path, files = files$Name[files.idx], exdir = path_out) + # read in `.ch` files + read_chemstation_ch(fs::path(path_out, files$Name[files.idx]), format_out = format_out, + data_format = data_format, read_metadata = read_metadata) +} diff --git a/R/read_chemstation_csv.R b/R/read_chemstation_csv.R new file mode 100644 index 0000000..4e3b3cc --- /dev/null +++ b/R/read_chemstation_csv.R @@ -0,0 +1,21 @@ +#' Read Chemstation CSV +#' +#' @name 
read_chemstation_csv +#' @importFrom utils tail read.csv +#' @param file path to file +#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @return A chromatogram in the format specified by \code{format_out} +#' (retention time x wavelength). +#' @author Ethan Bass +#' @export + +read_chemstation_csv <- function(file, format_out = c("matrix","data.frame")){ + format_out <- match.arg(format_out, c("matrix", "data.frame")) + x <- read.csv(file, row.names = 1, header = TRUE, + fileEncoding="utf-16", check.names = FALSE) + if (format_out == "matrix"){ + x <- as.matrix(x) + } + x +} + diff --git a/R/read_chemstation_report.R b/R/read_chemstation_report.R new file mode 100644 index 0000000..3d6f986 --- /dev/null +++ b/R/read_chemstation_report.R @@ -0,0 +1,140 @@ +#' Read Agilent Chemstation Reports +#' @param files Paths to Chemstation report files. +#' @param data_format Format to output data. Either \code{chromatographr} or +#' \code{chemstation}. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} or +#' \code{raw}. 
+#' @author Ethan Bass +#' @export + +read_chemstation_reports <- function(files, data_format = c("chromatographr", "original"), + metadata_format = c("chromconverter", "raw")){ + data_format <- match.arg(tolower(data_format), c("chromatographr", "original")) + metadata_format = match.arg(metadata_format, c("chromconverter","raw")) + names(files) <- sub(".*/([^/]+)\\.D/.*$", "\\1", files) + + pks <- lapply(seq_along(files), function(i){ + xx <- read_chemstation_report(files[i], data_format = data_format, + metadata_format = metadata_format) + dat <- lapply(seq_along(xx), function(ii){ + lambda <- sub(".*Sig=([0-9]+).*", "\\1", names(xx)[ii]) + cbind(sample = names(files)[i], lambda = lambda, xx[[ii]]) + }) + names(dat) <- sub(".*Sig=([0-9]+).*", "\\1", names(xx)) + dat + }) + names(pks) <- names(files) + structure(pks, + chrom_list = NA, + lambdas = names(pks[[1]]), fit = "chemstation", sd.max = NA, + max.iter = NA, + time.units = "min", + class = "peak_list") +} + +#' Read Agilent Chemstation Report +#' @param file Path to file +#' @param data_format Format to output data. Either \code{chromatographr} or +#' \code{chemstation}. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} or +#' \code{raw}. 
+#' @author Ethan Bass +#' @noRd + +read_chemstation_report <- function(file, data_format = c("chromatographr", "original"), + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ + data_format <- match.arg(tolower(data_format), c("chromatographr", "original")) + metadata_format = match.arg(metadata_format, c("chromconverter", "raw")) + metadata_format <- switch(metadata_format, + chromconverter = "chemstation_peaklist", + raw = "raw") + x <- readLines(file, encoding = "UTF-16LE", skipNul = TRUE) + x[1] <- gsub("\xff\xfe", "", x[1], useBytes = TRUE) + x <- gsub("\xb5", "", x, useBytes = TRUE) + + sections <- grep("=====================================================================", x) + + signals <- grep("Signal [0-9]:", x) + signals <- c(signals, (grep("End of Report", x)-2)) + + peak_lists <- lapply(seq_along(signals[-length(signals)]),function(i){ + table <- x[signals[i]:(signals[i+1] - 1)] + convert_chemstation_peaklist(table, data_format = data_format) + }) + names(peak_lists) <- x[signals[-length(signals)]] + if (read_metadata){ + metadata <- x[(sections[1]+1):(sections[2]-1)] + metadata <- remove_blank_lines(metadata) + merge_lines <- function(xx){ + idx <- grep(":", xx, invert = TRUE) + xx[idx - 1] <- paste0(xx[idx-1], xx[idx]) + xx <- xx[-idx] + xx + } + metadata <- gsub("^\\s+","", metadata) + metadata <- gsub("\\s+\\:\\s+", " : ", metadata) + metadata <- merge_lines(metadata) + metadata <- unlist(strsplit(metadata, "(? 1 && parser == "openchrom"){ + export_format = "mzml" + } else{ + export_format <- match.arg(export_format, choices = + c("csv", "chemstation_csv", + "cdf", "mzml", "animl")) + } check_parser(format_in, parser) if (parser != "openchrom" && !(export_format %in% c("csv", "chemstation_csv", "cdf"))) stop("The selected export format is currently only supported by `openchrom` parsers.") @@ -139,7 +161,7 @@ read_chroms <- function(paths, find_files, if (all(!exists)){ stop("Cannot locate files. 
None of the supplied paths exist.") } - if (export | format_in == "thermoraw" | parser == "openchrom"){ + if (export){ if (is.null(path_out)){ path_out <- set_temp_directory() } @@ -148,77 +170,96 @@ read_chroms <- function(paths, find_files, } } if (is.null(dat)){ - dat <- list()} + dat <- list() + } - # choose converter entab_parser <- partial(call_entab, format_in = format_in, format_out = format_out, data_format = data_format, read_metadata = read_metadata) rainbow_parser <- partial(call_rainbow, format_in = format_in, - format_out = format_out, data_format = data_format, - read_metadata = read_metadata) + format_out = format_out, data_format = data_format, + read_metadata = read_metadata) if (format_in == "agilent_d"){ pattern <- ifelse(is.null(pattern), ".D", pattern) converter <- rainbow_parser + } else if (format_in == "agilent_dx"){ + pattern <- ifelse(is.null(pattern), ".dx", pattern) + converter <- partial(read_agilent_dx, path_out = path_out, + format_out = format_out, + data_format = data_format, + read_metadata = read_metadata) } else if (format_in == "masshunter_dad"){ pattern <- ifelse(is.null(pattern), ".sp", pattern) converter <- switch(parser, "aston" = partial(sp_converter, format_out = format_out, data_format = data_format, - read_metadata = read_metadata), + read_metadata = read_metadata, + metadata_format = metadata_format), "entab" = entab_parser) - } else if (format_in == "chemstation_uv"){ + } else if (format_in == "chemstation_uv" | grepl("31", format_in)){ pattern <- ifelse(is.null(pattern), ".uv", pattern) converter <- switch(parser, "chromconverter" = partial(read_chemstation_uv, format_out = format_out, data_format = data_format, - read_metadata = read_metadata), + read_metadata = read_metadata, + metadata_format = metadata_format), "aston" = partial(uv_converter, format_out = format_out, data_format = data_format, - read_metadata = read_metadata), + read_metadata = read_metadata, + metadata_format = metadata_format), "entab" = 
entab_parser, "rainbow" = rainbow_parser) - } else if (format_in == "chemstation"){ - pattern <- ifelse(is.null(pattern), "*", pattern) - converter <- rainbow_parser - } else if (format_in == "chromeleon_uv"){ + } else if (format_in == "chromeleon_uv"){ pattern <- ifelse(is.null(pattern), ".txt", pattern) converter <- partial(read_chromeleon, format_out = format_out, - data_format = data_format, read_metadata = read_metadata) + data_format = data_format, + read_metadata = read_metadata, + metadata_format = metadata_format) } else if (format_in == "shimadzu_fid"){ pattern <- ifelse(is.null(pattern), ".txt", pattern) converter <- partial(read_shimadzu, format_in = "fid", format_out = format_out, data_format = data_format, - read_metadata = read_metadata) + read_metadata = read_metadata, + metadata_format = metadata_format) } else if (format_in == "shimadzu_dad"){ pattern <- ifelse(is.null(pattern), ".txt", pattern) converter <- partial(read_shimadzu, format_in = "dad", format_out = format_out, data_format = data_format, + read_metadata = read_metadata, + metadata_format = metadata_format) + } else if (format_in == "shimadzu_lcd"){ + pattern <- ifelse(is.null(pattern), ".lcd", pattern) + converter <- partial(read_shimadzu_lcd, format_out = format_out, + data_format = data_format, read_metadata = read_metadata) - } else if (format_in == "thermoraw"){ + } else if (format_in == "thermoraw"){ pattern <- ifelse(is.null(pattern), ".raw", pattern) converter <- switch(parser, - "thermoraw" = partial(read_thermoraw, path_out = path_out, - format_out = format_out, - read_metadata = read_metadata), - "entab" = entab_parser) + "thermoraw" = partial(read_thermoraw, path_out = path_out, + format_out = format_out, + read_metadata = read_metadata, + metadata_format = metadata_format, + verbose = verbose), + "entab" = entab_parser) } else if (format_in == "mzml"){ pattern <- ifelse(is.null(pattern), ".mzML", pattern) converter <- partial(read_mzml, format_out = format_out) } else if 
(format_in == "waters_arw"){ pattern <- ifelse(is.null(pattern), ".arw", pattern) - converter <- partial(read_waters_arw, format_out = format_out) + converter <- partial(read_waters_arw, format_out = format_out, + data_format = data_format, + read_metadata = read_metadata) } else if (format_in == "waters_raw"){ - pattern <- ifelse(is.null(pattern), ".raw", pattern) - converter <- rainbow_parser + pattern <- ifelse(is.null(pattern), ".raw", pattern) + converter <- rainbow_parser } else if (format_in == "chemstation_csv"){ pattern <- ifelse(is.null(pattern), ".csv|.CSV", pattern) converter <- partial(read_chemstation_csv, format_out = format_out) - } else if (format_in %in% c("chemstation_fid", "chemstation_ch")){ + } else if (grepl("chemstation", format_in)){ pattern <- ifelse(is.null(pattern), ".ch", pattern) converter <- switch(parser, "chromconverter" = partial(read_chemstation_ch, @@ -231,25 +272,27 @@ read_chroms <- function(paths, find_files, if (is.null(pattern) & find_files){ stop("Please supply `pattern` (e.g. 
a suffix) or set `find_files = FALSE`") } - return_paths <- ifelse(export_format == "csv", FALSE, TRUE) + # return paths if animl is selected + return_paths <- ifelse(export_format == "animl", TRUE, FALSE) converter <- partial(call_openchrom, path_out = path_out, format_in = format_in, export_format = export_format, - return_paths = return_paths) + return_paths = return_paths, verbose = verbose) } else if (format_in == "mdf"){ pattern <- ifelse(is.null(pattern), ".mdf|.MDF", pattern) converter <- partial(read_mdf, format_out = format_out, data_format = data_format, read_metadata = read_metadata) } else if (format_in == "cdf"){ - pattern <- ifelse(is.null(pattern), ".cdf|.CDF", pattern) - converter <- partial(read_cdf, format_out = format_out, - data_format = data_format, - read_metadata = read_metadata) + pattern <- ifelse(is.null(pattern), ".cdf|.CDF", pattern) + converter <- partial(read_cdf, format_out = format_out, + data_format = data_format, + read_metadata = read_metadata) } else { converter <- switch(parser, "aston" = partial(trace_converter, format_out = format_out, data_format = data_format, - read_metadata = read_metadata), + read_metadata = read_metadata, + metadata_format = metadata_format), "entab" = entab_parser ) } @@ -265,9 +308,9 @@ read_chroms <- function(paths, find_files, Please confirm that the specified format ('format_in') is correct.", immediate. = TRUE) } else if (length(match) < length(files)){ - warning(paste("Some of the files do not have the expected file extension:", - files[match]), immediate. = TRUE) - } + warning(paste("Some of the files do not have the expected file extension:", + files[match]), immediate. 
= TRUE) + } } } if (all(grepl("\\.[Dd]$|\\.[Dd]?[/\\\\]",files))){ @@ -282,7 +325,7 @@ read_chroms <- function(paths, find_files, file_names <- sapply(strsplit(basename(files),"\\."), function(x) x[1]) } if (parser != "openchrom"){ - laplee <- choose_apply_fnc(progress_bar) + laplee <- choose_apply_fnc(progress_bar, cl = cl) data <- laplee(X = files, function(file){ df <- try(converter(file), silent = TRUE) }) diff --git a/R/read_mdf.R b/R/read_mdf.R index 597f7a6..fb42c0b 100644 --- a/R/read_mdf.R +++ b/R/read_mdf.R @@ -51,10 +51,10 @@ read_mdf <- function(file, format_out = c("matrix","data.frame"), } if (data_format == "long"){ - data <- reshape_chrom(data) + data <- reshape_chrom(data, data_format = data_format) } if (format_out == "matrix"){ - as.matrix(format_out) + data <- as.matrix(data) } if (read_metadata){ data <- attach_metadata(x = data, meta = meta, format_in = "mdf", diff --git a/R/read_mzml.R b/R/read_mzml.R new file mode 100644 index 0000000..004df4e --- /dev/null +++ b/R/read_mzml.R @@ -0,0 +1,73 @@ +#' Extract data from mzML files +#' +#' Extracts data from mzML files using parsers from either RaMS or mzR. The RaMS +#' parser (default) will only return data in tidy (long) format. mzR will return +#' data in wide format. Currently the mzR-based parser only returns DAD data. +#' +#' @name read_mzml +#' @importFrom RaMS grabMSdata +#' @param path path to file +#' @param format_out R format. Only applies if \code{mzR} is selected. +#' Either \code{matrix} or \code{data.frame}. \code{RaMS} will return +#' a list of data.tables regardless of what is selected here. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. +#' @param parser What parser to use. Either \code{RaMS} or \code{mzR}. +#' @param what What types of data to return (argument to \code{\link[RaMS]{grabMSdata}}. +#' Options include \code{MS1}, \code{MS2}, \code{BPC}, \code{TIC}, \code{DAD}, +#' \code{chroms}, \code{metadata}, or \code{everything}). 
+#' @param verbose Argument to \code{\link[RaMS]{grabMSdata}} controlling \code{verbosity}. +#' @param ... Additional arguments to \code{\link[RaMS]{grabMSdata}}. +#' @return If \code{RaMS} is selected, the function will return a list of "tidy" +#' \code{data.table} objects. If \code{mzR} is selected, the function will return a +#' chromatogram in \code{matrix} or \code{data.frame} format according to the +#' value of \code{format_out}. +#' @author Ethan Bass +#' @export read_mzml + +read_mzml <- function(path, format_out = c("matrix", "data.frame"), + data_format = c("long","wide"), + parser=c("RaMS","mzR"), + what=c("MS1","MS2", "BPC", "TIC", "DAD", + "chroms", "metadata", "everything"), + verbose = FALSE, + ...){ + parser <- match.arg(parser, c("RaMS", "mzR")) + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("long","wide")) + what <- match.arg(what, c("MS1","MS2", "BPC", "TIC", "DAD", + "chroms", "metadata", "everything"), several.ok = TRUE) + if (all(c("MS1","MS2", "BPC", "TIC", "DAD", + "chroms", "metadata", "everything") %in% what)){ + what <- grep("everything",what, invert = TRUE,value = TRUE) + } + if (parser == "RaMS"){ + data <- RaMS::grabMSdata(path, grab_what = what, verbosity = verbose, ...) + if (data_format == "wide"){ + data <- reshape_chroms(data, data_format = "wide") + } + } else if (parser == "mzR"){ + if (!requireNamespace("mzR", quietly = TRUE)) { + stop( + "The `mzR` package is not installed. Please install it from Bioconductor: + BiocManager::install('mzR')", + call. 
= FALSE) + } + x <- mzR::openMSfile(path) + info <- mzR::header(x) + UV_scans <- which(info$msLevel == 0) + rts <- info[UV_scans, "retentionTime"] + lambdas <- seq(info$scanWindowLowerLimit[UV_scans[1]], + info$scanWindowUpperLimit[UV_scans[1]]) + pks <- mzR::peaks(x) + data <- t(sapply(UV_scans, function(j) pks[[j]][,2])) + rownames(data) <- rts + colnames(data) <- lambdas + if (data_format == "long"){ + data <- reshape_chrom(data, data_format = "long") + } + if (format_out == "data.frame"){ + data <- as.data.frame(data) + } + } + data +} diff --git a/R/read_peaklist.R b/R/read_peaklist.R index 2e226c1..25a88aa 100644 --- a/R/read_peaklist.R +++ b/R/read_peaklist.R @@ -1,24 +1,114 @@ -#' Read varian peaklist. -#' Read peak list from 'Varian MS Workstation'. -#' @param file Path to Varian peak list file. -#' @importFrom utils read.csv +#' Read peak lists +#' +#' Reads peak lists from specified folders or vector of paths. +#' +#' @param paths paths to files or folders containing files. +#' @param find_files Logical. Set to \code{TRUE} (default) if you are providing +#' the function with a folder or vector of folders containing the files. +#' Otherwise, set to \code{FALSE}. +#' @param format_in Format of files to be imported/converted. Current options +#' include: \code{chemstation}, \code{shimadzu_fid}, or \code{shimadzu_dad}. +#' @param pattern pattern (e.g. a file extension). Defaults to NULL, in which +#' case file extension will be deduced from \code{format_in}. +#' @param data_format Format of the returned peak tables. Either +#' \code{chromatographr} or \code{original}. +#' @param read_metadata Logical, whether to attach metadata (if it's available). +#' Defaults to TRUE. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} or +#' \code{raw}. +#' @param progress_bar Logical. Whether to show progress bar. Defaults to +#' \code{TRUE} if \code{\link[pbapply]{pbapply}} is installed. 
+#' @param cl Argument to \code{\link[pbapply]{pbapply}} specifying the number +#' of clusters to use or a cluster object created by +#' \code{\link[parallel]{makeCluster}}. Defaults to 1. +#' @return A list of chromatograms in \code{matrix} or \code{data.frame} format, +#' according to the value of \code{format_out}. +#' @import reticulate +#' @importFrom utils write.csv file_test +#' @importFrom purrr partial +#' @examplesIf interactive() +#' path <- "tests/testthat/testdata/dad1.uv" +#' chr <- read_chroms(path, find_files = FALSE, format_in = "chemstation_uv") #' @author Ethan Bass #' @export -read_varian_peaklist <- function(file){ - x <- read.csv(file, skip = 5, header = FALSE) - x$V1[x$V1 == ""] <- NA - x <- tidyr::fill(data = x, "V1", .direction = "down") +read_peaklist <- function(paths, find_files, + format_in = c("chemstation", "shimadzu_fid", + "shimadzu_dad"), + pattern = NULL, + data_format = c("chromatographr", "original"), + metadata_format = c("chromconverter", "raw"), + read_metadata = TRUE, progress_bar, cl = 1){ + data_format <- match.arg(tolower(data_format), c("chromatographr", "original")) + format_in <- match.arg(tolower(format_in), + c("chemstation", "shimadzu_fid", "shimadzu_dad")) + if (missing(progress_bar)){ + progress_bar <- check_for_pkg("pbapply", return_boolean = TRUE) + } + if (missing(find_files)){ + if (length(format_in) == 1){ + ft <- all(file_test("-f", paths)) + find_files <- !ft + } else{ + find_files <- FALSE + } + } + exists <- dir.exists(paths) | file.exists(paths) + if (all(!exists)){ + stop("Cannot locate files. 
None of the supplied paths exist.") + } + # choose parser + if (format_in == "chemstation"){ + pattern <- ifelse(is.null(pattern), "report.txt", pattern) + parser <- purrr::partial(read_chemstation_reports, + data_format = data_format, + metadata_format = metadata_format) + } else if (format_in == "shimadzu_fid"){ + pattern <- ifelse(is.null(pattern), ".txt", pattern) + parser <- partial(read_shimadzu, format_in = "fid", what = "peak_table", + # format_out = format_out, + data_format = "wide", + read_metadata = read_metadata, + peaktable_format = data_format) + } else if (format_in == "shimadzu_dad"){ + pattern <- ifelse(is.null(pattern), ".txt", pattern) + parser <- partial(read_shimadzu, format_in = "dad", what = "peak_table", + # format_out = format_out, + data_format = "wide", + read_metadata = read_metadata, peaktable_format = data_format) + } - column_names <- x[2,] - column_names[1] <- "compound" - colnames(x) <- column_names - - x <- x[-which(x$`Line#`=="Line#"),] - x <- x[-which(x$`Line#` == ""), ] - - x$Area <- as.numeric(x$Area) - x$Height <- as.numeric(x$Height) - x <- x[,-16] - x + if (find_files){ + files <- find_files(paths, pattern) + } else { + files <- paths + if (!is.null(pattern)){ + match <- grep(pattern, files, ignore.case = TRUE) + if (length(match) == 0){ + warning("The provided files do not match the expected file extension. + Please confirm that the specified format ('format_in') is correct.", + immediate. = TRUE) + } else if (length(match) < length(files)){ + warning(paste("Some of the files do not have the expected file extension:", + paste(files[-match], collapse = ", ")), immediate. 
= TRUE) + } + } + } + file_names <- extract_filenames(files) + if (format_in == "chemstation"){ + data <- parser(files) + } else{ + laplee <- choose_apply_fnc(progress_bar, cl = cl) + data <- laplee(X = files, function(file){ + try(parser(file), silent = TRUE) + }) + data <- lapply(seq_along(data), function(i){ + lapply(data[[i]], function(xx){ + cbind(sample = file_names[i], xx) + }) + }) + class(data) <- "peak_list" + names(data) <- file_names + } + data } diff --git a/R/read_shimadzu_ascii.R b/R/read_shimadzu_ascii.R new file mode 100644 index 0000000..d79ec6b --- /dev/null +++ b/R/read_shimadzu_ascii.R @@ -0,0 +1,162 @@ +#' Shimadzu ascii reader +#' +#' Reads 'Shimadzu' ascii files into R. These files are exported from +#' 'Lab Solutions' by right clicking on samples in the sample list and +#' selecting +#' +#' @name read_shimadzu +#' @importFrom utils tail read.csv +#' @importFrom stringr str_split_fixed +#' @param file path to file +#' @param format_in Format of files. \code{fid} or \code{dad}. +#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. +#' @param peaktable_format Whether to return peak tables in \code{chromatographr} or +#' \code{original} format. +#' @param what Whether to extract \code{chromatogram} and/or \code{peak_table}. +#' Accepts multiple arguments. +#' @param read_metadata Whether to read metadata from file. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} or +#' \code{raw}. +#' @return A chromatogram in the format specified by \code{format_out} +#' (retention time x wavelength). 
+#' @author Ethan Bass +#' @export + +read_shimadzu <- function(file, format_in, + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + peaktable_format = c("chromatographr", "original"), + what = "chromatogram", + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ + if (missing(format_in)) + stop("`format_in` must be specified. The options are `fid` or `dad`.") + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("wide", "long")) + peaktable_format <- match.arg(peaktable_format, c("chromatographr","original")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + what <- match.arg(what, c("chromatogram", "peak_table"), several.ok = TRUE) + x <- readLines(file) + sep <- substr(x[2], 17, 17) + headings <- grep("\\[*\\]", x) + peaktab.idx <- grep("\\[Peak Table", x) + chrom_heading <- switch(format_in, + "fid" = "\\[Chromatogram .*]", + "dad" = "\\[PDA 3D]") + chrom.idx <- grep(chrom_heading, x) + + if (any(what == "chromatogram")){ + if (length(chrom.idx) != 0){ + header <- try(extract_shimadzu_header(x = x, chrom.idx = chrom.idx, sep = sep)) + met <- header[[1]] + decimal_separator <- ifelse(grepl(",", met[2, 2]), ",", ".") + if (decimal_separator == ","){ + met[c(2:3), 2] <- gsub(",", ".", met[c(2:3), 2]) + } + + if (format_in == "fid"){ + xx <- read.csv(file, skip = header[[2]], sep = sep, colClasses = "numeric", + na.strings = c("[FractionCollectionReport]","#ofFractions"), + dec = decimal_separator) + xx <- as.matrix(xx[!is.na(xx[,1]),]) + rownames(xx) <- xx[, 1] + xx <- xx[, 2, drop = FALSE] + colnames(xx) <- "Intensity" + if (data_format == "long"){ + xx <- cbind(RT = as.numeric(rownames(xx)), Intensity = as.numeric(xx[,1])) + } + } else if (format_in == "dad"){ + nrows <- as.numeric(met[grep("# of Time Axis Points", met[,1]), 2]) + ncols <- as.numeric(met[grep("# of Wavelength Axis Points", met[,1]), 2]) + xx <- read.csv(file, skip = 
header[[2]], sep = sep, colClasses = "numeric", + na.strings = c("[FractionCollectionReport]", "#ofFractions"), + row.names = 1, nrows = nrows, dec = decimal_separator) + xx <- as.matrix(xx[!is.na(xx[,1]),]) + colnames(xx) <- as.numeric(gsub("X", "", colnames(xx)))*0.01 + if (data_format == "long"){ + xx <- reshape_chrom(xx, data_format = "long") + } + } + if (format_out == "data.frame"){ + xx <- as.data.frame(xx) + } + } else{ + if (length(what) == 1){ + stop("Chromatogram not found.") + } else{ + warning("Chromatogram not found.") + what <- "peak_table" + } + } + } + + ### extract peak_table + if (any(what == "peak_table")){ + if (length(peaktab.idx) == 0){ + if (length(what) == 1){ + stop("Peak table not found!") + } else{ + warning("Peak table not found!") + what <- "chromatogram" + } + } + peak_tab <- lapply(peaktab.idx, function(idx){ + nrows <- as.numeric(strsplit(x = x[idx + 1], split = sep)[[1]][2]) + if (!is.na(nrows) && nrows > 0){ + time_column <- grep("R.Time", strsplit(x = x[[idx + 2]], split = sep)[[1]]) + t1 <- strsplit(x = x[[idx + 3]], split = sep)[[1]][time_column] + decimal_separator <- ifelse(grepl(",", t1, fixed = TRUE), ",", ".") + + peak_tab <- read.csv(file, skip = (idx + 1), sep = sep, nrows = nrows, + dec = decimal_separator) + if (peaktable_format == "chromatographr"){ + peak_tab <- peak_tab[, c("R.Time", "I.Time", "F.Time", "Area", "Height")] + colnames(peak_tab) <- c("rt", "start", "end", "area", "height") + # cbind(sample = gsub("\\[|\\]","", x[idx]), peak_tab) + } + peak_tab + } else{NA} + }) + names(peak_tab) <- gsub("\\[|\\]","", x[peaktab.idx]) + } + if ("peak_table" %in% what & "chromatogram" %in% what){ + what <- "both" + } + if (format_out == "data.frame" && exists("xx", inherits = FALSE)){ + xx <- as.data.frame(xx) + } + xx <- switch(what, "chromatogram" = xx, + "peak_table" = peak_tab, + "both" = list(chromatogram = xx, peak_table = peak_tab)) + if (read_metadata){ + idx <- which(x[headings] %in% 
c("[Header]", "[File Information]", + "[Sample Information]", "[Original 
Files]", + "[File Description]", "[Configuration]") ) + meta_start <- headings[min(idx)] + meta_end <- headings[max(idx) + 1] + meta <- x[(meta_start + 1):(meta_end - 1)] + meta <- meta[meta!=""] + meta <- meta[-grep("\\[", meta)] + meta <- stringr::str_split_fixed(meta, pattern = sep, n = 2) + if (exists("met")){ + meta <- rbind(meta, met) + } + rownames(meta) <- meta[, 1] + meta <- as.list(meta[,2]) + if (inherits(xx, "list")){ + xx <- lapply(xx, function(xxx){ + attach_metadata(xxx, meta, format_in = "shimadzu", + source_file = file, format_out = format_out, + data_format = data_format, parser = "chromConverter") + }) + } else{ + xx <- attach_metadata(xx, meta, format_in = "shimadzu", + source_file = file, format_out = format_out, + data_format = data_format, + parser = "chromConverter") + } + } + xx +} + diff --git a/R/read_shimadzu_lcd.R b/R/read_shimadzu_lcd.R new file mode 100644 index 0000000..24afab6 --- /dev/null +++ b/R/read_shimadzu_lcd.R @@ -0,0 +1,335 @@ +#' Shimadzu LCD parser +#' +#' Read 3D PDA data stream from 'Shimadzu' LCD files. +#' +#' A parser to read PDA data from 'Shimadzu' \code{.lcd} files. LCD files are +#' encoded as 'Microsoft' OLE documents. The parser relies on the +#' [olefile](https://pypi.org/project/olefile/) package in Python to unpack the +#' files. The PDA data is encoded in a stream called \code{PDA 3D Raw Data:3D Data Item}. +#' The PDA data stream contains a segment for each retention time, beginning +#' with a 24-byte header. +#' +#' The 24 byte header consists of the following fields: +#' * 4 bytes: segment label (\code{17234}). +#' * 4 bytes: ??? +#' * 4 bytes: Little-endian integer specifying the number of wavelength values +#' in the segment. +#' * 4 bytes: Little-endian integer specifying the total number of bytes in the segment. +#' * 8 bytes of \code{00}s +#' +#' Each segment is divided into two sub-segments, which begin and end with an +#' integer specifying the length of the sub-segment in bytes. 
All known values +#' in this data stream are little-endian and the data are delta-encoded. The +#' first hexadecimal digit of each value is a sign digit +#' specifying the number of bytes in the delta and whether the value is positive +#' or negative. The sign digit represents the number of hexadecimal digits used +#' to encode each value. Even numbered sign digits correspond to positive deltas, +#' whereas odd numbers indicate negative deltas. Positive values are encoded as +#' little-endian integers, while negative values are encoded as two's +#' complements. The value at each position is derived by adding the signed delta +#' to the previous value. +#' +#' @param path Path to LCD file. +#' @param format_out Matrix or data.frame. +#' @param data_format Either \code{wide} (default) or \code{long}. +#' @param read_metadata Logical. Whether to attach metadata. +#' @author Ethan Bass +#' @note This parser is experimental and may still +#' need some work. It is not yet able to interpret much metadata from the files. 
+#' @export + +read_shimadzu_lcd <- function(path, format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE){ + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("wide", "long")) + + olefile_installed <- reticulate::py_module_available("olefile") + if (!olefile_installed){ + configure_olefile() + } + + # read wavelengths from "Wavelength Table" stream + lambdas <- read_shimadzu_wavelengths(path) + n_lambdas <- length(lambdas) + + # read data from "3D Raw Data" stream + dat <- read_shimadzu_raw(path, n_lambdas = n_lambdas) + colnames(dat) <- lambdas + + # infer times from "PDA.1.Method" stream + method_metadata <- read_sz_method(path) + times <- get_sz_times(method_metadata, nval = nrow(dat)) + if (inherits(times, "numeric")){ + rownames(dat) <- times + } + + if (data_format == "long"){ + dat <- reshape_chrom(dat, data_format = "long") + } + + if (format_out == "data.frame"){ + dat <- as.data.frame(dat) + } + dat +} + +#' Read Shimadzu "Method" stream +#' @author Ethan Bass +#' @noRd +read_sz_method <- function(path){ + method_path <- export_stream(path, + stream = c("GUMM_Information", "ShimadzuPDA.1", + "PDA.1.METHOD"), + remove_null_bytes = TRUE) + if (is.na(method_path)){ + warning("Method stream could not be found -- unable to infer retention times.") + return(NA) + } else{ + method_stream <- xml2::read_xml(method_path) + + sz_extract_upd_elements <- function(method_stream, xpath, + data_format = c("list", "data.frame")){ + data_format <- match.arg(data_format, c("list", "data.frame")) + upd_elements <- xml2::xml_find_all(method_stream, xpath) + + vals <- suppressWarnings(as.numeric(xml2::xml_text( + xml2::xml_find_first(upd_elements, ".//Val")))) + data <- as.list(vals) + names(data) <- xml2::xml_attr(upd_elements, "ID") + + if (data_format == "data.frame"){ + data <- as.data.frame(do.call(rbind, data)) + colnames(data) <- "Val" + } + data + } + + 
sz_extract_upd_elements(method_stream, xpath = "/GUD/UP/UPD") + } +} + +#' Infer times from 'Shimadzu' Method stream +#' @author Ethan Bass +#' @noRd +get_sz_times <- function(sz_method, nval){ + start_time <- try(get_metadata_field(sz_method, "StTm")/60000, silent = TRUE) + end_time <- try(get_metadata_field(sz_method, "EdTm")/60000, silent = TRUE) + if (inherits(start_time, "numeric") & inherits(end_time, "numeric")){ + seq(from = start_time, to = end_time, length.out = nval) + } else NA +} + +#' Read 'Shimadzu' LCD 3D raw data +#' @author Ethan Bass +#' @noRd + +read_shimadzu_raw <- function(path, n_lambdas = NULL){ + path_raw <- export_stream(path, stream = c("PDA 3D Raw Data", "3D Raw Data"), + verbose = TRUE) + f <- file(path_raw, "rb") + on.exit(close(f)) + + seek(f, 0, 'end') + fsize <- seek(f, NA, "current") + + # Read data + + seek(f, 0, "start") + seek(f, 0, "start") + + mat <- matrix(NA, nrow = fsize/(n_lambdas*1.5), ncol = n_lambdas) + i <- 1 + while (seek(f, NA, "current") < fsize) { + mat[i,] <- decode_shimadzu_block(f) + i <- i + 1 + } + if (any(is.na(mat[,1]))){ + mat <- mat[-which(is.na(mat[,1])),] + } + mat +} + +#' Export OLE stream +#' Use olefile to export te specified stream. +#' @param file Path to ole file. +#' @author Ethan Bass +#' @noRd +export_stream <- function(path_in, stream, path_out, remove_null_bytes = FALSE, + verbose = FALSE){ + reticulate::py_run_string('import olefile') + reticulate::py_run_string(paste0('ole = olefile.OleFileIO("', path_in, '")')) + python_stream <- paste0("[", paste(paste0("'", stream, "'"), collapse = ', '),"]") + stream_exists <- reticulate::py_eval(paste0("ole.exists(", python_stream, ")")) + if (!stream_exists){ + if (verbose){ + warning(paste0("The stream ", sQuote(python_stream), " could not be found."), + immediate. 
= TRUE) + } + return(NA) + } else{ + reticulate::py_run_string(paste0("st = ole.openstream(", python_stream, ")")) + reticulate::py_run_string('data = st.read()') + + if (missing(path_out)){ + path_out <- tempfile() + } + if (.Platform$OS.type == "windows"){ + path_out <- gsub("\\\\", "/", path_out) + } + if (remove_null_bytes){ + reticulate::py_run_string("data = data.replace(b'\\x00', b'')") + } + reticulate::py_run_string(paste0('with open("', path_out ,'", "wb") as binary_file: + binary_file.write(data)')) + path_out + } +} + +#' Read 'Shimadzu' LCD data block +#' @author Ethan Bass +#' @noRd +decode_shimadzu_block <- function(file) { + block_start <- seek(file, NA, "current") + + readBin(file, what = "integer", n = 6, size=1) #skip + readBin(file, what = "integer", n = 1, size=2) + + n_lambda <- readBin(file, what = "integer", n = 1, size = 2, endian = "little") + + readBin(file, what = "integer", n = 1, size = 2) + block_length <- readBin(file, what = "integer", n = 1, size = 2) + readBin(file, what = "integer", n = 5, size = 2) + + signal <- numeric(n_lambda) + count <- 1 + buffer <- list(0,0,0,0) + + for (i in c(1:2)){ + n_bytes <- readBin(file, "integer", n = 1, size = 2) + start <- seek(file, NA, "current") + + while(seek(file, NA, "current") < start + n_bytes){ + buffer[[3]] <- readBin(file, "raw", n = 1, size = 1) + hex1 <- as.numeric(substr(buffer[[3]], 1, 1)) + if (hex1 == 0){ + buffer[[2]] <- strtoi(buffer[[3]], 16) + } else if (hex1 == 1){ + bin <- as_binary(strtoi(buffer[[3]], 16), 8) + buffer[[2]] <- twos_complement(substr(bin, 5, nchar(bin))) + } else if (hex1 > 1){ + buffer[[4]] <- readBin(file, "raw", n = (hex1 %/% 2), size = 1) + bin <- paste(as_binary(strtoi(c(buffer[[3]], buffer[[4]]), 16), 8), + collapse = "") + if (hex1 %% 2 == 0){ + buffer[[2]] <- strtoi(substr(bin, 5, nchar(bin)), 2) + } else { + buffer[[2]] <- twos_complement(substr(bin, 5, nchar(bin))) + } + } + buffer[[1]] <- buffer[[1]] + buffer[[2]] + signal[count] <- buffer[[1]] + 
count <- count + 1 + } + end <- readBin(file, "integer", n = 1, size = 2) + n_bytes == end + buffer[[1]] <- 0 + } + signal +} + +#' Return twos complement from binary string +#' @noRd +twos_complement <- function(bin, exp){ + if (missing(exp)){ + exp <- nchar(bin) + } + strtoi(bin, 2) - 2^exp +} + +#' Convert integer to binary +#' @author Stuart K. Grange +#' @note This function is borrowed from the threadr package +#' \url{https://github.com/skgrange/threadr/} where it's licensed under GPL3. +#' @noRd +as_binary <- function(x, n = 32) { + # Check type + if (!is.integer(x)) stop("Input must be an integer.", call. = FALSE) + # Do + x <- sapply(x, function(x) integer_to_binary(x, n)) + # Return + x +} + +#' Convert integer to binary +#' @author Stuart K. Grange +#' @note This function is borrowed from the threadr package +#' \url{https://github.com/skgrange/threadr/} where it's licensed under GPL3. +#' @noRd +integer_to_binary <- function(x, n) { + # Convert to a vector of integers + x <- intToBits(x) + # Drop leading zeros + x <- as.integer(x) + # Filter to a certain number of bits + x <- x[1:n] + # Reverse order of vector + x <- rev(x) + # Collapse vector into string + x <- stringr::str_c(x, collapse = "") + # Return + x +} + +#' Extract wavelengths from Shimadzu LCD +#' @author Ethan Bass +#' @noRd +read_shimadzu_wavelengths <- function(path){ + path_wavtab <- export_stream(path, stream = c("PDA 3D Raw Data", "Wavelength Table")) + f <- file(path_wavtab, "rb") + on.exit(close(f)) + n_lambda <- readBin(f, what="integer", size = 4) + count <- 1 + # lambdas <- numeric(n_lambda) + lambdas <- sapply(seq_len(n_lambda), function(i){ + readBin(f, what="integer", size = 4)/100 + }) + lambdas +} + + +#' Configure olefile +#' +#' Configures reticulate to use olefile. Olefile is required to use the 'Shimadzu' +#' LCD parser. +#' @name configure_olefile +#' @param return_boolean Logical. 
Whether to return a Boolean value indicating +#' if the chromConverter environment is correctly configured. +#' @return If \code{return_boolean} is \code{TRUE}, returns a Boolean value +#' indicating whether the chromConverter environment is configured correctly. +#' Otherwise, there is no return value. +#' @author Ethan Bass +#' @import reticulate +#' @export +configure_olefile <- function(return_boolean = FALSE){ + install <- FALSE + if (!dir.exists(miniconda_path())){ + install <- readline("It is recommended to install miniconda in your R library to use the Shimadzu LCD parser. Install miniconda now? (y/n)") + if (install %in% c('y', "Y", "YES", "yes", "Yes")){ + install_miniconda() + } + } + env <- reticulate::configure_environment("chromConverter") + if (!env){ + reqs <- c("olefile") + reqs_available <- sapply(reqs, reticulate::py_module_available) + if (!all(reqs_available)){ + conda_install(envname = "chromConverter", reqs[which(!reqs_available)], + pip = TRUE) + } + } + if (return_boolean){ + return(env) + } +} + diff --git a/R/read_thermoraw.R b/R/read_thermoraw.R index a50decc..7e26167 100644 --- a/R/read_thermoraw.R +++ b/R/read_thermoraw.R @@ -7,10 +7,14 @@ #' must be manually installed. #' #' @name read_thermoraw -#' @param path_in path to file -#' @param path_out directory to export \code{mzML} files. +#' @param path_in Path to file. +#' @param path_out Path to directory to export \code{mzML} files. If +#' \code{path_out} isn't specified, a temp directory will be used. #' @param format_out R format. Either \code{matrix} or \code{data.frame}. #' @param read_metadata Whether to read metadata from file. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} or +#' \code{raw}. +#' @param verbose Logical. Whether to print output from OpenChrom to the console. #' @return A chromatogram in the format specified by \code{format_out}. 
#' @section Side effects: Exports chromatograms in \code{mzml format} to the #' folder specified by \code{path_out}. @@ -26,67 +30,60 @@ #' \doi{10.1021/acs.jproteome.9b00328}. #' @export read_thermoraw -read_thermoraw <- function(path_in, path_out, format_out = c("matrix", "data.frame"), - read_metadata = TRUE){ +read_thermoraw <- function(path_in, path_out = NULL, + format_out = c("matrix", "data.frame"), + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw"), + verbose = getOption("verbose")){ format_out <- match.arg(format_out, c("matrix", "data.frame")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + metadata_format <- switch(metadata_format, chromconverter = "thermoraw", + raw = "raw") if(!file.exists(path_in)){ stop("File not found. Check path.") } - base <- basename(path_in) - if (missing(path_out)){ - path_out <- set_temp_directory() + if (is.null(path_out)){ + path_out <- tempdir() } - if(!file.exists(path_out)){ - stop("'path_out' not found. Make sure directory exists.") + if(!dir.exists(path_out)){ + stop("Export directory not found. 
Please check `path_out` argument and try again.") } configure_thermo_parser() + verbose <- switch(as.character(verbose), "TRUE" = "") if (.Platform$OS.type != "windows"){ - system2("sh", args=paste0(system.file('shell/thermofileparser.sh', package='chromConverter'), " -i=", path_in, - " -o=", path_out, " -a")) - new_path <- paste0(path_out, strsplit(base,"\\.")[[1]][1],".mzML") + system2("sh", args = paste0(system.file('shell/thermofileparser.sh', + package='chromConverter'), + " -i=", path_in, " -o=", path_out, " -a"), + stdout = verbose) if (read_metadata){ - system2("sh", args=paste0(system.file('shell/thermofileparser.sh', package='chromConverter'), " -i=", path_in, - " -o=", path_out, " -m=1")) - meta_path <- paste0(path_out, strsplit(base, "\\.")[[1]][1], "-metadata.txt") + system2("sh", args = paste0(system.file('shell/thermofileparser.sh', + package='chromConverter'), + " -i=", path_in, " -o=", path_out, " -m=1"), + stdout = verbose) } } else { - parser_path <- readLines(system.file('shell/path_parser.txt', package='chromConverter')) - shell(paste0(parser_path, " -i=", path_in, - " -o=", path_out, " -a")) - new_path <- paste(path_out, - paste0(strsplit(base,"\\.")[[1]][1],".mzML"), - sep="\\") + parser_path <- readLines(system.file('shell/path_parser.txt', + package='chromConverter')) + shell(paste0(parser_path, " -i=", path_in, + " -o=", path_out, " -a")) if (read_metadata){ shell(paste0(parser_path, " -i=", path_in, " -o=", path_out, " -m=1")) - meta_path <- paste(path_out, - paste0(strsplit(base, "\\.")[[1]][1], "-metadata.txt"), - sep = "\\") } } + base_name <- basename(path_in) + base_name <- strsplit(base_name, "\\.")[[1]][1] + new_path <- fs::path(path_out, base_name, ext = "mzML") x <- read_mzml(new_path, format_out) if (read_metadata){ - meta <- strsplit(readLines(meta_path), "=",fixed = TRUE) - meta <- do.call(rbind,meta) + meta_path <- fs::path(path_out, paste0(base_name, "-metadata"), ext = "txt") + meta <- strsplit(readLines(meta_path), "=", 
fixed = TRUE) + meta <- do.call(rbind, meta) rownames(meta) <- meta[,1] meta <- as.list(meta[,-1]) - x <- structure(x, instrument = c(meta$`Instrument model`, meta$`Instrument name`, meta$`Instrument serial number`), - detector = NA, - software = meta$`Software version`, - method = NA, - batch = NA, - operator = NA, - run_date = meta$`Creation date`, - sample_name = basename(meta$`RAW file path`), - sample_id = meta$`Sample id`, - vial = meta$`Sample vial`, - injection_volume = meta$`Sample injection volume`, - sample_dilution = meta$`Sample dilution factor`, - time_range = meta$`Time range`, - time_interval = meta$`Interval(msec)`, - format = "long", - parser = "chromConverter", - class = format_out) + x <- attach_metadata(x, meta, format_in = metadata_format, + format_out = format_out, data_format = "long", + source_file = path_in) } x } diff --git a/R/read_varian_peaklist.R b/R/read_varian_peaklist.R new file mode 100644 index 0000000..f3b4de5 --- /dev/null +++ b/R/read_varian_peaklist.R @@ -0,0 +1,24 @@ +#' Read 'Varian' peaklist. +#' Read peak list(s) from 'Varian MS Workstation'. +#' @param file Path to Varian peak list file. +#' @importFrom utils read.csv +#' @author Ethan Bass +#' @export + +read_varian_peaklist <- function(file){ + x <- read.csv(file, skip = 5, header = FALSE) + x$V1[x$V1 == ""] <- NA + x <- tidyr::fill(data = x, "V1", .direction = "down") + + column_names <- x[1,] + column_names[1] <- "compound" + colnames(x) <- column_names + + x <- x[!(x$`Line#` %in% ""), ] + x <- x[!(x$`Line#` %in% "Line#"),] + + x$Area <- as.numeric(x$Area) + x$Height <- as.numeric(x$Height) + x <- x[,-16] + x +} diff --git a/R/read_waters_arw.R b/R/read_waters_arw.R new file mode 100644 index 0000000..d7350ca --- /dev/null +++ b/R/read_waters_arw.R @@ -0,0 +1,62 @@ +#' Waters ascii (.arw) reader +#' +#' Reads 'Waters ARW' files. 
+#' +#' For help exporting files from Empower, you can consult the official +#' documentation: [How_to_export_3D_raw_data_from_Empower](https://support.waters.com/KB_Inf/Empower_Breeze/WKB77571_How_to_export_3D_raw_data_from_Empower_to_a_Microsoft_Excel_spreadsheet). +#' +#' @name read_waters_arw +#' @importFrom utils tail read.csv +#' @param file path to file +#' @param format_out R format. Either \code{matrix} or \code{data.frame}. +#' @param data_format Whether to return data in \code{wide} or \code{long} format. +#' @param read_metadata Whether to read metadata from file. +#' @param metadata_format Format to output metadata. Either \code{chromconverter} +#' or \code{raw}. +#' @return A chromatogram in the format specified by \code{format_out} +#' (retention time x wavelength). +#' @author Ethan Bass +#' @export + +read_waters_arw <- function(file, format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw")){ + format_out <- match.arg(format_out, c("matrix", "data.frame")) + data_format <- match.arg(data_format, c("wide", "long")) + metadata_format <- match.arg(metadata_format, c("chromconverter", "raw")) + metadata_format <- switch(metadata_format, + chromconverter = "waters_arw", raw = "raw") + x <- read.csv(file, sep = "\t", skip = 2, header = FALSE, row.names = 1) + # PDA (3D) + if (rownames(x)[1] == "Wavelength"){ + colnames(x) <- x[1,] + rm <- 1 + if (rownames(x)[2] == "Time"){ + rm <- c(rm, 2) + } + x <- x[-rm,] + if (data_format == "long"){ + x <- as.data.frame(reshape_chrom(x, data_format = "long")) + } + } else if (ncol(x) == 1){ + colnames(x) <- "Intensity" + if (data_format == "long"){ + x <- data.frame(RT = rownames(x), Intensity = x[,1]) + } + } + if (format_out == "matrix"){ + x <- as.matrix(x) + } + if (read_metadata){ + meta <- try(read_waters_metadata(file)) + if (!inherits(meta, "try-error")){ + x <- attach_metadata(x, meta, format_in = metadata_format, + 
format_out = format_out, + data_format = data_format, + parser = "chromConverter", + source_file = file) + } + } + x +} diff --git a/R/reshape_chroms.R b/R/reshape_chroms.R index f453bce..5341024 100644 --- a/R/reshape_chroms.R +++ b/R/reshape_chroms.R @@ -12,11 +12,11 @@ #' @return A list of chromatographic matrices in long format. #' @author Ethan Bass -reshape_chroms <- function(x, idx, sample_var = "sample", lambdas=NULL, +reshape_chroms <- function(x, idx, sample_var = "sample", lambdas = NULL, data_format, combine = TRUE, ...){ if (missing(data_format)){ - data_format <- switch(attr(x[[1]],"data_format"), - long="wide",wide="long") + data_format <- switch(attr(x[[1]], "data_format"), + long = "wide", wide = "long") } if (missing(idx)){ idx <- seq_along(x) @@ -25,7 +25,7 @@ reshape_chroms <- function(x, idx, sample_var = "sample", lambdas=NULL, if (is.null(lambdas)){ if (data_format == "wide"){ lambda.idx <- grep("lambda", colnames(x[[i]])) - lambdas <- unique(as.data.frame(x[[i]])[,lambda.idx]) + lambdas <- unique(as.data.frame(x[[i]])[, lambda.idx]) } else if (data_format == "long"){ lambdas <- colnames(x[[i]]) } @@ -46,10 +46,6 @@ reshape_chroms <- function(x, idx, sample_var = "sample", lambdas=NULL, #' @noRd reshape_chrom <- function(x, data_format, ...){ - # if (missing(data_format)){ - # data_format <- switch(attr(x[[1]],"data_format"), - # long="wide", wide="long") - # } fn <- switch(data_format, long = reshape_chrom_long, wide = reshape_chrom_wide) @@ -65,25 +61,29 @@ reshape_chrom <- function(x, data_format, ...){ #' @return A chromatographic matrix in long format. #' @author Ethan Bass #' @noRd -reshape_chrom_long <- function(x, lambdas, format_out=c("data.frame","matrix")){ +reshape_chrom_long <- function(x, lambdas, format_out = NULL){ if (!is.null(attr(x, "data_format")) && attr(x, "data_format") == "long"){ warning("The data already appear to be in long format!", immediate. 
= TRUE) } - if (ncol(x) == 1) - stop("The provided data is already in long format!") - format_out <- match.arg(format_out,c("data.frame","matrix")) + if (is.null(format_out)){ + format_out <- class(x)[1] + } + + format_out <- match.arg(format_out, c("data.frame", "matrix")) xx <- as.data.frame(x) - if (!missing(lambdas)){ - xx <- xx[,lambdas, drop = FALSE] + + if (ncol(x) == 1){ + data <- data.frame(RT = as.numeric(rownames(xx)), Intensity = xx[,1], + row.names = NULL) + } else { + if (!missing(lambdas)){ + xx <- xx[,lambdas, drop = FALSE] + } + data <- data.frame(tidyr::pivot_longer(data.frame(rt = rownames(xx), xx, check.names = FALSE), + cols = -c("rt"), names_to = "lambda", values_to = "intensity")) + data$rt <- as.numeric(data$rt) + data$lambda <- as.numeric(data$lambda) } - data <- reshape(as.data.frame(rt=rownames(xx), xx), direction = "long", - varying = list(1:ncol(xx)), v.names="absorbance", - times = colnames(xx), timevar = "lambda", - idvar = "rt", ids = rownames(xx)) - rownames(data) <- NULL - data$rt <- as.numeric(data$rt) - data$lambda <- as.numeric(data$lambda) - data <- data[,c(3,2,1)] if (format_out == "matrix"){ data <- as.matrix(data) } @@ -97,11 +97,10 @@ reshape_chrom_long <- function(x, lambdas, format_out=c("data.frame","matrix")){ reshape_chrom_wide <- function(x, lambdas, lambda_var = "lambda", time_var="rt", value_var = "int", drop){ if (!is.null(attr(x, "data_format")) && attr(x, "data_format") == "wide"){ - warning("The data already appear to be in wide format!",immediate. = TRUE) + warning("The data already appear to be in wide format!", immediate. 
= TRUE) } - x <- as.data.frame(x) if (missing(drop)){ - drop <- colnames(x)[which(sapply(x,is.character))] + drop <- colnames(x)[which(sapply(x, is.character))] } if (missing(value_var)){ value_var <- colnames(x)[grep("int|abs", colnames(x),ignore.case = TRUE)] @@ -109,12 +108,12 @@ reshape_chrom_wide <- function(x, lambdas, lambda_var = "lambda", time_var="rt", if (!missing(lambdas)){ x <- x[which(x[,lambda_var] %in% lambdas),] } - data <- reshape(x, idvar=time_var, timevar=lambda_var, v.names = value_var, - new.row.names = unique(x$rt), direction="wide", drop=drop) - colnames(data) <- gsub(paste0(value_var,"."),"", colnames(data)) - data <- as.matrix(data) - rownames(data) <- data[,1] - data <- data[,-1] + x <- as.data.frame(x) + data <- data.frame(tidyr::pivot_wider(x, id_cols = !!time_var, + names_from = !!lambda_var, + values_from = !!value_var), + row.names = time_var) + colnames(data) <- gsub("X", "", colnames(data)) data <- transfer_metadata(data, x) attr(data, "data_format") <- "wide" data diff --git a/R/utils.R b/R/utils.R index 861a3dc..352bc09 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,45 +1,117 @@ utils::globalVariables(names = c('.')) # Globals <- list() + +#' Get filetype +#' @noRd +get_filetype <- function(path, out = c("format_in", "filetype")){ + out <- match.arg(out, c("format_in", "filetype")) + f <- file(path, "rb") + on.exit(close(f)) + + magic <- readBin(f, what = "raw", n = 4) + magic <- paste(paste0("x",as.character(magic)),collapse="/") + # magic + filetype <- switch(magic, + "x01/x32/x00/x00" = "AgilentChemstationMS", + "x02/x02/x00/x00" = "AgilentMasshunterDADHeader", + # "x02/x33/x30/x00" = "AgilentChemstationMWD", + "x03/x02/x00/x00" = "AgilentMasshunterDAD", + "x02/x33/x30/x00" = "chemstation_30", + "x02/x33/x31/x00" = "chemstation_31", + "x03/x31/x33/x30" = "chemstation_130", #130 + "x03/x31/x33/x31" = "chemstation_131", #131 + "x03/x31/x37/x39" = "chemstation_179", #179 + "x02/x38/x31/x00" = "chemstation_81", #81 + 
"x03/x31/x38/x31" = "chemstation_181", #181 + "x01/xa1/x46/x00" = "ThermoRAW", + "xd0/xcf/x11/xe0" = "ShimadzuLCD", + "x80/x00/x01/x00" = "WatersRAW", + "x43/x44/x46/x01" = "cdf" + ) + if (is.null(filetype)){ + stop("File type not recognized. Please specify a filetype by providing an argument to `format_in` + or file an issue at `https://github.com/ethanbass/chromConverter/issues`.") + } + if (filetype == "chemstation_131"){ + seek(f, 348) + magic2 <- readBin(f, what="character", n = 2) + magic2 <- paste(magic2, collapse="") + filetype <- switch(magic2, "OL" = "openlab_131", + "LC" = "chemstation_131") + } + format_in <- switch(filetype, + "AgilentChemstationMS" = "chemstation", + "AgilentChemstationCH" = "chemstation_ch", + "AgilentChemstationFID" = "chemstation_ch", + # "chemstation_31" = "chemstation_uv", + # "chemstation_131" = "chemstation_uv", + # "openlab_131" = "chemstation_uv", + "ThermoRAW" = "thermoraw", + "ShimadzuLCD" = "shimadzu_lcd", + "WatersRAW" = "waters_raw", + filetype + ) + + switch(out, "filetype" = filetype, "format_in" = format_in) +} + #' Check parser #' @noRd check_parser <- function(format_in, parser=NULL, find = FALSE){ allowed_formats <- list(openchrom = c("msd","csd","wsd"), - chromconverter = c("chemstation_csv", "chemstation_ch", - "chemstation_fid", "chemstation_uv", - "chromeleon_uv", "mzml", + chromconverter = c("agilent_dx", "cdf", "chemstation_csv", + "chemstation_ch", "chemstation_fid", + "chemstation_uv", "chromeleon_uv", + "chemstation_30", "chemstation_31", + "chemstation_130", "chemstation_131", + "openlab_131", + "chemstation_179", "chemstation_81", + "chemstation_181", "mzml", "mdf", "shimadzu_fid", "shimadzu_dad", - "waters_arw", "mdf", "cdf"), - aston = c("chemstation", "chemstation_uv", "masshunter_dad", "other"), - entab = c("chemstation", "chemstation_ch", "chemstation_fid", - "chemstation_uv", "masshunter_dad", "thermoraw", "other"), - rainbow = c("chemstation", "chemstation_ch", "chemstation_fid", + "shimadzu_lcd", 
"waters_arw"), + aston = c("chemstation", "chemstation_uv", + "chemstation_131", + "masshunter_dad", "other"), + entab = c("chemstation", "chemstation_ch", + "chemstation_30", "chemstation_31", + "chemstation_131", "chemstation_fid", + "chemstation_uv", "masshunter_dad", + "thermoraw", "other"), + rainbow = c("chemstation", "chemstation_ch", + "chemstation_130","chemstation_131", + "chemstation_fid", "chemstation_179", "chemstation_uv", "waters_raw", "agilent_d"), thermoraw = c("thermoraw") ) if (find){ - possible_parsers <- names(allowed_formats)[grep(format_in, allowed_formats)] - if (all(c("aston","entab") %in% possible_parsers)){ - if (any(format_in == c("chemstation_uv", "masshunter_dad"))){ - possible_parsers <- ifelse(!requireNamespace("entab", quietly = TRUE), "aston", "entab") - } + if (!reticulate::py_module_available("aston")){ + allowed_formats <- allowed_formats[-which(names(allowed_formats) == "aston")] + } + if (!reticulate::py_module_available("rainbow")){ + allowed_formats <- allowed_formats[-which(names(allowed_formats) == "rainbow")] } - if (all(c("rainbow","aston") %in% possible_parsers)){ - possible_parsers <- "rainbow" + if (!requireNamespace("entab", quietly = TRUE)){ + allowed_formats <- allowed_formats[-which(names(allowed_formats) == "entab")] } - if (all(c("entab","thermoraw") %in% possible_parsers)){ - possible_parsers <- "thermoraw" + possible_parsers <- names(allowed_formats)[grep(format_in, allowed_formats)] + if (length(possible_parsers) > 1){ + possible_parsers <- possible_parsers[match( + c("thermoraw", "entab", "chromconverter", "rainbow", "aston"), possible_parsers)] + if (any(is.na(possible_parsers))){ + possible_parsers <- possible_parsers[-which(is.na(possible_parsers))] + } } possible_parsers[1] } else{ - if (!(format_in %in% allowed_formats[[parser]])){ - stop("Mismatched arguments!", "\n\n", "The ", paste0(sQuote(format_in), " format can be converted using the following parsers: ", - 
paste(sQuote(names(allowed_formats)[grep(format_in, allowed_formats)]), collapse = ", "), ". \n \n", - "The ", sQuote(parser), " parser can take the following formats as inputs: \n", - paste(sQuote(allowed_formats[[parser]]), collapse=", "), ". \n \n", - "Please double check your arguments and try again.")) - } + if (!(format_in %in% allowed_formats[[parser]])){ + stop("Mismatched arguments!", "\n\n", "The ", paste0(sQuote(format_in), " format can be converted using the following parsers: ", + paste(sQuote(names(allowed_formats)[grep(format_in, allowed_formats)]), collapse = ", "), ". \n \n", + "The ", sQuote(parser), " parser can take the following formats as inputs: \n", + paste(sQuote(allowed_formats[[parser]]), collapse=", "), ". \n \n", + "Please double check your arguments and try again.")) + } } } @@ -49,6 +121,21 @@ remove_unicode_chars <- function(x){ stringr::str_replace_all(x, "\xb5", "micro") } +#' Extract file names +#' @noRd +extract_filenames <- function(files){ + if (all(grepl("\\.[Dd]$|\\.[Dd]?[/\\\\]", files))){ + file_names <- strsplit(files, "/") + file_names <- gsub("\\.[Dd]", "", + sapply(file_names, function(n){ + ifelse(any(grepl("\\.[Dd]", n)), grep("\\.[Dd]", n, value = TRUE), tail(n,1)) + })) + } else { + file_names <- sapply(strsplit(basename(files),"\\."), function(x) x[1]) + } + file_names +} + #' Format extension #' @noRd format_to_extension <- function(format_in){ @@ -63,7 +150,8 @@ format_to_extension <- function(format_in){ "shimadzu_dad" = ".txt", "chromeleon_uv" = ".txt", "thermoraw" = ".raw", "mzml" = ".mzml", "waters_arw" = ".arw", - "waters_raw" = ".raw", "msd" = ".", "csd" =".", "wsd" =".", "mdf" = ".mdf|.MDF", "other"=".") + "waters_raw" = ".raw", "msd" = ".", "csd" =".", "wsd" =".", + "mdf" = ".mdf|.MDF", "other"=".") } #' @noRd @@ -97,13 +185,13 @@ set_temp_directory <- function(){ #' Extract header from Shimadzu ascii files #' @noRd -extract_header <- function(x, chrom.idx, sep){ - index <- chrom.idx+1 
+extract_shimadzu_header <- function(x, chrom.idx, sep){ + index <- chrom.idx + 1 line <- x[index] l <- length(strsplit(x = line, split = sep)[[1]]) header <- strsplit(x = line, split = sep)[[1]] while (l > 1) { - index <- index+1 + index <- index + 1 line <- strsplit(x = x[index], split = sep)[[1]] l <- length(line) if (l == 1 | suppressWarnings(!is.na(as.numeric(line[1])))) @@ -137,59 +225,26 @@ choose_apply_fnc <- function(progress_bar, parallel = FALSE, cl = NULL){ if (progress_bar){ check_for_pkg("pbapply") fn <- pbapply::pblapply + if (!is.null(cl)){ + fn <- purrr::partial(fn, cl = cl) + } } else{ fn <- lapply } fn } -#' Transfer metadata -#'@noRd -transfer_metadata <- function (new_object, old_object, exclude = c("names", "row.names", - "class", "dim", "dimnames")) -{ - a <- attributes(old_object) - a[exclude] <- NULL - attributes(new_object) <- c(attributes(new_object), a) - new_object -} - -#' Get filetype +#' Rename list +#' @author Ethan Bass #' @noRd -get_filetype <- function(file, out = c("format_in", "filetype")){ - out <- match.arg(out, c("format_in", "filetype")) - magic <- readBin(file, what = "raw", n = 4) - magic <- paste(paste0("x",as.character(magic)),collapse="/") - # magic - filetype <- switch(magic, - "x01/x32/x00/x00" = "AgilentChemstationMS", - "x02/x02/x00/x00" = "AgilentMasshunterDADHeader", - # "x02/x33/x30/x00" = "AgilentChemstationMWD", - "x02/x33/x31/x00" = "AgilentChemstationDAD", - "x02/x38/x31/x00" = "AgilentChemstationFID", #81 - "x03/x02/x00/x00" = "AgilentMasshunterDAD", - "x03/x31/x33/x30" = "AgilentChemstationCH", #131 - "x03/x31/x33/x31" = "AgilentChemstationDAD", #131 rainbow - "x03/x31/x37/x39" = "AgilentChemstationFID", #179 - "x03/x31/x38/x31" = "AgilentChemstationFID", #181 - "x02/x33/x30/x00" = "AgilentChemstationCH", #31/30 - "x01/xa1/x46/x00" = "ThermoRAW", - "xd0/xcf/x11/xe0" = "ShimadzuLCD", - "x80/x00/x01/x00" = "WatersRAW" - ) - if (is.null(filetype)){ - stop("File type not recognized. 
Please specify a filetype by providing an argument to `format_in` - or file an issue at `https://github.com/ethanbass/chromConverter/issues`.") +rename_list <- function(x, new_names){ + old_names <- names(x) + names.idx <- match(names(x), new_names) + new_names <- names(new_names)[names.idx] + not_found <- which(is.na(new_names)) + if (any(not_found)){ + new_names[not_found] <- old_names[not_found] } - format_in <- switch(filetype, - "AgilentChemstationMS" = "chemstation", - "AgilentChemstationCH" = "chemstation_ch", - "AgilentChemstationFID" = "chemstation_ch", - "AgilentChemstationDAD" = "chemstation_uv", - "ThermoRAW" = "thermoraw", - "ShimadzuLCD" = "shimadzu_lcd", - "WatersRAW" = "waters_raw" - ) - - switch(out, "filetype" = filetype, "format_in" = format_in) + names(x) <- new_names + x } diff --git a/README.md b/README.md index 65941ab..703d14e 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,16 @@ chromConverter aims to facilitate the conversion of chromatography data from various proprietary formats so it can be easily read into R for further analysis. It currently consists of wrappers around file parsers from various external libraries including [Aston](https://github.com/bovee/aston), [Entab](https://github.com/bovee/entab), the [ThermoRawFileParser](https://github.com/compomics/ThermoRawFileParser), [rainbow](https://rainbow-api.readthedocs.io/), and [OpenChrom](https://lablicate.com/platform/openchrom) as well as some parsers written directly in R for (mostly) text-based formats. 
### Formats + +##### ChromConverter +- Chromeleon UV ascii (`.txt`) +- mzML (`.mzml`) +- Shimadzu LabSolutions ascii (`.txt`) +- Waters ascii (`.arw`) +- 'Agilent Chemstation' & 'OpenLab' `.ch` files (versions 8, 81, 130, 179, 181) +- 'Agilent Chemstation' & 'OpenLab' `.uv` files (versions 131, 31) +- 'Shimadzu' `.lcd` (*provisional support* for PDA stream) + ##### External Libraries ###### Aston/Entab (*Entab requires separate installation, see [instructions below](README.md#Installation)*) - Agilent ChemStation (`.ch`, `.fid`, `.ms`, .`mwd`, & `.uv`) @@ -34,13 +44,6 @@ chromConverter aims to facilitate the conversion of chromatography data from var - ABSciex DAD (`.wiff`) - and many more (see full list [here](https://lablicate.com/platform/openchrom)). -##### ChromConverter -- Chromeleon UV ascii (`.txt`) -- mzML (`.mzml`) -- Shimadzu LabSolutions ascii (`.txt`) -- Waters ascii (`.arw`) (*provisional support*) -- 'Agilent Chemstation' `.ch` files (versions 8, 81, 130, 179, 181) - ### Installation chromConverter can now be installed directly from CRAN: @@ -91,17 +94,17 @@ Thermo RAW files can be converted by calling the [ThermoRawFileParser](https://g 1) Download [OpenChrom](https://lablicate.com/platform/openchrom/download) (**version 1.4.x only**) and place it into a directory of your choice. 2) If you intend to use the GUI in the future, it is recommended to make a separate copy of OpenChrom for command-line use. -3) Call `read_chroms` with `parser = "openchrom"`. The first time you call the parser, you may be asked to provide the path to your local installation of OpenChrom. The path will then be saved for future use. If the command-line interface is disabled, you will be given the option to automatically activate the command-line. Alternatively, the command-line option can be activated from R by calling `configure_openchrom(cli = "true")` or following the [instructions](https://github.com/OpenChrom/openchrom/wiki/CLI) to manually activate the CLI. 
This process can be reversed using the same function: e.g. `configure_openchrom(cli = "false"). To specify an OpenChrom executable in a non-standard location, call `configure_openchrom` with the `path` argument, e.g. `configure_openchrom(cli = "true", path="path_to_openchrom_executable"). +3) Call `read_chroms` with `parser = "openchrom"`. The first time you call the parser, you may be asked to provide the path to your local installation of OpenChrom. The path will then be saved for future use. If the command-line interface is disabled, you will be given the option to automatically activate the command-line. Alternatively, the command-line option can be activated from R by calling `configure_openchrom(cli = "true")` or following the [instructions](https://github.com/OpenChrom/openchrom/wiki/CLI) to manually activate the CLI. This process can be reversed using the same function: e.g. `configure_openchrom(cli = "false")`. To specify an OpenChrom executable in a non-standard location, call `configure_openchrom` with the `path` argument, e.g. `configure_openchrom(cli = "true", path = "path_to_openchrom_executable")`. ### Usage -##### `read_chroms` function +##### Importing chromatograms The workhorse of chromConverter is the `read_chroms` function, which functions as a wrapper around all of the supported parsers. To convert files, call `read_chroms`, specifying the `paths` to a vector of directories or files and the appropriate file format (`format_in`). Supported formats include `chemstation_uv`, `chemstation_csv`, `masshunter_dad`, `shimadzu_fid`, `shimadzu_dad`, `chromeleon_uv`, `thermoraw`, `mzml`, `waters_arw`, `msd`, `csd`, and `wsd`. ``` library(chromConverter) -dat <- read_chroms(path, format.in = "chemstation_uv") +dat <- read_chroms(path, format_in = "chemstation_uv") ``` The `read_chroms` function will attempt to determine an appropriate parser to use and whether you've provided a vector of directories or files. 
However, if you'd like to be more explicit, you can provide arguments to the `parsers` and `find_files` arguments. Setting `find_files = FALSE` will instruct the function that you are providing a vector of files, while `find_files = TRUE` implies that you are providing a vector of directories. @@ -121,11 +124,19 @@ For formats where multiple parsers are available, you can choose between them us ###### OpenChrom parsers -Parsers in OpenChrom are organized by detector-type. Thus, for the `format_in` argument, the user must specify whether the files come from a mass selective detector (`msd`), a current-selective detector like a flame-ionization detector (`csd`), or a wavelength-selective detector (`wsd`), rather than providing a specific file format. In addition, the user should specify what format they'd like to export (`export_format`). Current options include `csv`, `cdf`, `mzml`, or `animl` (the analytical information markup language). The files will then be converted by calling OpenChrom through the command-line interface. If the files are exported in `csv` format, the chromatograms will be automatically read into R. Otherwise, files will be exported to the specified folder but will not be read into the R workspace. +Parsers in OpenChrom are organized by detector-type. Thus, for the `format_in` argument, the user must specify whether the files come from a mass selective detector (`msd`), a current-selective detector like a flame-ionization detector (`csd`), or a wavelength-selective detector (`wsd`), rather than providing a specific file format. In addition, the user should specify what format they'd like to export (`export_format`). Current options include `csv`, `cdf`, `mzml`, or `animl` (the analytical information markup language). The files will then be converted by calling OpenChrom through the command-line interface. If the files are exported in `csv` or `mzml` format, the chromatograms will be automatically read into R. 
Otherwise, files will be exported to the specified folder but will not be read into the R workspace. ###### Extracting metadata -chromConverter includes some options to extract metadata from the provided files. If `read_metadata = TRUE`, metadata will be extracted and stored as `attributes` of the associated object. The metadata can then be extracted into a data.frame or tibble using the `extract_metadata` function. +chromConverter includes some options to extract metadata from the provided files. If `read_metadata = TRUE`, metadata will be extracted and stored as [attributes](https://stat.ethz.ch/R-manual/R-devel/library/base/html/attributes.html) of the associated object. The metadata can then be extracted into a data.frame or tibble using the `extract_metadata` function. + +##### Importing peak lists + +The `read_peaklist` function can be used to import peak lists from 'Chemstation' or 'Shimadzu' ascii files. The syntax is similar to `read_chroms`. In the simplest case, you can just provide paths to the files or directory you want to read in along with the format (`format_in`), e.g. + +``` +pks <- read_peaklist(path, format_in = "chemstation") +``` ### Further analysis diff --git a/inst/CITATION b/inst/CITATION index 88afb15..445a7b9 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -5,7 +5,7 @@ citEntry( title = "chromConverter: chromatographic file converter", author = "Ethan Bass", year = "2023", - version = "version 0.4.3", + version = "version 0.5.0", doi = "10.5281/zenodo.6792521", url = "https://ethanbass.github.io/chromConverter/", textVersion = paste("Bass, E. 
(2023).", diff --git a/man/call_entab.Rd b/man/call_entab.Rd index bc0cf10..342dc3f 100644 --- a/man/call_entab.Rd +++ b/man/call_entab.Rd @@ -7,9 +7,10 @@ call_entab( file, data_format = c("wide", "long"), - format_in = "", + format_in = NULL, format_out = c("matrix", "data.frame"), - read_metadata = TRUE + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw") ) } \arguments{ @@ -22,6 +23,9 @@ call_entab( \item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} \item{read_metadata}{Whether to read metadata from file.} + +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} +or \code{raw}.} } \value{ A chromatogram in the format specified by \code{format_out} diff --git a/man/call_openchrom.Rd b/man/call_openchrom.Rd index 01869c9..26e1d17 100644 --- a/man/call_openchrom.Rd +++ b/man/call_openchrom.Rd @@ -6,10 +6,12 @@ \usage{ call_openchrom( files, - path_out, + path_out = NULL, format_in, - export_format = c("csv", "cdf", "mzml", "animl"), - return_paths = FALSE + format_out = c("matrix", "data.frame"), + export_format = c("mzml", "csv", "cdf", "animl"), + return_paths = FALSE, + verbose = getOption("verbose") ) } \arguments{ @@ -19,9 +21,15 @@ call_openchrom( \item{format_in}{Either \code{msd} for mass spectrometry data, \code{csd} for flame ionization data, or \code{wsd} for DAD/UV data.} -\item{export_format}{Either \code{csv}, \code{cdf}, \code{mzml}, \code{animl}.} +\item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} -\item{return_paths}{Logical. If TRUE, the function will return a character vector of paths to the newly created files.} +\item{export_format}{Either \code{mzml}, \code{csv}, \code{cdf}, \code{animl}. +Defaults to \code{mzml}.} + +\item{return_paths}{Logical. If TRUE, the function will return a character +vector of paths to the newly created files.} + +\item{verbose}{Logical. 
Whether to print output from OpenChrom to the console.} } \value{ If \code{return_paths} is TRUE, the function will return a vector of paths to the newly created files. @@ -31,7 +39,10 @@ of chromatograms in \code{data.frame} format. Otherwise, it will not return anyt \description{ Writes \code{xml} batch-files and calls OpenChrom file parsers using a system call to the command-line interface. To use this function -\href{https://lablicate.com/platform/openchrom}{OpenChrom} must be manually installed. +\href{https://lablicate.com/platform/openchrom}{OpenChrom} (version 1.4.0) must be +manually installed. The command line interface is no longer supported in the +latest versions of OpenChrom (starting with version 1.5.0), so the function +will not work with these new versions. } \details{ The \code{call_openchrom} works by creating an \code{xml} batchfile and diff --git a/man/configure_olefile.Rd b/man/configure_olefile.Rd new file mode 100644 index 0000000..4eef869 --- /dev/null +++ b/man/configure_olefile.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_shimadzu_lcd.R +\name{configure_olefile} +\alias{configure_olefile} +\title{Configure olefile} +\usage{ +configure_olefile(return_boolean = FALSE) +} +\arguments{ +\item{return_boolean}{Logical. Whether to return a Boolean value indicating +if the chromConverter environment is correctly configured.} +} +\value{ +If \code{return_boolean} is \code{TRUE}, returns a Boolean value +indicating whether the chromConverter environment is configured correctly. +Otherwise, there is no return value. +} +\description{ +Configures reticulate to use olefile. Olefile is required to use the 'Shimadzu' +LCD parser. 
+} +\author{ +Ethan Bass +} diff --git a/man/read_agilent_dx.Rd b/man/read_agilent_dx.Rd new file mode 100644 index 0000000..04f7604 --- /dev/null +++ b/man/read_agilent_dx.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_chemstation_ch.R +\name{read_agilent_dx} +\alias{read_agilent_dx} +\title{Parser for reading Agilent ('.dx') files into R} +\usage{ +read_agilent_dx( + path, + path_out = NULL, + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE +) +} +\arguments{ +\item{path}{Path to \code{.dx} file.} + +\item{path_out}{Path to directory to export unzipped files.} + +\item{format_out}{Matrix or data.frame.} + +\item{data_format}{Whether to return data in \code{wide} or \code{long} format.} + +\item{read_metadata}{Logical. Whether to attach metadata.} +} +\value{ +A chromatogram in the format specified by \code{format_out} +(retention time x wavelength). +} +\description{ +Parser for reading Agilent ('.dx') files into R +} +\author{ +Ethan Bass +} diff --git a/man/read_chemstation_ch.Rd b/man/read_chemstation_ch.Rd index 576f681..2cf355d 100644 --- a/man/read_chemstation_ch.Rd +++ b/man/read_chemstation_ch.Rd @@ -8,7 +8,8 @@ read_chemstation_ch( path, format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), - read_metadata = TRUE + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw") ) } \arguments{ @@ -19,6 +20,9 @@ read_chemstation_ch( \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} \item{read_metadata}{Logical. Whether to attach metadata.} + +\item{metadata_format}{Format to output metadata. 
Either \code{chromconverter} +or \code{raw}.} } \value{ A chromatogram in the format specified by \code{format_out} diff --git a/man/read_chemstation_csv.Rd b/man/read_chemstation_csv.Rd index 4840619..c971af9 100644 --- a/man/read_chemstation_csv.Rd +++ b/man/read_chemstation_csv.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/parsers.R +% Please edit documentation in R/read_chemstation_csv.R \name{read_chemstation_csv} \alias{read_chemstation_csv} -\title{Chemstation CSV reader} +\title{Read Chemstation CSV} \usage{ read_chemstation_csv(file, format_out = c("matrix", "data.frame")) } @@ -16,7 +16,7 @@ A chromatogram in the format specified by \code{format_out} (retention time x wavelength). } \description{ -Chemstation CSV reader +Read Chemstation CSV } \author{ Ethan Bass diff --git a/man/read_chemstation_reports.Rd b/man/read_chemstation_reports.Rd new file mode 100644 index 0000000..4c6010b --- /dev/null +++ b/man/read_chemstation_reports.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_chemstation_report.R +\name{read_chemstation_reports} +\alias{read_chemstation_reports} +\title{Read Agilent Chemstation Reports} +\usage{ +read_chemstation_reports( + files, + data_format = c("chromatographr", "original"), + metadata_format = c("chromconverter", "raw") +) +} +\arguments{ +\item{files}{Paths to Chemstation report files.} + +\item{data_format}{Format to output data. Either \code{chromatographr} or +\code{original}.} + +\item{metadata_format}{Format to output metadata. 
Either \code{chromconverter} or +\code{raw}.} +} +\description{ +Read Agilent Chemstation Reports +} +\author{ +Ethan Bass +} diff --git a/man/read_chemstation_uv.Rd b/man/read_chemstation_uv.Rd index 8a4683d..ac278ab 100644 --- a/man/read_chemstation_uv.Rd +++ b/man/read_chemstation_uv.Rd @@ -2,30 +2,34 @@ % Please edit documentation in R/read_chemstation_uv.R \name{read_chemstation_uv} \alias{read_chemstation_uv} -\title{Parser for reading Agilent UV (.uv) files into R} +\title{Read 'Chemstation' DAD files} \usage{ read_chemstation_uv( path, format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), - read_metadata = TRUE + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw") ) } \arguments{ -\item{path}{Path to \code{.uv} file} +\item{path}{Path to \code{.uv} file.} -\item{format_out}{Matrix or data.frame} +\item{format_out}{Matrix or data.frame.} \item{data_format}{Either \code{wide} (default) or \code{long}.} \item{read_metadata}{Logical. Whether to attach metadata.} + +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} +or \code{raw}.} } \value{ A chromatogram in the format specified by \code{format_out} (retention time x wavelength). } \description{ -Parser for reading Agilent UV (.uv) files into R +Parser for reading Agilent UV (.uv) files into R. 
} \note{ This function was adapted from the parser in the rainbow project diff --git a/man/read_chromeleon.Rd b/man/read_chromeleon.Rd index d5a8dc4..4066b8b 100644 --- a/man/read_chromeleon.Rd +++ b/man/read_chromeleon.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/parsers.R +% Please edit documentation in R/read_chromeleon.R \name{read_chromeleon} \alias{read_chromeleon} \title{Chromeleon ASCII reader} @@ -8,7 +8,8 @@ read_chromeleon( file, format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), - read_metadata = TRUE + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw") ) } \arguments{ @@ -19,6 +20,9 @@ read_chromeleon( \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} \item{read_metadata}{Whether to read metadata from file.} + +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} or +\code{raw}.} } \value{ A chromatogram in the format specified by \code{format_out}. 
diff --git a/man/read_chroms.Rd b/man/read_chroms.Rd index fd70245..a0261a4 100644 --- a/man/read_chroms.Rd +++ b/man/read_chroms.Rd @@ -7,10 +7,10 @@ read_chroms( paths, find_files, - format_in = c("agilent_d", "chemstation", "chemstation_uv", "chemstation_csv", - "chemstation_ch", "chemstation_fid", "masshunter_dad", "shimadzu_fid", - "shimadzu_dad", "chromeleon_uv", "thermoraw", "mzml", "waters_arw", "waters_raw", - "msd", "csd", "wsd", "mdf", "other"), + format_in = c("agilent_d", "agilent_dx", "chemstation", "chemstation_fid", + "chemstation_ch", "chemstation_csv", "chemstation_uv", "masshunter_dad", + "chromeleon_uv", "shimadzu_fid", "shimadzu_dad", "shimadzu_lcd", "thermoraw", "mzml", + "waters_arw", "waters_raw", "msd", "csd", "wsd", "mdf", "other"), pattern = NULL, parser = c("", "chromconverter", "aston", "entab", "thermoraw", "openchrom", "rainbow"), format_out = c("matrix", "data.frame"), @@ -19,7 +19,10 @@ read_chroms( path_out = NULL, export_format = c("csv", "chemstation_csv", "cdf", "mzml", "animl"), read_metadata = TRUE, + metadata_format = c("chromconverter", "raw"), progress_bar, + cl = 1, + verbose = getOption("verbose"), sample_names = NULL, dat = NULL ) @@ -32,12 +35,12 @@ the function with a folder or vector of folders containing the files. Otherwise, set to\code{FALSE}.} \item{format_in}{Format of files to be imported/converted. 
Current options -include: \code{chemstation_uv}, \code{chemstation}, \code{chemstation_ch}, -\code{chemstation_csv}, \code{masshunter}, \code{masshunter_dad}, -\code{shimadzu_fid}, \code{shimadzu_dad}, \code{chromeleon_uv}, -\code{agilent_d}, \code{thermoraw}, \code{mzml}, \code{cdf}, \code{mdf}, -\code{waters_arw}, \code{waters_raw}, \code{msd}, \code{csd}, \code{wsd}, -or \code{other}.} +include: \code{agilent_d}, \code{agilent_dx}, \code{chemstation}, +\code{chemstation_uv}, \code{chemstation_ch}, \code{chemstation_csv}, +\code{masshunter}, \code{masshunter_dad}, \code{chromeleon_uv}, +\code{shimadzu_fid}, \code{shimadzu_dad}, \code{thermoraw}, +\code{waters_arw}, \code{waters_raw}, \code{mzml}, \code{cdf}, \code{mdf}, +\code{msd}, \code{csd}, \code{wsd}, or \code{other}.} \item{pattern}{pattern (e.g. a file extension). Defaults to NULL, in which case file extension will be deduced from \code{format_in}.} @@ -65,9 +68,19 @@ and \code{animl}.} \item{read_metadata}{Logical, whether to attach metadata (if it's available). Defaults to TRUE.} +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} +or \code{raw}.} + \item{progress_bar}{Logical. Whether to show progress bar. Defaults to \code{TRUE} if \code{\link[pbapply]{pbapply}} is installed.} +\item{cl}{Argument to \code{\link[pbapply]{pbapply}} specifying the number +of clusters to use or a cluster object created by +\code{\link[parallel]{makeCluster}}. Defaults to 1.} + +\item{verbose}{Logical. Whether to print output from external parsers to the +R console.} + \item{sample_names}{An optional character vector of sample names. Otherwise sample names default to the basename of the specified files.} @@ -88,7 +101,7 @@ parsers from \href{https://github.com/bovee/aston}{Aston}, } \details{ Provides a general interface to chromConverter parsers. 
Currently recognizes -'Agilent ChemStation' (\code{.uv}, \code{.ch}), 'MassHunter' (\code{.dad}) +'Agilent ChemStation' (\code{.uv}, \code{.ch}, \code{.dx}), 'MassHunter' (\code{.dad}) files, 'Thermo RAW' (\code{.raw}), 'Waters ARW' (\code{.arw}), 'Waters RAW' (\code{.raw}), 'Chromeleon ASCII' (\code{.txt}), 'Shimadzu ASCII' (\code{.txt}). Also, wraps Openchrom parsers, which include many additional diff --git a/man/read_mzml.Rd b/man/read_mzml.Rd index 74cdf1b..848b1ce 100644 --- a/man/read_mzml.Rd +++ b/man/read_mzml.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/parsers.R +% Please edit documentation in R/read_mzml.R \name{read_mzml} \alias{read_mzml} \title{Extract data from mzML files} diff --git a/man/read_peaklist.Rd b/man/read_peaklist.Rd new file mode 100644 index 0000000..e2b6f2a --- /dev/null +++ b/man/read_peaklist.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_peaklist.R +\name{read_peaklist} +\alias{read_peaklist} +\title{Read peak lists} +\usage{ +read_peaklist( + paths, + find_files, + format_in = c("chemstation", "shimadzu_fid", "shimadzu_dad"), + pattern = NULL, + data_format = c("chromatographr", "original"), + metadata_format = c("chromconverter", "raw"), + read_metadata = TRUE, + progress_bar, + cl = 1 +) +} +\arguments{ +\item{paths}{paths to files or folders containing files.} + +\item{find_files}{Logical. Set to \code{TRUE} (default) if you are providing +the function with a folder or vector of folders containing the files. +Otherwise, set to \code{FALSE}.} + +\item{format_in}{Format of files to be imported/converted. Current options +include: \code{chemstation}, \code{shimadzu_fid}, or \code{shimadzu_dad}.} + +\item{pattern}{pattern (e.g. a file extension). Defaults to NULL, in which +case file extension will be deduced from \code{format_in}.} + +\item{data_format}{Format of the returned peak tables. 
Either +\code{chromatographr} or \code{original}.} + +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} or +\code{raw}.} + +\item{read_metadata}{Logical, whether to attach metadata (if it's available). +Defaults to TRUE.} + +\item{progress_bar}{Logical. Whether to show progress bar. Defaults to +\code{TRUE} if \code{\link[pbapply]{pbapply}} is installed.} + +\item{cl}{Argument to \code{\link[pbapply]{pbapply}} specifying the number +of clusters to use or a cluster object created by +\code{\link[parallel]{makeCluster}}. Defaults to 1.} +} +\value{ +A \code{peak_list} object: a list with one element per file, each a list of +peak tables in \code{data.frame} format, laid out according to \code{data_format}. +} +\description{ +Reads peak lists from specified folders or vector of paths. +} +\examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +path <- "tests/testthat/testdata/dad1.uv" +chr <- read_chroms(path, find_files = FALSE, format_in = "chemstation_uv") +\dontshow{\}) # examplesIf} +} +\author{ +Ethan Bass +} diff --git a/man/read_shimadzu.Rd b/man/read_shimadzu.Rd index f149ea6..e814f83 100644 --- a/man/read_shimadzu.Rd +++ b/man/read_shimadzu.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/parsers.R +% Please edit documentation in R/read_shimadzu_ascii.R \name{read_shimadzu} \alias{read_shimadzu} \title{Shimadzu ascii reader} @@ -9,8 +9,10 @@ read_shimadzu( format_in, format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), + peaktable_format = c("chromatographr", "original"), what = "chromatogram", - read_metadata = TRUE + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw") ) } \arguments{ @@ -22,10 +24,16 @@ read_shimadzu( \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} +\item{peaktable_format}{Whether to return peak tables in \code{chromatographr} or +\code{original} 
format.} + \item{what}{Whether to extract 
\code{chromatogram} and/or \code{peak_table}. Accepts multiple arguments.} \item{read_metadata}{Whether to read metadata from file.} + +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} or +\code{raw}.} } \value{ A chromatogram in the format specified by \code{format_out} diff --git a/man/read_shimadzu_lcd.Rd b/man/read_shimadzu_lcd.Rd new file mode 100644 index 0000000..b929954 --- /dev/null +++ b/man/read_shimadzu_lcd.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_shimadzu_lcd.R +\name{read_shimadzu_lcd} +\alias{read_shimadzu_lcd} +\title{Shimadzu LCD parser} +\usage{ +read_shimadzu_lcd( + path, + format_out = c("matrix", "data.frame"), + data_format = c("wide", "long"), + read_metadata = TRUE +) +} +\arguments{ +\item{path}{Path to LCD file.} + +\item{format_out}{Matrix or data.frame.} + +\item{data_format}{Either \code{wide} (default) or \code{long}.} + +\item{read_metadata}{Logical. Whether to attach metadata.} +} +\description{ +Read 3D PDA data stream from 'Shimadzu' LCD files. +} +\details{ +A parser to read PDA data from 'Shimadzu' \code{.lcd} files. LCD files are +encoded as 'Microsoft' OLE documents. The parser relies on the +\href{https://pypi.org/project/olefile/}{olefile} package in Python to unpack the +files. The PDA data is encoded in a stream called \code{PDA 3D Raw Data:3D Data Item}. +The PDA data stream contains a segment for each retention time, beginning +with a 24-byte header. + +The 24 byte header consists of the following fields: +\itemize{ +\item 4 bytes: segment label (\code{17234}). +\item 4 bytes: ??? +\item 4 bytes: Little-endian integer specifying the number of wavelength values +in the segment. +\item 4 bytes: Little-endian integer specifying the total number of bytes in the segment. 
+\item 8 bytes of \code{00}s +} + +Each segment is divided into two sub-segments, which begin and end with an +integer specifying the length of the sub-segment in bytes. All known values +in this data stream are little-endian and the data are delta-encoded. The +first hexadecimal digit of each value is a sign digit +specifying the number of bytes in the delta and whether the value is positive +or negative. The sign digit represents the number of hexadecimal digits used +to encode each value. Even numbered sign digits correspond to positive deltas, +whereas odd numbers indicate negative deltas. Positive values are encoded as +little-endian integers, while negative values are encoded as two's +complements. The value at each position is derived by subtracting the delta +from the previous value. +} +\note{ +This parser is experimental and may still +need some work. It is not yet able to interpret much metadata from the files. +} +\author{ +Ethan Bass +} diff --git a/man/read_thermoraw.Rd b/man/read_thermoraw.Rd index 7a08f73..17755d4 100644 --- a/man/read_thermoraw.Rd +++ b/man/read_thermoraw.Rd @@ -6,19 +6,27 @@ \usage{ read_thermoraw( path_in, - path_out, + path_out = NULL, format_out = c("matrix", "data.frame"), - read_metadata = TRUE + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw"), + verbose = getOption("verbose") ) } \arguments{ -\item{path_in}{path to file} +\item{path_in}{Path to file.} -\item{path_out}{directory to export \code{mzML} files.} +\item{path_out}{Path to directory to export \code{mzML} files. If +\code{path_out} isn't specified, a temp directory will be used.} \item{format_out}{R format. Either \code{matrix} or \code{data.frame}.} \item{read_metadata}{Whether to read metadata from file.} + +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} or +\code{raw}.} + +\item{verbose}{Logical. 
Whether to print output from 'ThermoRawFileParser' to the console.} } \value{ A chromatogram in the format specified by \code{format_out}. diff --git a/man/read_varian_peaklist.Rd b/man/read_varian_peaklist.Rd index fb37daa..410d0f6 100644 --- a/man/read_varian_peaklist.Rd +++ b/man/read_varian_peaklist.Rd @@ -1,9 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/read_peaklist.R +% Please edit documentation in R/read_varian_peaklist.R \name{read_varian_peaklist} \alias{read_varian_peaklist} -\title{Read varian peaklist. -Read peak list from 'Varian MS Workstation'.} +\title{Read 'Varian' peaklist. +Read peak list(s) from 'Varian MS Workstation'.} \usage{ read_varian_peaklist(file) } @@ -11,8 +11,8 @@ read_varian_peaklist(file) \item{file}{Path to Varian peak list file.} } \description{ -Read varian peaklist. -Read peak list from 'Varian MS Workstation'. +Read 'Varian' peaklist. +Read peak list(s) from 'Varian MS Workstation'. } \author{ Ethan Bass diff --git a/man/read_waters_arw.Rd b/man/read_waters_arw.Rd index 24c25b8..57bc7b4 100644 --- a/man/read_waters_arw.Rd +++ b/man/read_waters_arw.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/parsers.R +% Please edit documentation in R/read_waters_arw.R \name{read_waters_arw} \alias{read_waters_arw} \title{Waters ascii (.arw) reader} @@ -8,7 +8,8 @@ read_waters_arw( file, format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), - read_metadata = TRUE + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw") ) } \arguments{ @@ -19,6 +20,9 @@ read_waters_arw( \item{data_format}{Whether to return data in \code{wide} or \code{long} format.} \item{read_metadata}{Whether to read metadata from file.} + +\item{metadata_format}{Format to output metadata. 
Either \code{chromconverter} +or \code{raw}.} } \value{ A chromatogram in the format specified by \code{format_out} diff --git a/man/sp_converter.Rd b/man/sp_converter.Rd index 4109224..7863de9 100644 --- a/man/sp_converter.Rd +++ b/man/sp_converter.Rd @@ -8,7 +8,8 @@ sp_converter( file, format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), - read_metadata = TRUE + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw") ) } \arguments{ @@ -20,6 +21,9 @@ sp_converter( \item{read_metadata}{Logical. Whether to read metadata and attach it to the chromatogram.} + +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} +or \code{raw}.} } \value{ A chromatogram in \code{data.frame} format (retention time x wavelength). diff --git a/man/uv_converter.Rd b/man/uv_converter.Rd index 9062984..679a3a0 100644 --- a/man/uv_converter.Rd +++ b/man/uv_converter.Rd @@ -9,7 +9,8 @@ uv_converter( format_out = c("matrix", "data.frame"), data_format = c("wide", "long"), correction = TRUE, - read_metadata = TRUE + read_metadata = TRUE, + metadata_format = c("chromconverter", "raw") ) } \arguments{ @@ -24,6 +25,9 @@ TRUE.} \item{read_metadata}{Logical. Whether to read metadata and attach it to the chromatogram.} + +\item{metadata_format}{Format to output metadata. Either \code{chromconverter} +or \code{raw}.} } \value{ A chromatogram in \code{data.frame} format (retention time x wavelength). diff --git a/tests/testthat/helpers.R b/tests/testthat/helpers.R index 1e34fe3..feaf331 100644 --- a/tests/testthat/helpers.R +++ b/tests/testthat/helpers.R @@ -3,8 +3,7 @@ elementwise.all.equal <- Vectorize(function(x, y, ...) 
{isTRUE(all.equal(x, y, ...))}) # helper function to skip tests if we don't have the right python dependencies -skip_if_missing_dependecies <- function() { - reqs <- c("scipy","numpy", "aston", "pandas") +skip_if_missing_dependecies <- function(reqs = c("scipy","numpy", "aston", "pandas", "olefile")) { have_reqs <- sapply(reqs, reticulate::py_module_available) if (mean(have_reqs) < 1) skip(paste("required packages", reqs[!have_reqs], @@ -12,9 +11,28 @@ skip_if_missing_dependecies <- function() { } skip_if_missing_thermorawfileparser <- function() { - reqs <- c("scipy","numpy", "aston", "pandas") - have_reqs <- sapply(reqs, py_module_available) + if (.Platform$OS.type != "windows"){ + path <- readLines(system.file("shell/thermofileparser.sh", package = "chromConverter"))[2] + path <- strsplit(path," ")[[1]][2] + } else { + path <- readLines(system.file("shell/path_parser.txt", package = "chromConverter")) + path <- gsub("\\\\", "/", path) + } + have_reqs <- fs::file_exists(path) if (mean(have_reqs) < 1) - skip(paste("required packages", reqs[!have_reqs], - "not available for testing")) + skip("ThermoRawFileParser could not be found.") +} + +skip_if_missing_openchrom <- function() { + path_openchrom <- readLines(system.file('shell/path_to_openchrom_commandline.txt', + package='chromConverter')) + if (file.exists(path_openchrom)){ + have_openchrom_cli <- switch(configure_openchrom(cli="status"), true = TRUE, + false = FALSE) + } else{ + have_openchrom_cli <- FALSE + } + if (!have_openchrom_cli){ + skip("OpenChrom could not be found.") + } } diff --git a/tests/testthat/test-extra.R b/tests/testthat/test-extra.R new file mode 100644 index 0000000..6f152b0 --- /dev/null +++ b/tests/testthat/test-extra.R @@ -0,0 +1,298 @@ + +test_that("read_chroms can read 'Agilent Chemstation' version 30 files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("chemstation_30.ch", + package = "chromConverterExtraTests") + + x <- 
read_chroms(path, progress_bar = FALSE, parser="chromconverter") + expect_equal(class(x[[1]])[1], "matrix") + expect_equal(dim(x[[1]]), c(38405, 1)) + expect_equal(attr(x[[1]], "parser"), "chromconverter") + + x1 <- read_chroms(path, progress_bar = FALSE, format_out = "data.frame", + data_format = "long", parser = "chromconverter") + expect_equal(class(x1[[1]])[1], "data.frame") + expect_equal(as.numeric(rownames(x[[1]])), x1[[1]][,1]) + expect_equal(x[[1]][,1], x1[[1]][,2], ignore_attr = TRUE) +}) + +test_that("read_chroms can read 'Agilent Chemstation' 31 files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + skip_if_not_installed("entab") + path <- system.file("chemstation_31.uv", package = "chromConverterExtraTests") + x <- read_chroms(path, progress_bar = FALSE, parser = "chromconverter")[[1]] + x1 <- read_chroms(path, progress_bar = FALSE, parser = "entab")[[1]] + + expect_equal(class(x)[1], "matrix") + expect_equal(class(x1)[1], "matrix") + + expect_equal(dim(x), c(27659, 176)) + expect_equal(dim(x1), c(27659, 177)) + + expect_equal(attr(x1, "parser"), "entab") + expect_equal(attr(x, "parser"), "chromconverter") +}) + + +test_that("read_chroms can read 'Agilent Chemstation' version 81 files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("chemstation_81.ch", + package = "chromConverterExtraTests") + + x <- read_chroms(path, progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "matrix") + expect_equal(dim(x[[1]]), c(2699, 1)) + expect_equal(attr(x[[1]], "parser"), "chromconverter") + + x1 <- read_chroms(path, progress_bar = FALSE, + format_out = "data.frame", data_format = "long") + expect_equal(class(x1[[1]])[1], "data.frame") + expect_equal(dim(x1[[1]]), c(2699, 2)) + expect_equal(as.numeric(rownames(x[[1]])), x1[[1]][,1]) + expect_equal(x[[1]][,1], x1[[1]][,2], ignore_attr = TRUE) +}) + +test_that("read_chroms can read 'Agilent Chemstation' version 130 files", { + skip_on_cran() + 
skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("chemstation_130.ch", + package = "chromConverterExtraTests") + x <- read_chroms(path, progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "matrix") + expect_equal(dim(x[[1]]), c(12750, 1)) + x <- read_chroms(path, data_format = "long", format_out = "data.frame", + progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "data.frame") + expect_equal(dim(x[[1]]), c(12750, 2)) +}) + + +test_that("read_chroms can read 'Agilent Chemstation' 179 files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("chemstation_179.ch", + package = "chromConverterExtraTests") + + x <- read_chroms(path, progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "matrix") + expect_equal(dim(x[[1]]), c(10000, 1)) + expect_equal(attr(x[[1]], "parser"), "chromconverter") + + x1 <- read_chroms(path, progress_bar = FALSE, + format_out = "data.frame", data_format = "long") + expect_equal(class(x1[[1]])[1], "data.frame") + expect_equal(as.numeric(rownames(x[[1]])), x1[[1]][,1]) + expect_equal(x[[1]][,1], x1[[1]][,2], ignore_attr = TRUE) +}) + +test_that("read_chroms can read 'Agilent Masshunter' dad files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + skip_if_not_installed("entab") + + path <- system.file("masshunter.d/AcqData/DAD1.sp", + package = "chromConverterExtraTests") + x <- read_chroms(path, format_in = "masshunter_dad", progress_bar = FALSE) + x1 <- read_chroms(path, format_in = "masshunter_dad", parser = "aston", + progress_bar = FALSE) + expect_equal(dim(x[[1]]), c(240, 276)) + expect_equal(class(x[[1]])[1], "matrix") + expect_equal(x[[1]], x1[[1]], ignore_attr = TRUE) + expect_equal(attr(x[[1]], "parser"), "entab") + expect_equal(attr(x1[[1]], "parser"), "aston") + + x <- read_chroms(path, format_in = "masshunter_dad", parser = "entab", + data_format = "long", format_out = "data.frame", + progress_bar = FALSE) + x1 <- read_chroms(path, 
format_in = "masshunter_dad", parser = "aston", + data_format = "long", format_out = "data.frame", + progress_bar = FALSE) + expect_equal(dim(x[[1]]), c(66240, 3)) + expect_equal(class(x[[1]]), "data.frame") + # expect_equal(x[[1]], x1[[1]], ignore_attr = TRUE) + expect_equal(attr(x[[1]], "parser"), "entab") + expect_equal(attr(x1[[1]], "parser"), "aston") +}) + +test_that("read_chroms can read 'Waters ARW' PDA files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("waters_pda.arw", package = "chromConverterExtraTests") + + x <- read_chroms(path, format_in = "waters_arw", progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "matrix") + expect_equal(dim(x[[1]]), c(6001, 489)) + expect_equal(attr(x[[1]], "parser"), "chromconverter") + expect_equal(attr(x[[1]], "data_format"), "wide") + + x1 <- read_chroms(path, format_in = "waters_arw", progress_bar = FALSE, + format_out = "data.frame", data_format = "long") + expect_equal(class(x1[[1]])[1], "data.frame") + expect_equal(attr(x1[[1]], "data_format"), "long") + # expect_equal(dim(x1[[1]])) +}) + +test_that("read_chroms can read 'Chromeleon' comma-separated files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("chromeleon_comma.txt", + package = "chromConverterExtraTests") + + x <- read_chroms(path, format_in = "chromeleon_uv", progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "matrix") + expect_equal(dim(x[[1]]), c(3241, 1)) + expect_equal(attr(x[[1]], "parser"), "chromconverter") + expect_equal(attr(x[[1]], "data_format"), "wide") + + x1 <- read_chroms(path, format_in = "chromeleon", progress_bar = FALSE, + format_out = "data.frame", data_format = "long") + expect_equal(class(x1[[1]])[1], "data.frame") + expect_equal(as.numeric(rownames(x[[1]])), x1[[1]][,1]) + expect_equal(x[[1]][,1], x1[[1]][,2], ignore_attr = TRUE) + expect_equal(attr(x1[[1]], "data_format"), "long") +}) + +test_that("read_chroms can read 
'Chromeleon' period-separated files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("chromeleon_period.txt", + package = "chromConverterExtraTests") + + x <- read_chroms(path, format_in = "chromeleon", progress_bar = FALSE) + expect_equal(class(x[[1]])[1], "matrix") + expect_equal(dim(x[[1]]), c(10, 1)) + expect_equal(attr(x[[1]], "parser"), "chromconverter") + + x1 <- read_chroms(path, format_in = "chromeleon", progress_bar = FALSE, + format_out = "data.frame", data_format = "long") + expect_equal(class(x1[[1]])[1], "data.frame") + expect_equal(as.numeric(rownames(x[[1]])), x1[[1]][,1]) + expect_equal(x[[1]][,1], x1[[1]][,2], ignore_attr = TRUE) +}) + +test_that("read_peaklist can read `Shimadzu` PDA files", { + skip_on_cran() + skip_if_missing_dependecies() + skip_if_not_installed("chromConverterExtraTests") + path <- system.file("shimadzuDAD_Anthocyanin.txt", + package = "chromConverterExtraTests") + + x <- read_peaklist(path, format_in="shimadzu_dad", progress_bar = FALSE)[[1]] + expect_equal(class(x), "list") + expect_equal(length(x), 5) + expect_equal(class(x[[1]]), "data.frame") + expect_equal(dim(x[[1]]), c(133, 6)) + expect_equal(colnames(x[[1]]), c("sample", "rt", "start", "end", "area", "height")) +}) + +test_that("read_chroms can read 'Shimadzu' PDA files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("shimadzuDAD_Anthocyanin.txt", + package = "chromConverterExtraTests") + + x <- read_chroms(path, format_in = "shimadzu_dad", progress_bar = FALSE)[[1]] + expect_equal(class(x)[1], "matrix") + expect_equal(dim(x), c(4689, 328)) + expect_equal(attr(x, "parser"), "chromconverter") + expect_equal(attr(x, "data_format"), "wide") + + x1 <- read_chroms(path, format_in="shimadzu_dad", progress_bar = FALSE, + data_format = "long", format_out = "data.frame")[[1]] + expect_equal(class(x1)[1], "data.frame") + expect_equal(dim(x1), c(4689*328, 3)) + + path <- 
system.file("Anthocyanin.lcd", package = "chromConverterExtraTests") + x2 <- read_chroms(path, progress_bar = FALSE)[[1]] + expect_equal(dim(x2),c(4689,328)) + expect_equal(x, x2, ignore_attr = TRUE) +}) + +test_that("read_chroms can read 'Agilent' dx files", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("agilent.dx", package = "chromConverterExtraTests") + + x <- read_chroms(path, format_in = "agilent_dx", progress_bar = FALSE)[[1]] + expect_equal(class(x)[1], "matrix") + expect_equal(dim(x), c(10000, 1)) + expect_equal(attr(x, "parser"), "chromconverter") + expect_equal(attr(x, "data_format"), "wide") + + x1 <- read_chroms(path, format_in="agilent_dx", progress_bar = FALSE, + data_format = "long", format_out = "data.frame")[[1]] + expect_equal(class(x1)[1], "data.frame") + expect_equal(dim(x1), c(10000, 2)) +}) + +test_that("read_chroms can read 'Thermo' RAW files", { + skip_on_cran() + skip_on_ci() + skip_if_not_installed("chromConverterExtraTests") + skip_if_missing_thermorawfileparser() + + path <- system.file("20220404_CirA_D2_04.raw", + package = "chromConverterExtraTests") + x <- read_chroms(path, progress_bar = FALSE)[[1]] + expect_equal(class(x), "list") + expect_equal(names(x), c("MS1", "MS2", "DAD", "BPC", "TIC", "chroms", "metadata")) +}) + + +# test_that("thermoraw parser works",{ +# skip_if_not(configure_thermo_parser(check = TRUE)) +# file <- "/Users/ethanbass/Library/CloudStorage/Box-Box/chromatography_test_files/thermo_files/small.RAW" +# x <- read_chroms(file, format_in = "thermoraw", find_files = FALSE) +# expect_equal(class(x[[1]])[1], "matrix") +# expect_equal(attributes(x[[1]])$instrument, "GC-2014") +# }) + +test_that("read_chroms can use 'OpenChrom' parsers", { + skip_on_cran() + skip_on_ci() + skip_if_not_installed("chromConverterExtraTests") + skip_if_missing_openchrom() + + path <- system.file("DCM1.SMS", package = "chromConverterExtraTests") + x <- read_chroms(path, format_in = "msd", 
progress_bar = FALSE, + verbose = FALSE, export_format = "csv")[[1]] + expect_equal(class(x)[1], "matrix") + expect_equal(dim(x), c(3032, 297)) + x <- read_chroms(path, format_in = "msd", progress_bar = FALSE, + verbose = FALSE)[[1]] + expect_equal(class(x), "list") + expect_equal(dim(x$MS1), c(469732,4)) +}) + +test_that("read_varian_peaklist function works", { + skip_on_cran() + skip_if_not_installed("chromConverterExtraTests") + + path <- system.file("varian_peaklist.csv", + package = "chromConverterExtraTests") + x <- read_varian_peaklist(path) + expect_s3_class(x, "data.frame") + expect_equal(dim(x), c(46476, 15)) +}) + +test_that("read_cdf function can read peak tables", { + skip_on_cran() + skip_if_not_installed("ncdf4") + skip_if_not_installed("chromConverterExtraTests") + path <- system.file("VARIAN1.CDF", package = "chromConverterExtraTests") + x <- read_cdf(path, what = "peak_table") + expect_s3_class(x, "data.frame") + expect_equal(dim(x), c(8,5)) +}) diff --git a/tests/testthat/test-read_chroms.R b/tests/testthat/test-read_chroms.R index 626abca..f3798bf 100644 --- a/tests/testthat/test-read_chroms.R +++ b/tests/testthat/test-read_chroms.R @@ -1,17 +1,17 @@ library(testthat) path_csv <- "testdata/dad1.csv" -path_uv <- "testdata/dad1.uv" +path_uv <- "testdata/dad1.uv" #chemstation 131 x <- read_chroms(path_csv, format_in = "chemstation_csv", progress_bar = FALSE) -test_that("aston parser works", { +test_that("aston parser can read `Agilent Chemstation` 131 files", { skip_if_missing_dependecies() paths <- rep(path_uv, 2) x1 <- read_chroms(paths, format_in = "chemstation_uv", parser = "aston", find_files = FALSE, read_metadata = TRUE, progress_bar = FALSE) - expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220.0"])) + expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][, "220.0"])) expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) expect_equal(length(x1), length(paths)) expect_equal(class(x1[[1]])[1], "matrix") @@ 
-23,7 +23,7 @@ x1 <- read_chroms(path_uv, format_in = "chemstation_uv", parser = "chromconverte find_files = FALSE, read_metadata = TRUE, progress_bar = FALSE) -test_that("read_chemstation_uv parser works", { +test_that("read_chemstation_uv parser can read chemstation 131 files", { expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) expect_equal(length(x1), length(path_uv)) @@ -31,7 +31,7 @@ test_that("read_chemstation_uv parser works", { expect_equal(attr(x1[[1]], "data_format"), "wide") }) -test_that ("extract_metadata function works", { +test_that("extract_metadata function works", { meta <- extract_metadata(x1) expect_equal(class(meta), "data.frame") expect_equal(nrow(meta),1) @@ -39,7 +39,7 @@ test_that ("extract_metadata function works", { expect_equal(meta$parser, attr(x1[[1]],"parser")) }) -test_that("entab parser works", { +test_that("entab parser can read `Agilent Chemstation` 131 files", { skip_on_cran() skip_if_not_installed("entab") file <- "testdata/dad1.uv" @@ -53,7 +53,7 @@ test_that("entab parser works", { expect_equal(attr(x1[[1]], "data_format"), "wide") }) -test_that("shimadzu parser works", { +test_that("Shimadzu ascii parser works", { file <- "testdata/ladder.txt" x <- read_chroms(file, format_in = "shimadzu_fid", find_files = FALSE, progress_bar = FALSE) @@ -78,10 +78,10 @@ test_that("read_mzml works", { }) test_that("get_filetype works as expected", { - expect_equal(get_filetype(path_uv), "chemstation_uv") + expect_equal(get_filetype(path_uv), "chemstation_131") }) -test_that("rainbow parser works", { +test_that("Rainbow parser can read chemstation 131 files", { skip_if_missing_dependecies() skip_on_cran() skip_on_ci() @@ -96,20 +96,20 @@ test_that("rainbow parser works", { expect_equal(attr(x1[[1]], "data_format"), "wide") }) -test_that("chemstation_ch parser works", { +test_that("chromconverter parser can read chemstation 130 files", { 
skip_if_missing_dependecies() skip_on_cran() - x1 <- read_chroms("testdata/dad1B.ch", progress_bar = FALSE) + x1 <- read_chroms("testdata/chemstation_130.ch", progress_bar = FALSE) # expect_equal(as.numeric(x[[1]][,1]), as.numeric(x1[[1]][,"220"])) # expect_equal(as.numeric(rownames(x[[1]])), as.numeric(rownames(x1[[1]]))) expect_equal(class(x1[[1]])[1], "matrix") - expect_equal(attr(x1[[1]], "parser"), "chromConverter") + expect_equal(attr(x1[[1]], "parser"), "chromconverter") expect_equal(attr(x1[[1]], "data_format"), "wide") expect_equal(attr(x1[[1]], "detector_unit"), "mAU") expect_equal(attr(x1[[1]], "file_version"), "130") expect_equal(ncol(x1[[1]]), 1) - x2 <- read_chroms("testdata/dad1B.ch", progress_bar = FALSE, - data_format ="long", format_out="data.frame")[[1]] + x2 <- read_chroms("testdata/chemstation_130.ch", progress_bar = FALSE, + data_format = "long", format_out = "data.frame")[[1]] expect_equal(ncol(x2), 2) expect_equal(class(x2), "data.frame") expect_equal(as.numeric(rownames(x1[[1]])), x2[,1]) @@ -119,10 +119,10 @@ test_that("read_chroms exports csvs correctly", { skip_on_cran() path_out <- tempdir(check = TRUE) on.exit(unlink(c(fs::path(path_out, "dad1", ext = "csv"), path_out))) - x1 <- read_chroms(paths = path_uv, export=TRUE, path_out = path_out, + x1 <- read_chroms(paths = path_uv, export = TRUE, path_out = path_out, export_format="csv", format_out = "data.frame", progress_bar = FALSE) - x1_out <- read.csv(fs::path(path_out, "dad1", ext="csv"), row.names=1) + x1_out <- read.csv(fs::path(path_out, "dad1", ext = "csv"), row.names = 1) expect_equal(x1[[1]], x1_out, ignore_attr = TRUE) }) @@ -138,10 +138,48 @@ test_that("read_chroms exports cdf files correctly", { expect_equal(x1[[1]], x1_out, ignore_attr = TRUE) }) -# test_that("thermoraw parser works",{ -# skip_if_not(configure_thermo_parser(check = TRUE)) -# file <- "/Users/ethanbass/Library/CloudStorage/Box-Box/chromatography_test_files/thermo_files/small.RAW" -# x <- read_chroms(file, 
format_in = "thermoraw", find_files = FALSE) -# expect_equal(class(x[[1]])[1], "matrix") -# expect_equal(attributes(x[[1]])$instrument, "GC-2014") -# }) +test_that("read_peaklist can read chemstation reports", { + path <- "testdata/RUTIN2.D/Report.TXT" + x <- read_peaklist(path, format_in = "chemstation") + expect_equal(class(x[[1]]), "list") + expect_equal(class(x[[1]][[1]]), "data.frame") + expect_equal(names(x[[1]]), c("254", "320", "360", "210", "230")) + expect_equal(x[[1]][[1]][[1,"sample"]], "RUTIN2") + expect_equal(x[[1]][[1]][[1,"lambda"]], "254") + expect_equal(colnames(x[[1]][[1]]), + c("sample", "lambda", "rt", "width", "area", "height", "type")) + expect_equal(attr(x, "fit"), "chemstation") + expect_equal(attr(x, "class"), "peak_list") + x <- read_peaklist(path, format_in = "chemstation", data_format = "original") + expect_equal(class(x[[1]]), "list") + expect_equal(class(x[[1]][[1]]), "data.frame") + expect_equal(names(x[[1]]), c("254", "320", "360", "210", "230")) + expect_equal(x[[1]][[1]][[1,"sample"]], "RUTIN2") + expect_equal(x[[1]][[1]][[1,"lambda"]], "254") + expect_equal(colnames(x[[1]][[1]]), + c("sample", "lambda", "Peak #", "RetTime [min]", "Width [min]", + "Area [mAU*s]", "Height [mAU]", "Area %", "Type")) + expect_equal(attr(x, "fit"), "chemstation") + expect_equal(attr(x, "class"), "peak_list") +}) + +test_that("read_peaklist can read 'Shimadzu' fid files", { + path <- "testdata/ladder.txt" + x <- read_peaklist(path, format_in = "shimadzu_fid", progress_bar = FALSE) + expect_equal(class(x[[1]]), "list") + expect_equal(class(x[[1]][[1]]), "data.frame") + expect_equal(x[[1]][[1]][[1,"sample"]], "ladder") + expect_equal(colnames(x[[1]][[1]]), + c("sample", "rt", "start", "end", "area", "height")) + x <- read_peaklist(path, format_in = "shimadzu_fid", data_format = "original") + expect_equal(class(x[[1]]), "list") + expect_equal(class(x[[1]][[1]]), "data.frame") + expect_equal(x[[1]][[1]][[1,"sample"]], "ladder") + 
expect_equal(x[[1]][[1]][[1,"sample"]], "ladder") + expect_equal(colnames(x[[1]][[1]]), + c("sample","Peak.","R.Time","I.Time","F.Time","Area","Height", + "A.H","Conc.","Mark","ID.","Name", "k.", "Plate..", "Plate.Ht.", + "Tailing", "Resolution", "Sep.Factor", "Area.Ratio", "Height.Ratio", + "Conc...", "Norm.Conc.")) + expect_equal(attr(x, "class"), "peak_list") +}) diff --git a/tests/testthat/test_utils.R b/tests/testthat/test_utils.R index 883f8d2..f9c191f 100644 --- a/tests/testthat/test_utils.R +++ b/tests/testthat/test_utils.R @@ -8,8 +8,17 @@ test_that("check_parser works as expected", { expect_equal(check_parser(format_in = "chromeleon_uv", parser=NULL, find = TRUE), "chromconverter") expect_equal(check_parser(format_in = "waters_arw", parser=NULL, find = TRUE), "chromconverter") expect_equal(check_parser(format_in = "mzml", parser=NULL, find = TRUE), "chromconverter") - expect_equal(check_parser(format_in = "chemstation_fid", parser=NULL, find = TRUE), "chromconverter") - expect_equal(check_parser(format_in = "chemstation_ch", parser=NULL, find = TRUE), "chromconverter") + # expect_equal(check_parser(format_in = "chemstation_fid", parser = NULL, find = TRUE), "chromconverter") + # expect_equal(check_parser(format_in = "chemstation_ch", parser = NULL, find = TRUE), "chromconverter") + expect_equal(check_parser(format_in = "chemstation_130", parser=NULL, find = TRUE), "chromconverter") expect_equal(check_parser(format_in = "thermoraw", parser=NULL, find = TRUE), "thermoraw") expect_error(check_parser(format_in = "csd", parser="rainbow", find = FALSE)) }) + +test_that("check for pkg returns error for fake package", { + expect_error(check_for_pkg("made_up_package")) +}) + +test_that("get_filetype returns error for unknown file", { + expect_error(get_filetype("testdata/dad1.csv")) +}) diff --git a/tests/testthat/testdata/RUTIN2.D/Report.TXT b/tests/testthat/testdata/RUTIN2.D/Report.TXT new file mode 100644 index 0000000..cb9a375 Binary files /dev/null and 
b/tests/testthat/testdata/RUTIN2.D/Report.TXT differ diff --git a/tests/testthat/testdata/dad1B.ch b/tests/testthat/testdata/chemstation_130.ch similarity index 100% rename from tests/testthat/testdata/dad1B.ch rename to tests/testthat/testdata/chemstation_130.ch