Skip to content

Commit

Permalink
feat: add support for Shimadzu ver1 tables
Browse files Browse the repository at this point in the history
  • Loading branch information
ethanbass committed Dec 21, 2024
1 parent d7faa83 commit 10792b0
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 43 deletions.
37 changes: 28 additions & 9 deletions R/olefile_utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ export_stream <- function(path, stream, path_out, remove_null_bytes = FALSE,
reticulate::py_run_string('data = st.read()')

if (missing(path_out)){
path_out <- fs::file_temp(pattern =
gsub(" ", "_", paste(c(fs::path_ext_remove(basename(path)), stream),
path_out <- fs::file_temp(pattern = gsub(" ", "_",
paste(c(fs::path_ext_remove(
basename(path)), stream),
collapse="_")))
}
if (remove_null_bytes){
Expand All @@ -41,7 +42,7 @@ export_stream <- function(path, stream, path_out, remove_null_bytes = FALSE,
check_streams <- function(path, what = c("pda", "chroms", "tic", "peaks", ""),
stream = NULL,
boolean = FALSE,
min_size = 552){
min_size = 1200){
what <- match.arg(what, c("pda", "chroms", "tic", "peaks", ""))
olefile <- reticulate::import("olefile")
ole <- olefile$OleFileIO(path)
Expand All @@ -56,7 +57,7 @@ check_streams <- function(path, what = c("pda", "chroms", "tic", "peaks", ""),
streams <- ole$listdir()
what <- switch(what, "chroms" = "Chromatogram Ch",
"tic" = "Centroid SumTIC",
"peaks" = "Peak Table")
"peaks" = "Peak Table|PT")
selected_streams <- streams[grep(what, streams)]
sizes <- sapply(selected_streams, function(x){
ole$get_size(paste0(x, collapse = "/"))})
Expand All @@ -72,21 +73,39 @@ check_streams <- function(path, what = c("pda", "chroms", "tic", "peaks", ""),
#' Check OLE stream by name
#' @noRd
check_stream <- function(path, stream = NULL,
boolean = FALSE){
boolean = FALSE, min_size = 552){
olefile <- reticulate::import("olefile")
ole <- olefile$OleFileIO(path)
python_stream <- paste0(stream, collapse="/")
pda_exists <- tryCatch(ole$get_size(python_stream), error=function(e) 0) > 0
python_stream <- paste0(stream, collapse = "/")
pda_exists <- tryCatch(ole$get_size(python_stream),
error=function(e) 0) > min_size
pda_exists
}


#' List OLE streams
#' @noRd
ole_list_streams <- function(path){
ole_list_streams <- function(path, pattern = NULL, ignore.case = FALSE,
min_size = 552){
olefile <- reticulate::import("olefile")
ole <- olefile$OleFileIO(path)
ole$listdir()
streams <- ole$listdir()
if (!is.null(pattern)){
idx <- grep(streams, pattern = pattern, ignore.case = ignore.case)
if (length(idx)==0)
return(message("No streams found matching the specified pattern."))
streams <- streams[idx]
}
if (!is.null(min_size)){
idx <- which(sapply(streams, function(stream){
check_stream(path, stream, min_size=min_size)
}))
if (length(idx)==0)
return(message(sprintf("All streams matching the specified pattern are smaller than %g bytes.",
min_size)))
streams <- streams[idx]
}
streams
}


Expand Down
67 changes: 44 additions & 23 deletions R/read_shimadzu_gcd.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,17 @@
#' floating-point numbers. The retention times can be (approximately?) derived
#' from the number of values and the sampling interval encoded in the header.
#' @param path Path to GCD file.
#' @param what What stream to get: current options are chromatograms
#' (\code{chroms}) and/or peak lists (\code{peak_table}). If a stream
#' is not specified, the function will default to \code{chroms}.
#' @param format_out Class of output. Either \code{matrix}, \code{data.frame},
#' or \code{data.table}.
#' @param data_format Either \code{wide} (default) or \code{long}.
#' @param read_metadata Logical. Whether to attach metadata.
#' @param metadata_format Format to output metadata. Either \code{chromconverter}
#' or \code{raw}.
#' @param collapse Logical. Whether to collapse lists that only contain a single
#' element.
#' @author Ethan Bass
#' @return A 2D chromatogram from the chromatogram stream in \code{matrix} or
#' \code{data.frame} format, according to the value of \code{format_out}.
Expand All @@ -36,10 +41,13 @@
#' yet able to interpret much metadata from the files.
#' @export

read_shimadzu_gcd <- function(path, format_out = c("matrix", "data.frame", "data.table"),
data_format = c("wide", "long"),
read_metadata = TRUE,
metadata_format = c("chromconverter","raw")){
read_shimadzu_gcd <- function(path, what = "chroms",
format_out = c("matrix", "data.frame",
"data.table"),
data_format = c("wide", "long"),
read_metadata = TRUE,
metadata_format = c("chromconverter","raw"),
collapse = TRUE){
format_out <- match.arg(format_out, c("matrix", "data.frame", "data.table"))
data_format <- match.arg(data_format, c("wide", "long"))
metadata_format <- match.arg(metadata_format, c("chromconverter", "raw"))
Expand All @@ -53,32 +61,45 @@ read_shimadzu_gcd <- function(path, format_out = c("matrix", "data.frame", "data
if (read_metadata){
meta <- read_sz_file_properties(path)
}
existing_streams <- check_streams(path, what = "chroms")
if (any(what == "chroms")){
existing_streams <- check_streams(path, what = "chroms")

dat <- lapply(existing_streams, function(stream){
chroms <- lapply(existing_streams, function(stream){

idx <- as.numeric(gsub("\\D", "", stream[2]))
DI <- read_sz_2DDI(path, idx = idx)
idx <- as.numeric(gsub("\\D", "", stream[2]))
DI <- read_sz_2DDI(path, idx = idx)

x <- decode_shimadzu_gcd(path, stream = stream)
x <- format_2d_chromatogram(rt = x$rt, int = x$int,
data_format = data_format,
format_out = format_out)
if (read_metadata){
x <- attach_metadata(x, c(meta,DI), format_in = metadata_format,
x <- decode_shimadzu_gcd(path, stream = stream)
x <- format_2d_chromatogram(rt = x$rt, int = x$int,
data_format = data_format,
format_out = format_out)
if (read_metadata){
x <- attach_metadata(x, c(meta,DI), format_in = metadata_format,
source_file = path, data_format = data_format,
format_out = format_out)
}
x
})
# infer times from "PDA.1.Method" stream:
# method_metadata <- read_sz_method(path,
# stream = c("GUMM_Information",
# "ShimadzuGC.1","GUC.1.METHOD"))
if (length(chroms) == 1){
chroms <- chroms[[1]]
}
}
if (any(what == "peak_table")){
peak_table <- read_sz_tables(path, format_out = format_out)
if (read_metadata){
peak_table <- attach_metadata(peak_table, meta, format_in = metadata_format,
source_file = path, data_format = data_format,
format_out = "data.frame")
}
x
})

# infer times from "PDA.1.Method" stream
# method_metadata <- read_sz_method(path, stream = c("GUMM_Information", "ShimadzuGC.1",
# "GUC.1.METHOD"))

if (length(dat) == 1){
dat <- dat[[1]]
}
dat <- mget(what, ifnotfound = NA)
null <- sapply(dat, is.null)
if (any(null)) dat <- dat[-which(sapply(dat, is.null))]
if (collapse) dat <- collapse_list(dat)
dat
}

Expand Down
96 changes: 86 additions & 10 deletions R/read_shimadzu_lcd.R
Original file line number Diff line number Diff line change
Expand Up @@ -843,7 +843,10 @@ read_sz_tables <- function(path, format_out = "data.frame"){
stop("Peak table streams could not be detected.")
}
pktab <- lapply(existing_streams, function(stream){
read_sz_table(path, stream)
tryCatch({read_sz_table(path, stream)}, error = function(e){
message(sprintf("Unable to parse `%s`.", paste(stream,collapse=", ")))
NA
})
})
names(pktab) <- sapply(existing_streams, `[[`, 2)
pktab
Expand All @@ -855,40 +858,113 @@ read_sz_table <- function(path, stream, format_out = "data.frame"){
path_raw <- export_stream(path, stream)
f <- file(path_raw, "rb")
on.exit(close(f))
magic <- readBin(f, "raw", n=4)
magic <- paste(paste0("x", as.character(magic)), collapse = "/")
read_sz_table <- switch(magic,
"x56/x45/x52/x31" = read_sz_table_v1,
read_sz_table_v0)
read_sz_table(f, format_out = format_out)
}

#' Read Shimadzu Peak Table 'VER1'
#' @noRd
read_sz_table_v1 <- function(f, format_out = "data.frame"){
rows <- readBin(f, "integer", size = 4)
readBin(f, "integer", size = 8) #skip
seek(f,0,origin = "end")
n_bytes <- seek(f,0,origin = "end")
block_len <- (n_bytes - 20)/rows
seek(f, 20)
tab <- do.call(rbind, lapply(seq_len(rows), function(i){
read_sz_table_block_v1(f, block_len)
}))
tab$ID <- ifelse(tab$ID==0,NA,tab$ID)
if (format_out == "data.frame"){
tab <- as.data.frame(tab)
}
tab
}

#' Read Shimadzu Peak Table 'VER0'
#' @noRd
read_sz_table_v0 <- function(f, format_out = "data.frame"){
seek(f,0)
rows <- readBin(f, "integer", size = 4, endian = "little")
readBin(f, "integer", size = 4) #skip
tab <- do.call(rbind, lapply(seq_len(rows), function(i){
read_sz_table_block(f)
read_sz_table_block_v0(f)
}))
if (format_out == "data.frame"){
tab <- as.data.frame(tab)
}
tab
}

#' Read Shimadzu Table Block
#' Read Shimadzu Table Block 'VER0'
#' @author Ethan Bass
#' @noRd
read_sz_table_block <- function(f){
read_sz_table_block_v0 <- function(f){
readBin(f, "integer", size = 4, endian="little")
R.time <- readBin(f, "integer", size = 4, endian="little")/60000
Area <- readBin(f, "numeric", size=8, endian="little")
readBin(f, "numeric", size=8, endian="little")
Height <- readBin(f, "numeric", size=8, endian="little")
readBin(f, "numeric", size=8, endian="little")
unknown_ints <- readBin(f, "integer", size=4, n=4, endian="little")
unknown_ints
I.time <- readBin(f, "integer", size=4, endian="little")/60000
F.time <- readBin(f, "integer", size=4, endian="little")/60000
AH <- readBin(f, "integer", size=4, endian="little")/1000
seek(f,148,"current")
seek(f, 148, "current") #skip 148 bytes
Plate.no <- readBin(f, "numeric", size=8, endian="little")
Plate.ht <- readBin(f, "numeric", size=8, endian="little")
Tailing <- readBin(f, "numeric", size=8, endian="little")
Resolution <- readBin(f, "numeric", size=8, endian="little")
Sep.factor <- readBin(f, "numeric", size=8, endian="little")
Conc.percent <- readBin(f, "numeric", size=8, endian="little")
Conc.norm <- readBin(f, "numeric", size=8, endian="little")
unknown_ints <- readBin(f, "integer", size=4, n=3, endian="little")
data.frame(R.time, Area,Height,I.time,F.time,AH,Plate.no,Plate.ht,Tailing,
Resolution,Sep.factor)
unknown_ints <- readBin(f, "integer", size=4, n=2, endian="little")
data.frame(R.time, Area, Height, I.time, F.time, AH, Plate.no, Plate.ht,
Tailing, Resolution, Sep.factor)
}

#' Read Shimadzu Table Block 'VER1'
#' @author Ethan Bass
#' @noRd
read_sz_table_block_v1 <- function(f, block_len){
readBin(f, "integer", size = 4, endian = "little")
R.time <- readBin(f, "integer", size = 4, endian="little")/60000
Area <- readBin(f, "numeric", size=8, endian="little")
readBin(f, "numeric", size=8, endian="little")
Height <- readBin(f, "numeric", size=8, endian="little")
readBin(f, "numeric", size=8, endian="little")
unknown_ints <- readBin(f, "integer", size=4, n=4, endian="little")
unknown_ints
I.time <- readBin(f, "integer", size=4, endian="little")/60000
F.time <- readBin(f, "integer", size=4, endian="little")/60000
AH <- readBin(f, "numeric", size=8, endian="little")
seek(f, 104, "current")
Conc <- readBin(f, "numeric", size=8, endian="little")
ID <- readBin(f, "integer", size=4, endian="little")
seek(f, 52, "current")
k <- readBin(f, "numeric", size=8, endian="little")
Plate.no <- readBin(f, "numeric", size=8, endian="little")
seek(f, 56, "current")
unk1 <- readBin(f, "numeric", size=8, endian="little")
seek(f, 56, "current")
Plate.ht <- readBin(f, "numeric", size=8, endian="little")
seek(f, 56, "current")
unk2 <- readBin(f, "numeric", size=8, endian="little")
seek(f, 56, "current")
Tailing <- readBin(f, "numeric", size=8, endian="little")
Resolution <- readBin(f, "numeric", size=8, endian="little")
seek(f, 48, "current")
unk3 <- readBin(f, "numeric", size=8, endian="little")
seek(f, 56, "current")
unk4 <- readBin(f, "numeric", size=8, endian="little")
Sep.factor <- readBin(f, "numeric", size=8, endian="little")
seek(f,64,"current")
Percent.conc <- readBin(f, "numeric", size=8, endian = "little")
Norm.conc <- readBin(f, "numeric", size=8, endian = "little")
seek(f, (block_len-728), "current")
data.frame(R.time, Area, Height, I.time, F.time, AH, Conc, ID, k, Plate.no, Plate.ht,
Tailing, Resolution, Sep.factor, Percent.conc, Norm.conc)
}
11 changes: 10 additions & 1 deletion man/read_shimadzu_gcd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 10792b0

Please sign in to comment.