From 5334e0af349ab42e7781b5502ed72e47da4f67ca Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Wed, 28 Aug 2024 09:09:39 +1000 Subject: [PATCH 1/8] bclconvert reports: handle missing files --- R/bclconvert.R | 74 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 27 deletions(-) diff --git a/R/bclconvert.R b/R/bclconvert.R index f63d36c..e362877 100644 --- a/R/bclconvert.R +++ b/R/bclconvert.R @@ -1,4 +1,4 @@ -#' bcl_convert Wf R6 Class +#' Wf bcl_convert R6 Class #' #' @description #' Contains methods for reading and processing files output from the UMCCR @@ -99,24 +99,30 @@ BclconvertReports <- R6::R6Class( "Adapter_Metrics.csv", "Demultiplex_Stats.csv", "Index_Hopping_Counts.csv", "Top_Unknown_Barcodes.csv" ) - assertthat::assert_that( - all(req_fnames %in% self$contents[["bname"]]) - ) - .read_topunknownbarcodes <- function(x) { - d <- readr::read_csv(x, col_types = "cccd") - assertthat::assert_that(all(colnames(d) == c("Lane", "index", "index2", "# Reads"))) + read_topunknownbarcodes <- function(x) { + cnames <- c("Lane", "index", "index2", "# Reads") + ctypes <- "cccd" + if (!file.exists(x)) { + return(empty_tbl(cnames, ctypes)) + } + d <- readr::read_csv(x, col_types = ctypes) + assertthat::assert_that(all(colnames(d) == cnames)) d |> rlang::set_names(c("lane", "index1", "index2", "n_reads")) |> dplyr::mutate(barcode = glue("{.data$index1}-{.data$index2}") |> as.character()) |> dplyr::select("lane", "barcode", "n_reads") } - .read_adaptermetrics <- function(x) { - d <- readr::read_csv(x, col_types = "ccccddddd") - old_nms <- c( + read_adaptermetrics <- function(x) { + cnames <- c( "Lane", "Sample_ID", "index", "index2", "R1_AdapterBases", "R1_SampleBases", "R2_AdapterBases", "R2_SampleBases", "# Reads" ) - assertthat::assert_that(all(colnames(d) == old_nms)) + ctypes <- "ccccddddd" + if (!file.exists(x)) { + return(empty_tbl(cnames, ctypes)) + } + d <- readr::read_csv(x, col_types = ctypes) + 
assertthat::assert_that(all(colnames(d) == cnames)) d |> dplyr::rename( index1 = "index", n_reads = "# Reads", SampleID = "Sample_ID", lane = "Lane" @@ -130,16 +136,20 @@ BclconvertReports <- R6::R6Class( "R1_SampleBases", "R2_SampleBases" ) } - .read_indexhoppingcounts <- function(x) { - d <- readr::read_csv(x, col_types = "ccccd") - old_nms <- c("Lane", "SampleID", "index", "index2", "# Reads") - assertthat::assert_that(all(colnames(d) == old_nms)) + read_indexhoppingcounts <- function(x) { + cnames <- c("Lane", "SampleID", "index", "index2", "# Reads") + ctypes <- "ccccd" + if (!file.exists(x)) { + return(empty_tbl(cnames, ctypes)) + } + d <- readr::read_csv(x, col_types = ctypes) + assertthat::assert_that(all(colnames(d) == cnames)) d |> dplyr::rename(index1 = "index", n_reads = "# Reads", lane = "Lane") |> dplyr::mutate(barcode = glue("{.data$index1}-{.data$index2}")) |> dplyr::select("lane", "SampleID", "barcode", "n_reads") } - .read_demultiplexstats <- function(x) { + read_demultiplexstats <- function(x) { nms <- tibble::tribble( ~new_nm, ~old_nm, ~class, "lane", "Lane", "c", @@ -152,12 +162,17 @@ BclconvertReports <- R6::R6Class( "mean_quality_score", "Mean Quality Score (PF)", "d" ) lookup <- tibble::deframe(nms[c("new_nm", "old_nm")]) - d <- readr::read_csv(x, col_types = nms[["class"]]) - assertthat::assert_that(all(colnames(d) == nms[["old_nm"]])) + cnames <- nms[["old_nm"]] + ctypes <- nms[["class"]] + if (!file.exists(x)) { + return(empty_tbl(cnames, ctypes)) + } + d <- readr::read_csv(x, col_types = ctypes) + assertthat::assert_that(all(colnames(d) == cnames)) d |> dplyr::rename(dplyr::all_of(lookup)) } - .read_fastqlist <- function(x) { + read_fastqlist <- function(x) { nms <- tibble::tribble( ~new_nm, ~old_nm, ~class, "rgid", "RGID", "c", @@ -168,18 +183,23 @@ BclconvertReports <- R6::R6Class( "2", "Read2File", "c" ) lookup <- tibble::deframe(nms[c("new_nm", "old_nm")]) - d <- readr::read_csv(x, col_types = readr::cols(.default = "c")) - 
assertthat::assert_that(all(colnames(d) == nms[["old_nm"]])) + cnames <- nms[["old_nm"]] + ctypes <- nms[["class"]] + if (!file.exists(x)) { + return(empty_tbl(cnames, ctypes)) + } + d <- readr::read_csv(x, col_types = ctypes) + assertthat::assert_that(all(colnames(d) == cnames)) d |> dplyr::rename(dplyr::all_of(lookup)) |> tidyr::pivot_longer(c("1", "2"), names_to = "read", values_to = "path") } - - am <- .read_adaptermetrics(file.path(p, "Adapter_Metrics.csv")) - ds <- .read_demultiplexstats(file.path(p, "Demultiplex_Stats.csv")) - ih <- .read_indexhoppingcounts(file.path(p, "Index_Hopping_Counts.csv")) - ub <- .read_topunknownbarcodes(file.path(p, "Top_Unknown_Barcodes.csv")) - fq <- .read_fastqlist(file.path(p, "fastq_list.csv")) + # now return all as list elements + am <- read_adaptermetrics(file.path(p, "Adapter_Metrics.csv")) + ds <- read_demultiplexstats(file.path(p, "Demultiplex_Stats.csv")) + ih <- read_indexhoppingcounts(file.path(p, "Index_Hopping_Counts.csv")) + ub <- read_topunknownbarcodes(file.path(p, "Top_Unknown_Barcodes.csv")) + fq <- read_fastqlist(file.path(p, "fastq_list.csv")) list( adapter_metrics = am, demultiplex_stats = ds, From 869db2c3ef9552c081f7fe2553a59a1e40f36100 Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Wed, 28 Aug 2024 10:22:19 +1000 Subject: [PATCH 2/8] alignqc: remove aws-vault --- .pre-commit-config.yaml | 2 +- inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 748d2c9..b9d4ad3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # R specific hooks: https://github.com/lorenzwalthert/precommit repos: - repo: https://github.com/lorenzwalthert/precommit - rev: v0.4.2 + rev: v0.4.3 hooks: - id: style-files args: [--style_pkg=styler, --style_fun=tidyverse_style] diff --git a/inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R 
b/inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R index 4159a2a..9a5f75f 100755 --- a/inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R +++ b/inst/rmd/umccr_workflows/alignment_qc/dl_and_tidy.R @@ -5,12 +5,9 @@ require(dracarys, include.only = "umccr_tidy") require(glue, include.only = "glue") require(here, include.only = "here") - require(rportal, include.only = c("awsvault_profile")) + require(rportal, include.only = c("portaldb_query_workflow")) } -# log into aws umccr prod account -rportal::awsvault_profile("upro") - query_workflow_alignqc <- function(start_date) { wfs <- c("wgs_alignment_qc", "wts_alignment_qc") |> shQuote() |> From 1f1040dcb6a6b3b0f1bcc92eb89f6ac0e4ae1a52 Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Fri, 30 Aug 2024 23:59:35 +1000 Subject: [PATCH 3/8] bclconvert reports: add Adapter_Cycle_Metrics.csv --- R/bclconvert.R | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/R/bclconvert.R b/R/bclconvert.R index e362877..4f95a79 100644 --- a/R/bclconvert.R +++ b/R/bclconvert.R @@ -41,7 +41,8 @@ Wf_bcl_convert <- R6::R6Class( #' #' @examples #' \dontrun{ -#' b <- BclconvertReports$new(here::here("nogit/bcl_convert/WGS_TsqNano/Reports")) +#' p1 <- "240816_A01052_0220_AHM7VHDSXC/202408195d4f2fc4/Reports" +#' b <- BclconvertReports$new(here::here("nogit/bcl_convert", p1)) #' b$path #' b$contents #' d <- b$read() @@ -95,10 +96,25 @@ BclconvertReports <- R6::R6Class( #' @export read = function() { p <- self$path - req_fnames <- c( - "Adapter_Metrics.csv", "Demultiplex_Stats.csv", - "Index_Hopping_Counts.csv", "Top_Unknown_Barcodes.csv" - ) + read_adaptercyclemetrics <- function(x) { + cnames <- c( + "Lane", "Sample_ID", "index", "index2", "ReadNumber", "Cycle", + "NumClustersWithAdapterAtCycle", "% At Cycle" + ) + ctypes <- "ccccccdd" + if (!file.exists(x)) { + return(empty_tbl(cnames, ctypes)) + } + d <- readr::read_csv(x, col_types = ctypes) + assertthat::assert_that(all(colnames(d) == cnames)) + d |> 
+ rlang::set_names( + c( + "lane", "sampleid", "index1", "index2", + "readnum", "cycle", "nclustadapt", "cycpct" + ) + ) + } read_topunknownbarcodes <- function(x) { cnames <- c("Lane", "index", "index2", "# Reads") ctypes <- "cccd" From b14f71358776fb6b3bfda42a1121b7a5d520b4dc Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Sat, 31 Aug 2024 02:13:09 +1000 Subject: [PATCH 4/8] refactor bclc reports parsers --- R/bclconvert.R | 193 +++++++++++++++++++++++++++++-------------------- 1 file changed, 114 insertions(+), 79 deletions(-) diff --git a/R/bclconvert.R b/R/bclconvert.R index 4f95a79..19c73e9 100644 --- a/R/bclconvert.R +++ b/R/bclconvert.R @@ -42,7 +42,8 @@ Wf_bcl_convert <- R6::R6Class( #' @examples #' \dontrun{ #' p1 <- "240816_A01052_0220_AHM7VHDSXC/202408195d4f2fc4/Reports" -#' b <- BclconvertReports$new(here::here("nogit/bcl_convert", p1)) +#' b <- here::here("nogit/bcl_convert", p1) |> +#' BclconvertReports$new() #' b$path #' b$contents #' d <- b$read() @@ -97,126 +98,160 @@ BclconvertReports <- R6::R6Class( read = function() { p <- self$path read_adaptercyclemetrics <- function(x) { - cnames <- c( - "Lane", "Sample_ID", "index", "index2", "ReadNumber", "Cycle", - "NumClustersWithAdapterAtCycle", "% At Cycle" + cnames <- list( + old = c( + "Lane", "Sample_ID", "index", "index2", "ReadNumber", "Cycle", + "NumClustersWithAdapterAtCycle", "% At Cycle" + ), + new = c( + "lane", "sampleid", "barcode", "readnum", "cycle", + "clustadapt_n", "cycle_pct" + ) + ) + ctypes <- list( + old = "ccccccdd", + new = "cccccdd" ) - ctypes <- "ccccccdd" if (!file.exists(x)) { - return(empty_tbl(cnames, ctypes)) + return(empty_tbl(cnames$new, ctypes$new)) } - d <- readr::read_csv(x, col_types = ctypes) - assertthat::assert_that(all(colnames(d) == cnames)) + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) d |> - rlang::set_names( - c( - "lane", "sampleid", "index1", "index2", - "readnum", "cycle", "nclustadapt", "cycpct" 
- ) - ) + dplyr::mutate(barcode = paste0(.data$index, "-", .data$index2)) |> + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("barcode", .after = "Sample_ID") |> + rlang::set_names(cnames$new) } read_topunknownbarcodes <- function(x) { - cnames <- c("Lane", "index", "index2", "# Reads") - ctypes <- "cccd" + cnames <- list( + old = c("Lane", "index", "index2", "# Reads", "% of Unknown Barcodes", "% of All Reads"), + new = c("lane", "barcode", "reads_n", "unknownbcodes_pct", "reads_pct") + ) + ctypes <- list( + old = "cccddd", + new = "ccddd" + ) if (!file.exists(x)) { - return(empty_tbl(cnames, ctypes)) + return(empty_tbl(cnames$new, ctypes$new)) } - d <- readr::read_csv(x, col_types = ctypes) - assertthat::assert_that(all(colnames(d) == cnames)) + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) d |> - rlang::set_names(c("lane", "index1", "index2", "n_reads")) |> - dplyr::mutate(barcode = glue("{.data$index1}-{.data$index2}") |> as.character()) |> - dplyr::select("lane", "barcode", "n_reads") + dplyr::mutate(barcode = paste0(.data$index, "-", .data$index2)) |> + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("barcode", .after = "Lane") |> + rlang::set_names(cnames$new) } read_adaptermetrics <- function(x) { - cnames <- c( - "Lane", "Sample_ID", "index", "index2", "R1_AdapterBases", - "R1_SampleBases", "R2_AdapterBases", "R2_SampleBases", "# Reads" + cnames <- list( + old = c( + "Lane", "Sample_ID", "index", "index2", "ReadNumber", + "AdapterBases", "SampleBases", "% Adapter Bases" + ), + new = c( + "lane", "sampleid", "barcode", "readnum", "adapter_bases", + "sample_bases", "adapter_bases_pct" + ) + ) + ctypes <- list( + old = "cccccddd", + new = "ccccddd" ) - ctypes <- "ccccddddd" if (!file.exists(x)) { - return(empty_tbl(cnames, ctypes)) + return(empty_tbl(cnames$new, ctypes$new)) } - d <- readr::read_csv(x, col_types = ctypes) - assertthat::assert_that(all(colnames(d) == cnames)) + d 
<- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) d |> - dplyr::rename( - index1 = "index", n_reads = "# Reads", SampleID = "Sample_ID", lane = "Lane" - ) |> dplyr::mutate(barcode = ifelse( - is.na(.data$index1), NA_character_, glue("{.data$index1}-{.data$index2}") + is.na(.data$index), NA_character_, paste0(.data$index, "-", .data$index2) )) |> - dplyr::select( - "lane", "SampleID", "barcode", "n_reads", - "R1_AdapterBases", "R2_AdapterBases", - "R1_SampleBases", "R2_SampleBases" - ) + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("barcode", .after = "Sample_ID") |> + rlang::set_names(cnames$new) } read_indexhoppingcounts <- function(x) { - cnames <- c("Lane", "SampleID", "index", "index2", "# Reads") - ctypes <- "ccccd" + cnames <- list( + old = c( + "Lane", "SampleID", "index", "index2", "# Reads", + "% of Hopped Reads", "% of All Reads" + ), + new = c( + "lane", "sampleid", "barcode", + "reads_n", "reads_hopped_pct", "reads_pct" + ) + ) + ctypes <- list( + old = "ccccd", + new = "cccddd" + ) if (!file.exists(x)) { - return(empty_tbl(cnames, ctypes)) + return(empty_tbl(cnames$new, ctypes$new)) } - d <- readr::read_csv(x, col_types = ctypes) - assertthat::assert_that(all(colnames(d) == cnames)) + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) d |> - dplyr::rename(index1 = "index", n_reads = "# Reads", lane = "Lane") |> - dplyr::mutate(barcode = glue("{.data$index1}-{.data$index2}")) |> - dplyr::select("lane", "SampleID", "barcode", "n_reads") + dplyr::mutate(barcode = paste0(.data$index, "-", .data$index2)) |> + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("barcode", .after = "SampleID") |> + rlang::set_names(cnames$new) } read_demultiplexstats <- function(x) { - nms <- tibble::tribble( - ~new_nm, ~old_nm, ~class, - "lane", "Lane", "c", - "SampleID", "SampleID", "c", - "barcode", "Index", "c", - "n_reads", "# Reads", "d", - 
"n_perfect_idxreads", "# Perfect Index Reads", "d", - "n_one_mismatch_idxreads", "# One Mismatch Index Reads", "d", - "n_q30_bases", "# of >= Q30 Bases (PF)", "d", - "mean_quality_score", "Mean Quality Score (PF)", "d" + cnames <- list( + old = c( + "Lane", "SampleID", "Index", "# Reads", "# Perfect Index Reads", + "# One Mismatch Index Reads", "# Two Mismatch Index Reads", + "% Reads", "% Perfect Index Reads", "% One Mismatch Index Reads", + "% Two Mismatch Index Reads" + ), + new = c( + "lane", "sampleid", "barcode", "reads_n", "perfect_idxreads_n", + "one_mismatch_idxreads_n", "two_mismatch_idxreads_n", + "reads_pct", "perfect_idxreads_pct", + "one_mismatch_idxreads_pct", "two_mismatch_idxreads_pct" + ) + ) + ctypes <- list( + old = "cccdddddddd", + new = "cccdddddddd" ) - lookup <- tibble::deframe(nms[c("new_nm", "old_nm")]) - cnames <- nms[["old_nm"]] - ctypes <- nms[["class"]] if (!file.exists(x)) { - return(empty_tbl(cnames, ctypes)) + return(empty_tbl(cnames$new, ctypes$new)) } - d <- readr::read_csv(x, col_types = ctypes) - assertthat::assert_that(all(colnames(d) == cnames)) + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) d |> - dplyr::rename(dplyr::all_of(lookup)) + rlang::set_names(cnames$new) } read_fastqlist <- function(x) { - nms <- tibble::tribble( - ~new_nm, ~old_nm, ~class, - "rgid", "RGID", "c", - "rgsm", "RGSM", "c", - "rglb", "RGLB", "c", - "lane", "Lane", "c", - "1", "Read1File", "c", - "2", "Read2File", "c" + cnames <- list( + old = c("RGID", "RGSM", "RGLB", "Lane", "Read1File", "Read2File"), + new = c("rgid", "rgsm", "rglb", "lane", "readnum", "filepath") + ) + ctypes <- list( + old = c("cccccc"), + new = c("cccccc") ) - lookup <- tibble::deframe(nms[c("new_nm", "old_nm")]) - cnames <- nms[["old_nm"]] - ctypes <- nms[["class"]] if (!file.exists(x)) { - return(empty_tbl(cnames, ctypes)) + return(empty_tbl(cnames$new, ctypes$new)) } - d <- readr::read_csv(x, col_types = ctypes) - 
assertthat::assert_that(all(colnames(d) == cnames)) + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) d |> - dplyr::rename(dplyr::all_of(lookup)) |> - tidyr::pivot_longer(c("1", "2"), names_to = "read", values_to = "path") + tidyr::pivot_longer(c("Read1File", "Read2File"), names_to = "readnum", values_to = "filepath") |> + dplyr::mutate(readnum = sub("Read(.)File", "\\1", .data$readnum)) |> + rlang::set_names(cnames$new) } # now return all as list elements + ac <- read_adaptercyclemetrics(file.path(p, "Adapter_Cycle_Metrics.csv")) am <- read_adaptermetrics(file.path(p, "Adapter_Metrics.csv")) ds <- read_demultiplexstats(file.path(p, "Demultiplex_Stats.csv")) ih <- read_indexhoppingcounts(file.path(p, "Index_Hopping_Counts.csv")) ub <- read_topunknownbarcodes(file.path(p, "Top_Unknown_Barcodes.csv")) fq <- read_fastqlist(file.path(p, "fastq_list.csv")) list( + adapter_cycle_metrics = ac, adapter_metrics = am, demultiplex_stats = ds, index_hopping_counts = ih, From 51bcd0532a91a0dea0c3476dd0ebe02784016b95 Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Sat, 31 Aug 2024 20:40:07 +1000 Subject: [PATCH 5/8] refactor bclc reports parsers --- R/bclconvert.R | 381 +++++++++++++++++++-------------- man/BclconvertReports.Rd | 176 ++++++++++++++- man/Wf_bcl_convert.Rd | 2 +- man/Wf_tso_ctdna_tumor_only.Rd | 2 +- 4 files changed, 402 insertions(+), 159 deletions(-) diff --git a/R/bclconvert.R b/R/bclconvert.R index 19c73e9..d47b72c 100644 --- a/R/bclconvert.R +++ b/R/bclconvert.R @@ -37,7 +37,8 @@ Wf_bcl_convert <- R6::R6Class( #' #' @description #' Reads and writes tidy versions of files within the `Reports` directory output -#' from BCLConvert. +#' from BCLConvert v4.2.7. See the DRAGEN v4.2 documentation at +#' https://support-docs.illumina.com/SW/dragen_v42/Content/SW/DRAGEN/OutputFiles.htm. 
#' #' @examples #' \dontrun{ @@ -90,173 +91,243 @@ BclconvertReports <- R6::R6Class( invisible(self) }, - #' @description - #' Reads contents of `Reports` directory output by BCLConvert. + #' @description Read Adapter_Metrics.csv file. #' - #' @return A list of tibbles. - #' @export - read = function() { - p <- self$path - read_adaptercyclemetrics <- function(x) { - cnames <- list( - old = c( - "Lane", "Sample_ID", "index", "index2", "ReadNumber", "Cycle", - "NumClustersWithAdapterAtCycle", "% At Cycle" - ), - new = c( - "lane", "sampleid", "barcode", "readnum", "cycle", - "clustadapt_n", "cycle_pct" - ) - ) - ctypes <- list( - old = "ccccccdd", - new = "cccccdd" + #' - lane: lane number. + #' - sampleid: sample ID from sample sheet. + #' - indexes: index/index2 from sample sheet for this sample. + #' - readnum: read number. + #' - adapter_bases: total number of bases trimmed as adapter from the read. + #' - sample_bases: total number of bases not trimmed from the read. + #' - adapter_bases_pct: percentage of bases trimmed as adapter from the read. + #' @param x (`character(1)`)\cr + #' Path to Adapter_Metrics.csv file. 
+ read_adaptermetrics = function(x) { + cnames <- list( + old = c( + "Lane", "Sample_ID", "index", "index2", "ReadNumber", + "AdapterBases", "SampleBases", "% Adapter Bases" + ), + new = c( + "lane", "sampleid", "indexes", "readnum", "adapter_bases", + "sample_bases", "adapter_bases_pct" ) - if (!file.exists(x)) { - return(empty_tbl(cnames$new, ctypes$new)) - } - d <- readr::read_csv(x, col_types = ctypes$old) - assertthat::assert_that(all(colnames(d) == cnames$old)) - d |> - dplyr::mutate(barcode = paste0(.data$index, "-", .data$index2)) |> - dplyr::select(-c("index", "index2")) |> - dplyr::relocate("barcode", .after = "Sample_ID") |> - rlang::set_names(cnames$new) + ) + ctypes <- list( + old = "cccccddd", + new = "ccccddd" + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) } - read_topunknownbarcodes <- function(x) { - cnames <- list( - old = c("Lane", "index", "index2", "# Reads", "% of Unknown Barcodes", "% of All Reads"), - new = c("lane", "barcode", "reads_n", "unknownbcodes_pct", "reads_pct") - ) - ctypes <- list( - old = "cccddd", - new = "ccddd" + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + dplyr::mutate(indexes = ifelse( + is.na(.data$index), NA_character_, paste0(.data$index, "-", .data$index2) + )) |> + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("indexes", .after = "Sample_ID") |> + rlang::set_names(cnames$new) + }, + + #' @description Read Adapter_Cycle_Metrics.csv file. + #' + #' - lane: lane number. + #' - sampleid: sample ID from sample sheet. + #' - indexes: index/index2 from sample sheet for this sample. + #' - read: read number. + #' - cycle: cycle number. + #' - cluster_n: number of clusters where the adapter was detected + #' to begin precisely at this cycle. + #' - cluster_pct: percentage of all clusters where the adapter was detected + #' to begin precisely at this cycle. 
+ #' @param x (`character(1)`)\cr + #' Path to Adapter_Cycle_Metrics.csv file. + read_adaptercyclemetrics = function(x) { + cnames <- list( + old = c( + "Lane", "Sample_ID", "index", "index2", "ReadNumber", "Cycle", + "NumClustersWithAdapterAtCycle", "% At Cycle" + ), + new = c( + "lane", "sampleid", "indexes", "read", "cycle", + "cluster_n", "cluster_pct" ) - if (!file.exists(x)) { - return(empty_tbl(cnames$new, ctypes$new)) - } - d <- readr::read_csv(x, col_types = ctypes$old) - assertthat::assert_that(all(colnames(d) == cnames$old)) - d |> - dplyr::mutate(barcode = paste0(.data$index, "-", .data$index2)) |> - dplyr::select(-c("index", "index2")) |> - dplyr::relocate("barcode", .after = "Lane") |> - rlang::set_names(cnames$new) + ) + ctypes <- list( + old = "ccccccdd", + new = "cccccdd" + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) } - read_adaptermetrics <- function(x) { - cnames <- list( - old = c( - "Lane", "Sample_ID", "index", "index2", "ReadNumber", - "AdapterBases", "SampleBases", "% Adapter Bases" - ), - new = c( - "lane", "sampleid", "barcode", "readnum", "adapter_bases", - "sample_bases", "adapter_bases_pct" - ) - ) - ctypes <- list( - old = "cccccddd", - new = "ccccddd" + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + dplyr::mutate(indexes = paste0(.data$index, "-", .data$index2)) |> + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("indexes", .after = "Sample_ID") |> + rlang::set_names(cnames$new) + }, + + #' @description Read Demultiplex_Stats.csv file. + #' + #' - lane: lane number. + #' - sampleid: sample ID from sample sheet. + #' - indexes: index/index2 from sample sheet for this sample. + #' - reads_n: total number of pass-filter reads mapping to this sample for the lane. + #' - perfect_idxreads_n: number of mapped reads with barcodes matching the indexes exactly. 
+ #' - one_mismatch_idxreads_n: number of mapped reads with barcodes matched with one base mismatched. + #' - two_mismatch_idxreads_n: number of mapped reads with barcodes matched with two bases mismatched. + #' - reads_pct: percentage of pass-filter reads mapping to this sample for the lane. + #' - perfect_idxreads_pct: percentage of mapped reads with barcodes matching the indexes exactly. + #' - one_mismatch_idxreads_pct: percentage of mapped reads with one mismatch to the indexes. + #' - two_mismatch_idxreads_pct: percentage of mapped reads with two mismatches to the indexes. + #' @param x (`character(1)`)\cr + #' Path to Demultiplex_Stats.csv file. + read_demultiplexstats = function(x) { + cnames <- list( + old = c( + "Lane", "SampleID", "Index", "# Reads", "# Perfect Index Reads", + "# One Mismatch Index Reads", "# Two Mismatch Index Reads", + "% Reads", "% Perfect Index Reads", "% One Mismatch Index Reads", + "% Two Mismatch Index Reads" + ), + new = c( + "lane", "sampleid", "indexes", "reads_n", "perfect_idxreads_n", + "one_mismatch_idxreads_n", "two_mismatch_idxreads_n", + "reads_pct", "perfect_idxreads_pct", + "one_mismatch_idxreads_pct", "two_mismatch_idxreads_pct" ) - if (!file.exists(x)) { - return(empty_tbl(cnames$new, ctypes$new)) - } - d <- readr::read_csv(x, col_types = ctypes$old) - assertthat::assert_that(all(colnames(d) == cnames$old)) - d |> - dplyr::mutate(barcode = ifelse( - is.na(.data$index), NA_character_, paste0(.data$index, "-", .data$index2) - )) |> - dplyr::select(-c("index", "index2")) |> - dplyr::relocate("barcode", .after = "Sample_ID") |> - rlang::set_names(cnames$new) + ) + ctypes <- list( + old = "cccdddddddd", + new = "cccdddddddd" + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) } - read_indexhoppingcounts <- function(x) { - cnames <- list( - old = c( - "Lane", "SampleID", "index", "index2", "# Reads", - "% of Hopped Reads", "% of All Reads" - ), - new = c( - "lane", "sampleid", "barcode", - "reads_n", 
"reads_hopped_pct", "reads_pct" - ) - ) - ctypes <- list( - old = "ccccd", - new = "cccddd" + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + rlang::set_names(cnames$new) + }, + + + #' @description Read Index_Hopping_Counts.csv file. + #' + #' - lane: lane number. + #' - sampleid: sample ID from sample sheet. + #' - indexes: index/index2 from sample sheet for this sample. + #' - reads_n: total number of pass-filter reads mapping to the indexes. + #' - reads_hopped_pct: percentage of hopped pass-filter reads mapping to the indexes. + #' - reads_pct: percentage of all pass-filter reads mapping to the indexes. + #' @param x (`character(1)`)\cr + #' Path to Index_Hopping_Counts.csv file. + read_indexhoppingcounts = function(x) { + cnames <- list( + old = c( + "Lane", "SampleID", "index", "index2", "# Reads", + "% of Hopped Reads", "% of All Reads" + ), + new = c( + "lane", "sampleid", "indexes", + "reads_n", "reads_hopped_pct", "reads_pct" ) - if (!file.exists(x)) { - return(empty_tbl(cnames$new, ctypes$new)) - } - d <- readr::read_csv(x, col_types = ctypes$old) - assertthat::assert_that(all(colnames(d) == cnames$old)) - d |> - dplyr::mutate(barcode = paste0(.data$index, "-", .data$index2)) |> - dplyr::select(-c("index", "index2")) |> - dplyr::relocate("barcode", .after = "SampleID") |> - rlang::set_names(cnames$new) + ) + ctypes <- list( + old = "ccccd", + new = "cccddd" + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) } - read_demultiplexstats <- function(x) { - cnames <- list( - old = c( - "Lane", "SampleID", "Index", "# Reads", "# Perfect Index Reads", - "# One Mismatch Index Reads", "# Two Mismatch Index Reads", - "% Reads", "% Perfect Index Reads", "% One Mismatch Index Reads", - "% Two Mismatch Index Reads" - ), - new = c( - "lane", "sampleid", "barcode", "reads_n", "perfect_idxreads_n", - "one_mismatch_idxreads_n", "two_mismatch_idxreads_n", - "reads_pct", 
"perfect_idxreads_pct", - "one_mismatch_idxreads_pct", "two_mismatch_idxreads_pct" - ) - ) - ctypes <- list( - old = "cccdddddddd", - new = "cccdddddddd" - ) - if (!file.exists(x)) { - return(empty_tbl(cnames$new, ctypes$new)) - } - d <- readr::read_csv(x, col_types = ctypes$old) - assertthat::assert_that(all(colnames(d) == cnames$old)) - d |> - rlang::set_names(cnames$new) + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + dplyr::mutate(indexes = paste0(.data$index, "-", .data$index2)) |> + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("indexes", .after = "SampleID") |> + rlang::set_names(cnames$new) + }, + + + #' @description Read Top_Unknown_Barcodes.csv file. + #' + #' - lane: lane number. + #' - indexes: index/index2 of this unlisted sequence. + #' - reads_n: total number of pass-filter reads mapping to the indexes. + #' - unknownbcodes_pct: percentage of unknown pass-filter reads mapping to the indexes. + #' @param x (`character(1)`)\cr + #' Path to Top_Unknown_Barcodes.csv file. 
+ read_topunknownbarcodes = function(x) { + cnames <- list( + old = c("Lane", "index", "index2", "# Reads", "% of Unknown Barcodes", "% of All Reads"), + new = c("lane", "indexes", "reads_n", "unknownbcodes_pct", "reads_pct") + ) + ctypes <- list( + old = "cccddd", + new = "ccddd" + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) } - read_fastqlist <- function(x) { - cnames <- list( - old = c("RGID", "RGSM", "RGLB", "Lane", "Read1File", "Read2File"), - new = c("rgid", "rgsm", "rglb", "lane", "readnum", "filepath") - ) - ctypes <- list( - old = c("cccccc"), - new = c("cccccc") - ) - if (!file.exists(x)) { - return(empty_tbl(cnames$new, ctypes$new)) - } - d <- readr::read_csv(x, col_types = ctypes$old) - assertthat::assert_that(all(colnames(d) == cnames$old)) - d |> - tidyr::pivot_longer(c("Read1File", "Read2File"), names_to = "readnum", values_to = "filepath") |> - dplyr::mutate(readnum = sub("Read(.)File", "\\1", .data$readnum)) |> - rlang::set_names(cnames$new) + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + dplyr::mutate(indexes = paste0(.data$index, "-", .data$index2)) |> + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("indexes", .after = "Lane") |> + rlang::set_names(cnames$new) + }, + + + #' @description Read fastq_list.csv file. + #' + #' - rgid: read group. + #' - rgsm: sample ID. + #' - rglb: library. + #' - lane: flow cell lane. + #' - readnum: read number (1 or 2). + #' - filepath: path to the FASTQ file. + #' @param x (`character(1)`)\cr + #' Path to fastq_list.csv file. 
+ read_fastqlist = function(x) { + cnames <- list( + old = c("RGID", "RGSM", "RGLB", "Lane", "Read1File", "Read2File"), + new = c("rgid", "rgsm", "rglb", "lane", "readnum", "filepath") + ) + ctypes <- list( + old = c("cccccc"), + new = c("cccccc") + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) } + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + tidyr::pivot_longer(c("Read1File", "Read2File"), names_to = "readnum", values_to = "filepath") |> + dplyr::mutate(readnum = sub("Read(.)File", "\\1", .data$readnum)) |> + rlang::set_names(cnames$new) + }, + + #' @description + #' Reads contents of `Reports` directory output by BCLConvert. + #' + #' @return A list of tibbles. + #' @export + read = function() { # now return all as list elements - ac <- read_adaptercyclemetrics(file.path(p, "Adapter_Cycle_Metrics.csv")) - am <- read_adaptermetrics(file.path(p, "Adapter_Metrics.csv")) - ds <- read_demultiplexstats(file.path(p, "Demultiplex_Stats.csv")) - ih <- read_indexhoppingcounts(file.path(p, "Index_Hopping_Counts.csv")) - ub <- read_topunknownbarcodes(file.path(p, "Top_Unknown_Barcodes.csv")) - fq <- read_fastqlist(file.path(p, "fastq_list.csv")) + p <- self$path list( - adapter_cycle_metrics = ac, - adapter_metrics = am, - demultiplex_stats = ds, - index_hopping_counts = ih, - top_unknown_barcodes = ub, - fastq_list = fq + adapter_cycle_metrics = read_adaptercyclemetrics(file.path(p, "Adapter_Cycle_Metrics.csv")), + adapter_metrics = read_adaptermetrics(file.path(p, "Adapter_Metrics.csv")), + demultiplex_stats = read_demultiplexstats(file.path(p, "Demultiplex_Stats.csv")), + index_hopping_counts = read_indexhoppingcounts(file.path(p, "Index_Hopping_Counts.csv")), + top_unknown_barcodes = read_topunknownbarcodes(file.path(p, "Top_Unknown_Barcodes.csv")), + fastq_list = read_fastqlist(file.path(p, "fastq_list.csv")) ) }, diff --git a/man/BclconvertReports.Rd 
b/man/BclconvertReports.Rd index 15aae0a..b88e949 100644 --- a/man/BclconvertReports.Rd +++ b/man/BclconvertReports.Rd @@ -5,11 +5,14 @@ \title{BclconvertReports R6 Class} \description{ Reads and writes tidy versions of files within the \code{Reports} directory output -from BCLConvert. +from BCLConvert v4.2.7. See the DRAGEN v4.2 documentation at +https://support-docs.illumina.com/SW/dragen_v42/Content/SW/DRAGEN/OutputFiles.htm. } \examples{ \dontrun{ -b <- BclconvertReports$new(here::here("nogit/bcl_convert/WGS_TsqNano/Reports")) +p1 <- "240816_A01052_0220_AHM7VHDSXC/202408195d4f2fc4/Reports" +b <- here::here("nogit/bcl_convert", p1) |> + BclconvertReports$new() b$path b$contents d <- b$read() @@ -31,6 +34,12 @@ b$write(d, out_dir = tempdir(), prefix = "sampleA", out_format = "tsv") \itemize{ \item \href{#method-BclconvertReports-new}{\code{BclconvertReports$new()}} \item \href{#method-BclconvertReports-print}{\code{BclconvertReports$print()}} +\item \href{#method-BclconvertReports-read_adaptermetrics}{\code{BclconvertReports$read_adaptermetrics()}} +\item \href{#method-BclconvertReports-read_adaptercyclemetrics}{\code{BclconvertReports$read_adaptercyclemetrics()}} +\item \href{#method-BclconvertReports-read_demultiplexstats}{\code{BclconvertReports$read_demultiplexstats()}} +\item \href{#method-BclconvertReports-read_indexhoppingcounts}{\code{BclconvertReports$read_indexhoppingcounts()}} +\item \href{#method-BclconvertReports-read_topunknownbarcodes}{\code{BclconvertReports$read_topunknownbarcodes()}} +\item \href{#method-BclconvertReports-read_fastqlist}{\code{BclconvertReports$read_fastqlist()}} \item \href{#method-BclconvertReports-read}{\code{BclconvertReports$read()}} \item \href{#method-BclconvertReports-write}{\code{BclconvertReports$write()}} \item \href{#method-BclconvertReports-clone}{\code{BclconvertReports$clone()}} @@ -71,6 +80,169 @@ Print details about the BclconvertReports directory. } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BclconvertReports-read_adaptermetrics}{}}} +\subsection{Method \code{read_adaptermetrics()}}{ +Read Adapter_Metrics.csv file. +\itemize{ +\item lane: lane number. +\item sampleid: sample ID from sample sheet. +\item indexes: index/index2 from sample sheet for this sample. +\item readnum: read number. +\item adapter_bases: total number of bases trimmed as adapter from the read. +\item sample_bases: total number of bases not trimmed from the read. +\item adapter_bases_pct: percentage of bases trimmed as adapter from the read. +} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BclconvertReports$read_adaptermetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to Adapter_Metrics.csv file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BclconvertReports-read_adaptercyclemetrics}{}}} +\subsection{Method \code{read_adaptercyclemetrics()}}{ +Read Adapter_Cycle_Metrics.csv file. +\itemize{ +\item lane: lane number. +\item sampleid: sample ID from sample sheet. +\item indexes: index/index2 from sample sheet for this sample. +\item read: read number. +\item cycle: cycle number. +\item cluster_n: number of clusters where the adapter was detected +to begin precisely at this cycle. +\item cluster_pct: percentage of all clusters where the adapter was detected +to begin precisely at this cycle. +} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BclconvertReports$read_adaptercyclemetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to Adapter_Cycle_Metrics.csv file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BclconvertReports-read_demultiplexstats}{}}} +\subsection{Method \code{read_demultiplexstats()}}{ +Read Demultiplex_Stats.csv file. +\itemize{ +\item lane: lane number. +\item sampleid: sample ID from sample sheet. +\item indexes: index/index2 from sample sheet for this sample. +\item reads_n: total number of pass-filter reads mapping to this sample for the lane. +\item perfect_idxreads_n: number of mapped reads with barcodes matching the indexes exactly. +\item one_mismatch_idxreads_n: number of mapped reads with barcodes matched with one base mismatched. +\item two_mismatch_idxreads_n: number of mapped reads with barcodes matched with two bases mismatched. +\item reads_pct: percentage of pass-filter reads mapping to this sample for the lane. +\item perfect_idxreads_pct: percentage of mapped reads with barcodes matching the indexes exactly. +\item one_mismatch_idxreads_pct: percentage of mapped reads with one mismatch to the indexes. +\item two_mismatch_idxreads_pct: percentage of mapped reads with two mismatches to the indexes. +} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BclconvertReports$read_demultiplexstats(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to Demultiplex_Stats.csv file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BclconvertReports-read_indexhoppingcounts}{}}} +\subsection{Method \code{read_indexhoppingcounts()}}{ +Read Index_Hopping_Counts.csv file. +\itemize{ +\item lane: lane number. +\item sampleid: sample ID from sample sheet. +\item indexes: index/index2 from sample sheet for this sample. +\item reads_n: total number of pass-filter reads mapping to the indexes. +\item reads_hopped_pct: percentage of hopped pass-filter reads mapping to the indexes. +\item reads_pct: percentage of all pass-filter reads mapping to the indexes. +} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BclconvertReports$read_indexhoppingcounts(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to Index_Hopping_Counts.csv file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BclconvertReports-read_topunknownbarcodes}{}}} +\subsection{Method \code{read_topunknownbarcodes()}}{ +Read Top_Unknown_Barcodes.csv file. +\itemize{ +\item lane: lane number. +\item indexes: index/index2 of this unlisted sequence. +\item reads_n: total number of pass-filter reads mapping to the indexes. +\item unknownbcodes_pct: percentage of unknown pass-filter reads mapping to the indexes. +} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BclconvertReports$read_topunknownbarcodes(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to Top_Unknown_Barcodes.csv file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BclconvertReports-read_fastqlist}{}}} +\subsection{Method \code{read_fastqlist()}}{ +Read fastq_list.csv file. +\itemize{ +\item rgid: read group. +\item rgsm: sample ID. +\item rglb: library. +\item lane: flow cell lane. +\item readnum: read number (1 or 2). +\item filepath: path to the FASTQ file. +} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BclconvertReports$read_fastqlist(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to fastq_list.csv file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-BclconvertReports-read}{}}} \subsection{Method \code{read()}}{ diff --git a/man/Wf_bcl_convert.Rd b/man/Wf_bcl_convert.Rd index 60256be..1beb85a 100644 --- a/man/Wf_bcl_convert.Rd +++ b/man/Wf_bcl_convert.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/bclconvert.R \name{Wf_bcl_convert} \alias{Wf_bcl_convert} -\title{bcl_convert Wf R6 Class} +\title{Wf bcl_convert R6 Class} \description{ Contains methods for reading and processing files output from the UMCCR \code{bcl_convert} workflow. diff --git a/man/Wf_tso_ctdna_tumor_only.Rd b/man/Wf_tso_ctdna_tumor_only.Rd index 81c2baa..0af943d 100644 --- a/man/Wf_tso_ctdna_tumor_only.Rd +++ b/man/Wf_tso_ctdna_tumor_only.Rd @@ -12,7 +12,7 @@ Contains methods for reading and processing files output from the UMCCR x <- file.path( "~/icav1/g/production/analysis_data/SBJ00596/tso_ctdna_tumor_only", "2024050555972acf/L2400482/Results/PTC_ctTSO240429_L2400482/dracarys_gds_sync" - ) +) sample_id <- "PTC_ctTSO240429" library_id <- "L2400482" d <- TsoCombinedVariantOutputFile$new(x) From aec1156a9c5d6ba7f3d534fa00c891c6d901a6ad Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Sun, 1 Sep 2024 16:04:27 +1000 Subject: [PATCH 6/8] bclc reports: support demultiplextilestats, qualitymetrics, qualitytilemetrics --- R/bclconvert.R | 190 ++++++++++++++++++++++++++++++--------- man/BclconvertReports.Rd | 134 ++++++++++++++++++++++----- 2 files changed, 261 insertions(+), 63 deletions(-) diff --git a/R/bclconvert.R b/R/bclconvert.R index d47b72c..24634ae 100644 --- a/R/bclconvert.R +++ b/R/bclconvert.R @@ -91,31 +91,33 @@ BclconvertReports <- R6::R6Class( invisible(self) }, - #' @description Read Adapter_Metrics.csv file. + #' @description Read Adapter_Cycle_Metrics.csv file. #' #' - lane: lane number. #' - sampleid: sample ID from sample sheet. #' - indexes: index/index2 from sample sheet for this sample. - #' - readnum: read number. 
- #' - adapter_bases: total number of bases trimmed as adapter from the read. - #' - sample_bases: total number of bases not trimmed from the read. - #' - adapter_bases_pct: percentage of bases trimmed as adapter from the read. + #' - read: read number. + #' - cycle: cycle number. + #' - cluster_n: number of clusters where the adapter was detected + #' to begin precisely at this cycle. + #' - cluster_pct: percentage of all clusters where the adapter was detected + #' to begin precisely at this cycle. #' @param x (`character(1)`)\cr - #' Path to Adapter_Metrics.csv file. - read_adaptermetrics = function(x) { + #' Path to Adapter_Cycle_Metrics.csv file. + read_adaptercyclemetrics = function(x) { cnames <- list( old = c( - "Lane", "Sample_ID", "index", "index2", "ReadNumber", - "AdapterBases", "SampleBases", "% Adapter Bases" + "Lane", "Sample_ID", "index", "index2", "ReadNumber", "Cycle", + "NumClustersWithAdapterAtCycle", "% At Cycle" ), new = c( - "lane", "sampleid", "indexes", "readnum", "adapter_bases", - "sample_bases", "adapter_bases_pct" + "lane", "sampleid", "indexes", "read", "cycle", + "cluster_n", "cluster_pct" ) ) ctypes <- list( - old = "cccccddd", - new = "ccccddd" + old = "ccccccdd", + new = "cccccdd" ) if (!file.exists(x)) { return(empty_tbl(cnames$new, ctypes$new)) @@ -123,41 +125,37 @@ BclconvertReports <- R6::R6Class( d <- readr::read_csv(x, col_types = ctypes$old) assertthat::assert_that(all(colnames(d) == cnames$old)) d |> - dplyr::mutate(indexes = ifelse( - is.na(.data$index), NA_character_, paste0(.data$index, "-", .data$index2) - )) |> + dplyr::mutate(indexes = paste0(.data$index, "-", .data$index2)) |> dplyr::select(-c("index", "index2")) |> dplyr::relocate("indexes", .after = "Sample_ID") |> rlang::set_names(cnames$new) }, - #' @description Read Adapter_Cycle_Metrics.csv file. + #' @description Read Adapter_Metrics.csv file. #' #' - lane: lane number. #' - sampleid: sample ID from sample sheet. 
#' - indexes: index/index2 from sample sheet for this sample. - #' - read: read number. - #' - cycle: cycle number. - #' - cluster_n: number of clusters where the adapter was detected - #' to begin precisely at this cycle. - #' - cluster_pct: percentage of all clusters where the adapter was detected - #' to begin precisely at this cycle. + #' - readnum: read number. + #' - adapter_bases: total number of bases trimmed as adapter from the read. + #' - sample_bases: total number of bases not trimmed from the read. + #' - adapter_bases_pct: percentage of bases trimmed as adapter from the read. #' @param x (`character(1)`)\cr - #' Path to Adapter_Cycle_Metrics.csv file. - read_adaptercyclemetrics = function(x) { + #' Path to Adapter_Metrics.csv file. + read_adaptermetrics = function(x) { cnames <- list( old = c( - "Lane", "Sample_ID", "index", "index2", "ReadNumber", "Cycle", - "NumClustersWithAdapterAtCycle", "% At Cycle" + "Lane", "Sample_ID", "index", "index2", "ReadNumber", + "AdapterBases", "SampleBases", "% Adapter Bases" ), new = c( - "lane", "sampleid", "indexes", "read", "cycle", - "cluster_n", "cluster_pct" + "lane", "sampleid", "indexes", "readnum", "adapter_bases", + "sample_bases", "adapter_bases_pct" ) ) ctypes <- list( - old = "ccccccdd", - new = "cccccdd" + old = "cccccddd", + new = "ccccddd" ) if (!file.exists(x)) { return(empty_tbl(cnames$new, ctypes$new)) @@ -165,7 +163,9 @@ BclconvertReports <- R6::R6Class( d <- readr::read_csv(x, col_types = ctypes$old) assertthat::assert_that(all(colnames(d) == cnames$old)) d |> - dplyr::mutate(indexes = paste0(.data$index, "-", .data$index2)) |> + dplyr::mutate(indexes = ifelse( + is.na(.data$index), NA_character_, paste0(.data$index, "-", .data$index2) + )) |> dplyr::select(-c("index", "index2")) |> dplyr::relocate("indexes", .after = "Sample_ID") |> rlang::set_names(cnames$new) @@ -214,6 +214,115 @@ BclconvertReports <- R6::R6Class( rlang::set_names(cnames$new) }, + #' @description Read 
Demultiplex_Tile_Stats.csv file. + #' + #' - lane: lane number. + #' - sampleid: sample ID from sample sheet. + #' - indexes: index/index2 from sample sheet for this sample. + #' - tile: tile number. + #' - reads_n: total number of pass-filter reads mapping to this sample for the lane. + #' - reads_pct: percentage of pass-filter reads mapping to this sample for the lane. + #' @param x (`character(1)`)\cr + #' Path to Demultiplex_Tile_Stats.csv file. + read_demultiplextilestats = function(x) { + cnames <- list( + old = c("Lane", "SampleID", "Index", "Tile", "# Reads", "% Reads"), + new = c("lane", "sampleid", "indexes", "tile", "reads_n", "reads_pct") + ) + ctypes <- list( + old = "ccccdd", + new = "ccccdd" + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) + } + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + rlang::set_names(cnames$new) + }, + + #' @description Read Quality_Metrics.csv file. + #' + #' - lane: lane number. + #' - sampleid: sample ID from sample sheet. + #' - indexes: index/index2 from sample sheet for this sample. + #' - readnum: read number (1 or 2). + #' - yield: number of bases mapping. + #' - yieldq30: number of bases with quality score >= 30 mapping. + #' - qscore_sum: sum of quality scores of bases mapping. + #' - qscore_mean_pf: mean quality score of bases mapping. + #' - q30_pct: percentage of bases with quality score >= 30 mapping. + #' @param x (`character(1)`)\cr + #' Path to Quality_Metrics.csv file. 
+ read_qualitymetrics = function(x) { + cnames <- list( + old = c( + "Lane", "SampleID", "index", "index2", "ReadNumber", "Yield", + "YieldQ30", "QualityScoreSum", "Mean Quality Score (PF)", "% Q30" + ), + new = c( + "lane", "sampleid", "indexes", "readnum", "yield", + "yieldq30", "qscore_sum", "qscore_mean_pf", "q30_pct" + ) + ) + ctypes <- list( + old = "cccccddddd", + new = "ccccddddd" + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) + } + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + dplyr::mutate(indexes = paste0(.data$index, "-", .data$index2)) |> + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("indexes", .after = "SampleID") |> + rlang::set_names(cnames$new) + }, + + + #' @description Read Quality_Tile_Metrics.csv file. + #' + #' - lane: lane number. + #' - sampleid: sample ID from sample sheet. + #' - indexes: index/index2 from sample sheet for this sample. + #' - readnum: read number (1 or 2). + #' - tile: tile number. + #' - yield: number of bases mapping. + #' - yieldq30: number of bases with quality score >= 30 mapping. + #' - qscore_sum: sum of quality scores of bases mapping. + #' - qscore_mean_pf: mean quality score of bases mapping. + #' - q30_pct: percentage of bases with quality score >= 30 mapping. + #' @param x (`character(1)`)\cr + #' Path to Quality_Tile_Metrics.csv file. 
+ read_qualitytilemetrics = function(x) { + cnames <- list( + old = c( + "Lane", "SampleID", "index", "index2", "ReadNumber", "Tile", "Yield", + "YieldQ30", "QualityScoreSum", "Mean Quality Score (PF)", "% Q30" + ), + new = c( + "lane", "sampleid", "indexes", "readnum", "tile", "yield", + "yieldq30", "qscore_sum", "qscore_mean_pf", "q30_pct" + ) + ) + ctypes <- list( + old = "ccccccddddd", + new = "cccccddddd" + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) + } + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + dplyr::mutate(indexes = paste0(.data$index, "-", .data$index2)) |> + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("indexes", .after = "SampleID") |> + rlang::set_names(cnames$new) + }, #' @description Read Index_Hopping_Counts.csv file. #' @@ -252,7 +361,6 @@ BclconvertReports <- R6::R6Class( rlang::set_names(cnames$new) }, - #' @description Read Top_Unknown_Barcodes.csv file. #' #' - lane: lane number. @@ -282,7 +390,6 @@ BclconvertReports <- R6::R6Class( rlang::set_names(cnames$new) }, - #' @description Read fastq_list.csv file. #' #' - rgid: read group. 
@@ -322,12 +429,15 @@ BclconvertReports <- R6::R6Class( # now return all as list elements p <- self$path list( - adapter_cycle_metrics = read_adaptercyclemetrics(file.path(p, "Adapter_Cycle_Metrics.csv")), - adapter_metrics = read_adaptermetrics(file.path(p, "Adapter_Metrics.csv")), - demultiplex_stats = read_demultiplexstats(file.path(p, "Demultiplex_Stats.csv")), - index_hopping_counts = read_indexhoppingcounts(file.path(p, "Index_Hopping_Counts.csv")), - top_unknown_barcodes = read_topunknownbarcodes(file.path(p, "Top_Unknown_Barcodes.csv")), - fastq_list = read_fastqlist(file.path(p, "fastq_list.csv")) + adapter_cycle_metrics = self$read_adaptercyclemetrics(file.path(p, "Adapter_Cycle_Metrics.csv")), + adapter_metrics = self$read_adaptermetrics(file.path(p, "Adapter_Metrics.csv")), + demultiplex_stats = self$read_demultiplexstats(file.path(p, "Demultiplex_Stats.csv")), + demultiplex_tile_stats = self$read_demultiplextilestats(file.path(p, "Demultiplex_Tile_Stats.csv")), + quality_metrics = self$read_qualitymetrics(file.path(p, "Quality_Metrics.csv")), + quality_tile_metrics = self$read_qualitytilemetrics(file.path(p, "Quality_Tile_Metrics.csv")), + index_hopping_counts = self$read_indexhoppingcounts(file.path(p, "Index_Hopping_Counts.csv")), + top_unknown_barcodes = self$read_topunknownbarcodes(file.path(p, "Top_Unknown_Barcodes.csv")), + fastq_list = self$read_fastqlist(file.path(p, "fastq_list.csv")) ) }, diff --git a/man/BclconvertReports.Rd b/man/BclconvertReports.Rd index b88e949..47b0f6c 100644 --- a/man/BclconvertReports.Rd +++ b/man/BclconvertReports.Rd @@ -34,9 +34,12 @@ b$write(d, out_dir = tempdir(), prefix = "sampleA", out_format = "tsv") \itemize{ \item \href{#method-BclconvertReports-new}{\code{BclconvertReports$new()}} \item \href{#method-BclconvertReports-print}{\code{BclconvertReports$print()}} -\item \href{#method-BclconvertReports-read_adaptermetrics}{\code{BclconvertReports$read_adaptermetrics()}} \item 
\href{#method-BclconvertReports-read_adaptercyclemetrics}{\code{BclconvertReports$read_adaptercyclemetrics()}} +\item \href{#method-BclconvertReports-read_adaptermetrics}{\code{BclconvertReports$read_adaptermetrics()}} \item \href{#method-BclconvertReports-read_demultiplexstats}{\code{BclconvertReports$read_demultiplexstats()}} +\item \href{#method-BclconvertReports-read_demultiplextilestats}{\code{BclconvertReports$read_demultiplextilestats()}} +\item \href{#method-BclconvertReports-read_qualitymetrics}{\code{BclconvertReports$read_qualitymetrics()}} +\item \href{#method-BclconvertReports-read_qualitytilemetrics}{\code{BclconvertReports$read_qualitytilemetrics()}} \item \href{#method-BclconvertReports-read_indexhoppingcounts}{\code{BclconvertReports$read_indexhoppingcounts()}} \item \href{#method-BclconvertReports-read_topunknownbarcodes}{\code{BclconvertReports$read_topunknownbarcodes()}} \item \href{#method-BclconvertReports-read_fastqlist}{\code{BclconvertReports$read_fastqlist()}} @@ -80,57 +83,57 @@ Print details about the BclconvertReports directory. } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-BclconvertReports-read_adaptermetrics}{}}} -\subsection{Method \code{read_adaptermetrics()}}{ -Read Adapter_Metrics.csv file. +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BclconvertReports-read_adaptercyclemetrics}{}}} +\subsection{Method \code{read_adaptercyclemetrics()}}{ +Read Adapter_Cycle_Metrics.csv file. \itemize{ \item lane: lane number. \item sampleid: sample ID from sample sheet. \item indexes: index/index2 from sample sheet for this sample. -\item readnum: read number. -\item adapter_bases: total number of bases trimmed as adapter from the read. -\item sample_bases: total number of bases not trimmed from the read. -\item adapter_bases_pct: percentage of bases trimmed as adapter from the read. +\item read: read number. +\item cycle: cycle number. +\item cluster_n: number of clusters where the adapter was detected +to begin precisely at this cycle. +\item cluster_pct: percentage of all clusters where the adapter was detected +to begin precisely at this cycle. } \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{BclconvertReports$read_adaptermetrics(x)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{BclconvertReports$read_adaptercyclemetrics(x)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{x}}{(\code{character(1)})\cr -Path to Adapter_Metrics.csv file.} +Path to Adapter_Cycle_Metrics.csv file.} } \if{html}{\out{
}} } } \if{html}{\out{
}} -\if{html}{\out{}} -\if{latex}{\out{\hypertarget{method-BclconvertReports-read_adaptercyclemetrics}{}}} -\subsection{Method \code{read_adaptercyclemetrics()}}{ -Read Adapter_Cycle_Metrics.csv file. +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BclconvertReports-read_adaptermetrics}{}}} +\subsection{Method \code{read_adaptermetrics()}}{ +Read Adapter_Metrics.csv file. \itemize{ \item lane: lane number. \item sampleid: sample ID from sample sheet. \item indexes: index/index2 from sample sheet for this sample. -\item read: read number. -\item cycle: cycle number. -\item cluster_n: number of clusters where the adapter was detected -to begin precisely at this cycle. -\item cluster_pct: percentage of all clusters where the adapter was detected -to begin precisely at this cycle. +\item readnum: read number. +\item adapter_bases: total number of bases trimmed as adapter from the read. +\item sample_bases: total number of bases not trimmed from the read. +\item adapter_bases_pct: percentage of bases trimmed as adapter from the read. } \subsection{Usage}{ -\if{html}{\out{
}}\preformatted{BclconvertReports$read_adaptercyclemetrics(x)}\if{html}{\out{
}} +\if{html}{\out{
}}\preformatted{BclconvertReports$read_adaptermetrics(x)}\if{html}{\out{
}} } \subsection{Arguments}{ \if{html}{\out{
}} \describe{ \item{\code{x}}{(\code{character(1)})\cr -Path to Adapter_Cycle_Metrics.csv file.} +Path to Adapter_Metrics.csv file.} } \if{html}{\out{
}} } @@ -167,6 +170,91 @@ Path to Demultiplex_Stats.csv file.} } } \if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BclconvertReports-read_demultiplextilestats}{}}} +\subsection{Method \code{read_demultiplextilestats()}}{ +Read Demultiplex_Tile_Stats.csv file. +\itemize{ +\item lane: lane number. +\item sampleid: sample ID from sample sheet. +\item indexes: index/index2 from sample sheet for this sample. +\item tile: tile number. +\item reads_n: total number of pass-filter reads mapping to this sample for the lane. +\item reads_pct: percentage of pass-filter reads mapping to this sample for the lane. +} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BclconvertReports$read_demultiplextilestats(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to Demultiplex_Tile_Stats.csv file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BclconvertReports-read_qualitymetrics}{}}} +\subsection{Method \code{read_qualitymetrics()}}{ +Read Quality_Metrics.csv file. +\itemize{ +\item lane: lane number. +\item sampleid: sample ID from sample sheet. +\item indexes: index/index2 from sample sheet for this sample. +\item readnum: read number (1 or 2). +\item yield: number of bases mapping. +\item yieldq30: number of bases with quality score >= 30 mapping. +\item qscore_sum: sum of quality scores of bases mapping. +\item qscore_mean_pf: mean quality score of bases mapping. +\item q30_pct: percentage of bases with quality score >= 30 mapping. +} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BclconvertReports$read_qualitymetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to Quality_Metrics.csv file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-BclconvertReports-read_qualitytilemetrics}{}}} +\subsection{Method \code{read_qualitytilemetrics()}}{ +Read Quality_Tile_Metrics.csv file. +\itemize{ +\item lane: lane number. +\item sampleid: sample ID from sample sheet. +\item indexes: index/index2 from sample sheet for this sample. +\item readnum: read number (1 or 2). +\item tile: tile number. +\item yield: number of bases mapping. +\item yieldq30: number of bases with quality score >= 30 mapping. +\item qscore_sum: sum of quality scores of bases mapping. +\item qscore_mean_pf: mean quality score of bases mapping. +\item q30_pct: percentage of bases with quality score >= 30 mapping. +} +\subsection{Usage}{ +\if{html}{\out{
}}\preformatted{BclconvertReports$read_qualitytilemetrics(x)}\if{html}{\out{
}} +} + +\subsection{Arguments}{ +\if{html}{\out{
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to Quality_Tile_Metrics.csv file.} +} +\if{html}{\out{
}} +} +} +\if{html}{\out{
}} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-BclconvertReports-read_indexhoppingcounts}{}}} \subsection{Method \code{read_indexhoppingcounts()}}{ From 9b18a99f69039dc832c4e2283ff7a4a4d056cf7e Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Tue, 3 Sep 2024 14:38:21 +1000 Subject: [PATCH 7/8] keep support for bclconvert 3.7.5 --- R/bclconvert.R | 317 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 282 insertions(+), 35 deletions(-) diff --git a/R/bclconvert.R b/R/bclconvert.R index 24634ae..82587da 100644 --- a/R/bclconvert.R +++ b/R/bclconvert.R @@ -1,38 +1,3 @@ -#' Wf bcl_convert R6 Class -#' -#' @description -#' Contains methods for reading and processing files output from the UMCCR -#' `bcl_convert` workflow. -#' -#' @examples -#' \dontrun{ -#' indir <- file.path() -#' sample_id <- "PTC_ctTSO240429" -#' library_id <- "L2400482" -#' d <- TsoCombinedVariantOutputFile$new(x) -#' d$read() -#' } -#' @export -Wf_bcl_convert <- R6::R6Class( - "Wf_bcl_convert", - public = list( - #' @field indir Input directory containing Reports per assay type - #' (e.g. /primary_data/240607_A01052_0209_BHLHFTDSXC/2024061140802544/). - indir = NULL, - - #' @description Create a new Wf_bcl_convert object. - #' @param indir Input directory containing Reports per assay type. - initialize = function(indir) { - self$indir <- indir - }, - #' @description Print details about the Workflow - #' @param ... (ignored). - print = function(...) { - - } - ) -) - #' BclconvertReports R6 Class #' #' @description @@ -468,3 +433,285 @@ BclconvertReports <- R6::R6Class( } ) ) + +#' BclconvertReports375 R6 Class +#' +#' @description +#' Reads and writes tidy versions of files within the `Reports` directory output +#' from BCLConvert v3.7.5 (for v4.2.7 output use `BclconvertReports`; DRAGEN v4.2 docs: +#' https://support-docs.illumina.com/SW/dragen_v42/Content/SW/DRAGEN/OutputFiles.htm).
+#' +#' @examples +#' \dontrun{ +#' p1 <- "240816_A01052_0220_AHM7VHDSXC/202408195d4f2fc4/Reports" +#' b <- here::here("nogit/bcl_convert", p1) |> +#' BclconvertReports375$new() +#' b$path +#' b$contents +#' d <- b$read() +#' b$write(d, out_dir = tempdir(), prefix = "sampleA", out_format = "tsv") +#' } +#' +#' @export +BclconvertReports375 <- R6::R6Class( + "BclconvertReports375", + public = list( + #' @field path Path to the `Reports` directory. + #' @field contents Tibble with file path, basename, and size. + path = NULL, + contents = NULL, + #' @description Create a new BclconvertReports375 object. + #' @param path Path to the `Reports` directory. + initialize = function(path = NULL) { + stopifnot(is.character(path), length(path) == 1) + self$path <- normalizePath(path) + self$contents <- fs::dir_info(path) |> + dplyr::mutate( + bname = basename(.data$path), + size = as.character(trimws(.data$size)) + ) |> + dplyr::select("path", "bname", "size") + }, + #' @description Print details about the BclconvertReports375 directory. + #' @param ... (ignored). + print = function(...) { + bnames <- self$contents |> + dplyr::mutate( + low = tolower(.data$bname), + ) |> + dplyr::arrange(.data$low) |> + dplyr::mutate( + n = dplyr::row_number(), + bn = glue("{.data$n}. {.data$bname} ({.data$size})") + ) |> + dplyr::pull("bn") + cat("#--- BclconvertReports ---#\n") + cat(glue("Path: {self$path}"), "\n") + cat("Contents:\n") + cat(bnames, sep = "\n") + invisible(self) + }, + + #' @description Read Adapter_Metrics.csv file. + #' + #' - lane: lane number. + #' - sampleid: sample ID from sample sheet. + #' - indexes: index/index2 from sample sheet for this sample. + #' - readnum: read number. + #' - adapter_bases: total number of bases trimmed as adapter from the read. + #' - sample_bases: total number of bases not trimmed from the read. + #' - adapter_bases_pct: percentage of bases trimmed as adapter from the read. 
+ #' @param x (`character(1)`)\cr + #' Path to Adapter_Metrics.csv file. + read_adaptermetrics = function(x) { + cnames <- list( + old = c( + "Lane", "Sample_ID", "index", "index2", "ReadNumber", + "AdapterBases", "SampleBases", "% Adapter Bases" + ), + new = c( + "lane", "sampleid", "indexes", "readnum", "adapter_bases", + "sample_bases", "adapter_bases_pct" + ) + ) + ctypes <- list( + old = "cccccddd", + new = "ccccddd" + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) + } + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + dplyr::mutate(indexes = ifelse( + is.na(.data$index), NA_character_, paste0(.data$index, "-", .data$index2) + )) |> + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("indexes", .after = "Sample_ID") |> + rlang::set_names(cnames$new) + }, + + #' @description Read Demultiplex_Stats.csv file. + #' + #' - lane: lane number. + #' - sampleid: sample ID from sample sheet. + #' - indexes: index/index2 from sample sheet for this sample. + #' - reads_n: total number of pass-filter reads mapping to this sample for the lane. + #' - perfect_idxreads_n: number of mapped reads with barcodes matching the indexes exactly. + #' - one_mismatch_idxreads_n: number of mapped reads with barcodes matched with one base mismatched. + #' - two_mismatch_idxreads_n: number of mapped reads with barcodes matched with two bases mismatched. + #' - reads_pct: percentage of pass-filter reads mapping to this sample for the lane. + #' - perfect_idxreads_pct: percentage of mapped reads with barcodes matching the indexes exactly. + #' - one_mismatch_idxreads_pct: percentage of mapped reads with one mismatch to the indexes. + #' - two_mismatch_idxreads_pct: percentage of mapped reads with two mismatches to the indexes. + #' @param x (`character(1)`)\cr + #' Path to Demultiplex_Stats.csv file.
+ read_demultiplexstats = function(x) { + cnames <- list( + old = c( + "Lane", "SampleID", "Index", "# Reads", "# Perfect Index Reads", + "# One Mismatch Index Reads", "# Two Mismatch Index Reads", + "% Reads", "% Perfect Index Reads", "% One Mismatch Index Reads", + "% Two Mismatch Index Reads" + ), + new = c( + "lane", "sampleid", "indexes", "reads_n", "perfect_idxreads_n", + "one_mismatch_idxreads_n", "two_mismatch_idxreads_n", + "reads_pct", "perfect_idxreads_pct", + "one_mismatch_idxreads_pct", "two_mismatch_idxreads_pct" + ) + ) + ctypes <- list( + old = "cccdddddddd", + new = "cccdddddddd" + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) + } + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + rlang::set_names(cnames$new) + }, + + #' @description Read Index_Hopping_Counts.csv file. + #' + #' - lane: lane number. + #' - sampleid: sample ID from sample sheet. + #' - indexes: index/index2 from sample sheet for this sample. + #' - reads_n: total number of pass-filter reads mapping to the indexes. + #' - reads_hopped_pct: percentage of hopped pass-filter reads mapping to the indexes. + #' - reads_pct: percentage of all pass-filter reads mapping to the indexes. + #' @param x (`character(1)`)\cr + #' Path to Index_Hopping_Counts.csv file. 
+ read_indexhoppingcounts = function(x) { + cnames <- list( + old = c( + "Lane", "SampleID", "index", "index2", "# Reads", + "% of Hopped Reads", "% of All Reads" + ), + new = c( + "lane", "sampleid", "indexes", + "reads_n", "reads_hopped_pct", "reads_pct" + ) + ) + ctypes <- list( + old = "ccccd", + new = "cccddd" + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) + } + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + dplyr::mutate(indexes = paste0(.data$index, "-", .data$index2)) |> + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("indexes", .after = "SampleID") |> + rlang::set_names(cnames$new) + }, + + #' @description Read Top_Unknown_Barcodes.csv file. + #' + #' - lane: lane number. + #' - indexes: index/index2 of this unlisted sequence. + #' - reads_n: total number of pass-filter reads mapping to the indexes. + #' - unknownbcodes_pct: percentage of unknown pass-filter reads mapping to the indexes. + #' @param x (`character(1)`)\cr + #' Path to Top_Unknown_Barcodes.csv file. + read_topunknownbarcodes = function(x) { + cnames <- list( + old = c("Lane", "index", "index2", "# Reads", "% of Unknown Barcodes", "% of All Reads"), + new = c("lane", "indexes", "reads_n", "unknownbcodes_pct", "reads_pct") + ) + ctypes <- list( + old = "cccddd", + new = "ccddd" + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) + } + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + dplyr::mutate(indexes = paste0(.data$index, "-", .data$index2)) |> + dplyr::select(-c("index", "index2")) |> + dplyr::relocate("indexes", .after = "Lane") |> + rlang::set_names(cnames$new) + }, + + #' @description Read fastq_list.csv file. + #' + #' - rgid: read group. + #' - rgsm: sample ID. + #' - rglb: library. + #' - lane: flow cell lane. + #' - readnum: read number (1 or 2). + #' - filepath: path to the FASTQ file. 
+ #' @param x (`character(1)`)\cr + #' Path to fastq_list.csv file. + read_fastqlist = function(x) { + cnames <- list( + old = c("RGID", "RGSM", "RGLB", "Lane", "Read1File", "Read2File"), + new = c("rgid", "rgsm", "rglb", "lane", "readnum", "filepath") + ) + ctypes <- list( + old = c("cccccc"), + new = c("cccccc") + ) + if (!file.exists(x)) { + return(empty_tbl(cnames$new, ctypes$new)) + } + d <- readr::read_csv(x, col_types = ctypes$old) + assertthat::assert_that(all(colnames(d) == cnames$old)) + d |> + tidyr::pivot_longer(c("Read1File", "Read2File"), names_to = "readnum", values_to = "filepath") |> + dplyr::mutate(readnum = sub("Read(.)File", "\\1", .data$readnum)) |> + rlang::set_names(cnames$new) + }, + + #' @description + #' Reads contents of `Reports` directory output by BCLConvert. + #' + #' @return A list of tibbles. + #' @export + read = function() { + # now return all as list elements + p <- self$path + list( + adapter_metrics = self$read_adaptermetrics(file.path(p, "Adapter_Metrics.csv")), + demultiplex_stats = self$read_demultiplexstats(file.path(p, "Demultiplex_Stats.csv")), + index_hopping_counts = self$read_indexhoppingcounts(file.path(p, "Index_Hopping_Counts.csv")), + top_unknown_barcodes = self$read_topunknownbarcodes(file.path(p, "Top_Unknown_Barcodes.csv")), + fastq_list = self$read_fastqlist(file.path(p, "fastq_list.csv")) + ) + }, + + #' @description + #' Writes tidied contents of `Reports` directory output by BCLConvert. + #' + #' @param d Parsed object from `self$read()`. + #' @param prefix Prefix of output file(s). + #' @param out_dir Output directory. + #' @param out_format Format of output file(s). + #' @param drid dracarys ID to use for the dataset (e.g. `wfrid.123`, `prid.456`). 
+ write = function(d, out_dir = NULL, prefix, out_format = "tsv", drid = NULL) { + if (!is.null(out_dir)) { + prefix <- file.path(out_dir, prefix) + } + d_write <- d |> + tibble::enframe(name = "section") |> + dplyr::rowwise() |> + dplyr::mutate( + section_low = tolower(.data$section), + p = glue("{prefix}_{.data$section_low}"), + out = list(write_dracarys(obj = .data$value, prefix = .data$p, out_format = out_format, drid = drid)) + ) |> + dplyr::ungroup() |> + dplyr::select("section", "value") |> + tibble::deframe() + invisible(d_write) + } + ) +) From 6381b935bb31a046b40b4da30a8158a1ae78bf1e Mon Sep 17 00:00:00 2001 From: pdiakumis Date: Tue, 3 Sep 2024 20:57:51 +1000 Subject: [PATCH 8/8] bclc: fix old parsers --- NAMESPACE | 2 +- R/bclconvert.R | 77 +++--------- man/BclconvertReports375.Rd | 240 ++++++++++++++++++++++++++++++++++++ man/Wf_bcl_convert.Rd | 86 ------------- 4 files changed, 261 insertions(+), 144 deletions(-) create mode 100644 man/BclconvertReports375.Rd delete mode 100644 man/Wf_bcl_convert.Rd diff --git a/NAMESPACE b/NAMESPACE index d1e12a5..9df48ec 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ S3method(read,File) export(BcftoolsStatsFile) export(BclconvertReports) +export(BclconvertReports375) export(FastqcMetricsFile) export(File) export(FragmentLengthHistFile) @@ -33,7 +34,6 @@ export(UmQcSumFile) export(UmSigsSnvFile) export(VCMetricsFile) export(Wf) -export(Wf_bcl_convert) export(Wf_tso_ctdna_tumor_only) export(WgsContigMeanCovFile) export(WgsCoverageMetricsFile) diff --git a/R/bclconvert.R b/R/bclconvert.R index 82587da..c5d671a 100644 --- a/R/bclconvert.R +++ b/R/bclconvert.R @@ -443,8 +443,8 @@ BclconvertReports <- R6::R6Class( #' #' @examples #' \dontrun{ -#' p1 <- "240816_A01052_0220_AHM7VHDSXC/202408195d4f2fc4/Reports" -#' b <- here::here("nogit/bcl_convert", p1) |> +#' p1 <- "nogit/bcl_convert/WGS_TsqNano/Reports" +#' b <- here::here(p1) |> #' BclconvertReports375$new() #' b$path #' b$contents @@ -494,29 +494,22 @@ 
BclconvertReports375 <- R6::R6Class( #' @description Read Adapter_Metrics.csv file. #' - #' - lane: lane number. - #' - sampleid: sample ID from sample sheet. - #' - indexes: index/index2 from sample sheet for this sample. - #' - readnum: read number. - #' - adapter_bases: total number of bases trimmed as adapter from the read. - #' - sample_bases: total number of bases not trimmed from the read. - #' - adapter_bases_pct: percentage of bases trimmed as adapter from the read. #' @param x (`character(1)`)\cr #' Path to Adapter_Metrics.csv file. read_adaptermetrics = function(x) { cnames <- list( old = c( - "Lane", "Sample_ID", "index", "index2", "ReadNumber", - "AdapterBases", "SampleBases", "% Adapter Bases" + "Lane", "Sample_ID", "index", "index2", "R1_AdapterBases", + "R1_SampleBases", "R2_AdapterBases", "R2_SampleBases", "# Reads" ), new = c( - "lane", "sampleid", "indexes", "readnum", "adapter_bases", - "sample_bases", "adapter_bases_pct" + "lane", "sampleid", "indexes", "adapter_bases_r1", "sample_bases_r1", + "adapter_bases_r2", "sample_bases_r2", "reads_n" ) ) ctypes <- list( - old = "cccccddd", - new = "ccccddd" + old = "ccccddddd", + new = "cccddddd" ) if (!file.exists(x)) { return(empty_tbl(cnames$new, ctypes$new)) @@ -534,37 +527,23 @@ BclconvertReports375 <- R6::R6Class( #' @description Read Demultiplex_Stats.csv file. #' - #' - lane: lane number. - #' - sampleid: sample ID from sample sheet. - #' - indexes: index/index2 from sample sheet for this sample. - #' - reads_n: total number of pass-filter reads mapping to this sample for the lane. - #' - perfect_idxreads_n: number of mapped reads with barcodes matching the indexes exactly. - #' - one_mismatch_idxreads_n: number of mapped reads with barcodes matched with one base mismatched. - #' - two_mismatch_idxreads_n: number of mapped reads with barcodes matched with two bases mismatched. - #' - reads_pct: percentage of pass-filter reads mapping to this sample for the lane. 
- #' - perfect_idxreads_pct: percentage of mapped reads with barcodes matching the indexess exactly. - #' - one_mismatch_idxreads_pct: percentage of mapped reads with one mismatch to the indexes. - #' - two_mismatch_idxreads_pct: percentage of mapped reads with two mismatches to the indexes. #' @param x (`character(1)`)\cr #' Path to Demultiplex_Stats.csv file. read_demultiplexstats = function(x) { cnames <- list( old = c( "Lane", "SampleID", "Index", "# Reads", "# Perfect Index Reads", - "# One Mismatch Index Reads", "# Two Mismatch Index Reads", - "% Reads", "% Perfect Index Reads", "% One Mismatch Index Reads", - "% Two Mismatch Index Reads" + "# One Mismatch Index Reads", "# of >= Q30 Bases (PF)", + "Mean Quality Score (PF)" ), new = c( "lane", "sampleid", "indexes", "reads_n", "perfect_idxreads_n", - "one_mismatch_idxreads_n", "two_mismatch_idxreads_n", - "reads_pct", "perfect_idxreads_pct", - "one_mismatch_idxreads_pct", "two_mismatch_idxreads_pct" + "one_mismatch_idxreads_n", "q30_bases_n", "qscore_mean_pf" ) ) ctypes <- list( - old = "cccdddddddd", - new = "cccdddddddd" + old = "cccddddd", + new = "cccddddd" ) if (!file.exists(x)) { return(empty_tbl(cnames$new, ctypes$new)) @@ -577,28 +556,16 @@ BclconvertReports375 <- R6::R6Class( #' @description Read Index_Hopping_Counts.csv file. #' - #' - lane: lane number. - #' - sampleid: sample ID from sample sheet. - #' - indexes: index/index2 from sample sheet for this sample. - #' - reads_n: total number of pass-filter reads mapping to the indexes. - #' - reads_hopped_pct: percentage of hopped pass-filter reads mapping to the indexes. - #' - reads_pct: percentage of all pass-filter reads mapping to the indexes. #' @param x (`character(1)`)\cr #' Path to Index_Hopping_Counts.csv file. 
read_indexhoppingcounts = function(x) { cnames <- list( - old = c( - "Lane", "SampleID", "index", "index2", "# Reads", - "% of Hopped Reads", "% of All Reads" - ), - new = c( - "lane", "sampleid", "indexes", - "reads_n", "reads_hopped_pct", "reads_pct" - ) + old = c("Lane", "SampleID", "index", "index2", "# Reads"), + new = c("lane", "sampleid", "indexes", "reads_n") ) ctypes <- list( old = "ccccd", - new = "cccddd" + new = "cccd" ) if (!file.exists(x)) { return(empty_tbl(cnames$new, ctypes$new)) @@ -614,20 +581,16 @@ BclconvertReports375 <- R6::R6Class( #' @description Read Top_Unknown_Barcodes.csv file. #' - #' - lane: lane number. - #' - indexes: index/index2 of this unlisted sequence. - #' - reads_n: total number of pass-filter reads mapping to the indexes. - #' - unknownbcodes_pct: percentage of unknown pass-filter reads mapping to the indexes. #' @param x (`character(1)`)\cr #' Path to Top_Unknown_Barcodes.csv file. read_topunknownbarcodes = function(x) { cnames <- list( - old = c("Lane", "index", "index2", "# Reads", "% of Unknown Barcodes", "% of All Reads"), - new = c("lane", "indexes", "reads_n", "unknownbcodes_pct", "reads_pct") + old = c("Lane", "index", "index2", "# Reads"), + new = c("lane", "indexes", "reads_n") ) ctypes <- list( - old = "cccddd", - new = "ccddd" + old = "cccd", + new = "ccd" ) if (!file.exists(x)) { return(empty_tbl(cnames$new, ctypes$new)) diff --git a/man/BclconvertReports375.Rd b/man/BclconvertReports375.Rd new file mode 100644 index 0000000..01aba8b --- /dev/null +++ b/man/BclconvertReports375.Rd @@ -0,0 +1,240 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/bclconvert.R +\name{BclconvertReports375} +\alias{BclconvertReports375} +\title{BclconvertReports375 R6 Class} +\description{ +Reads and writes tidy versions of files within the \code{Reports} directory output +from BCLConvert v4.2.7. 
See the DRAGEN v4.2 documentation at +https://support-docs.illumina.com/SW/dragen_v42/Content/SW/DRAGEN/OutputFiles.htm. +} +\examples{ +\dontrun{ +p1 <- "240816_A01052_0220_AHM7VHDSXC/202408195d4f2fc4/Reports" +b <- here::here("nogit/bcl_convert", p1) |> + BclconvertReports375$new() +b$path +b$contents +d <- b$read() +b$write(d, out_dir = tempdir(), prefix = "sampleA", out_format = "tsv") +} + +} +\section{Public fields}{ +\if{html}{\out{<div class="r6-fields">
}} +\describe{ +\item{\code{path}}{Path to the \code{Reports} directory.} + +\item{\code{contents}}{Tibble with file path, basename, and size.} +} +\if{html}{\out{</div>
}} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-BclconvertReports375-new}{\code{BclconvertReports375$new()}} +\item \href{#method-BclconvertReports375-print}{\code{BclconvertReports375$print()}} +\item \href{#method-BclconvertReports375-read_adaptermetrics}{\code{BclconvertReports375$read_adaptermetrics()}} +\item \href{#method-BclconvertReports375-read_demultiplexstats}{\code{BclconvertReports375$read_demultiplexstats()}} +\item \href{#method-BclconvertReports375-read_indexhoppingcounts}{\code{BclconvertReports375$read_indexhoppingcounts()}} +\item \href{#method-BclconvertReports375-read_topunknownbarcodes}{\code{BclconvertReports375$read_topunknownbarcodes()}} +\item \href{#method-BclconvertReports375-read_fastqlist}{\code{BclconvertReports375$read_fastqlist()}} +\item \href{#method-BclconvertReports375-read}{\code{BclconvertReports375$read()}} +\item \href{#method-BclconvertReports375-write}{\code{BclconvertReports375$write()}} +\item \href{#method-BclconvertReports375-clone}{\code{BclconvertReports375$clone()}} +} +} +\if{html}{\out{<hr>
}} +\if{html}{\out{<a id="method-BclconvertReports375-new"></a>}} +\if{latex}{\out{\hypertarget{method-BclconvertReports375-new}{}}} +\subsection{Method \code{new()}}{ +Create a new BclconvertReports375 object. +\subsection{Usage}{ +\if{html}{\out{<div class="r">
}}\preformatted{BclconvertReports375$new(path = NULL)}\if{html}{\out{</div>
}} +} + +\subsection{Arguments}{ +\if{html}{\out{<div class="arguments">
}} +\describe{ +\item{\code{path}}{Path to the \code{Reports} directory.} +} +\if{html}{\out{</div>
}} +} +} +\if{html}{\out{<hr>
}} +\if{html}{\out{<a id="method-BclconvertReports375-print"></a>}} +\if{latex}{\out{\hypertarget{method-BclconvertReports375-print}{}}} +\subsection{Method \code{print()}}{ +Print details about the BclconvertReports375 directory. +\subsection{Usage}{ +\if{html}{\out{<div class="r">
}}\preformatted{BclconvertReports375$print(...)}\if{html}{\out{</div>
}} +} + +\subsection{Arguments}{ +\if{html}{\out{<div class="arguments">
}} +\describe{ +\item{\code{...}}{(ignored).} +} +\if{html}{\out{</div>
}} +} +} +\if{html}{\out{<hr>
}} +\if{html}{\out{<a id="method-BclconvertReports375-read_adaptermetrics"></a>}} +\if{latex}{\out{\hypertarget{method-BclconvertReports375-read_adaptermetrics}{}}} +\subsection{Method \code{read_adaptermetrics()}}{ +Read Adapter_Metrics.csv file. +\subsection{Usage}{ +\if{html}{\out{<div class="r">
}}\preformatted{BclconvertReports375$read_adaptermetrics(x)}\if{html}{\out{</div>
}} +} + +\subsection{Arguments}{ +\if{html}{\out{<div class="arguments">
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to Adapter_Metrics.csv file.} +} +\if{html}{\out{</div>
}} +} +} +\if{html}{\out{<hr>
}} +\if{html}{\out{<a id="method-BclconvertReports375-read_demultiplexstats"></a>}} +\if{latex}{\out{\hypertarget{method-BclconvertReports375-read_demultiplexstats}{}}} +\subsection{Method \code{read_demultiplexstats()}}{ +Read Demultiplex_Stats.csv file. +\subsection{Usage}{ +\if{html}{\out{<div class="r">
}}\preformatted{BclconvertReports375$read_demultiplexstats(x)}\if{html}{\out{</div>
}} +} + +\subsection{Arguments}{ +\if{html}{\out{<div class="arguments">
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to Demultiplex_Stats.csv file.} +} +\if{html}{\out{</div>
}} +} +} +\if{html}{\out{<hr>
}} +\if{html}{\out{<a id="method-BclconvertReports375-read_indexhoppingcounts"></a>}} +\if{latex}{\out{\hypertarget{method-BclconvertReports375-read_indexhoppingcounts}{}}} +\subsection{Method \code{read_indexhoppingcounts()}}{ +Read Index_Hopping_Counts.csv file. +\subsection{Usage}{ +\if{html}{\out{<div class="r">
}}\preformatted{BclconvertReports375$read_indexhoppingcounts(x)}\if{html}{\out{</div>
}} +} + +\subsection{Arguments}{ +\if{html}{\out{<div class="arguments">
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to Index_Hopping_Counts.csv file.} +} +\if{html}{\out{</div>
}} +} +} +\if{html}{\out{<hr>
}} +\if{html}{\out{<a id="method-BclconvertReports375-read_topunknownbarcodes"></a>}} +\if{latex}{\out{\hypertarget{method-BclconvertReports375-read_topunknownbarcodes}{}}} +\subsection{Method \code{read_topunknownbarcodes()}}{ +Read Top_Unknown_Barcodes.csv file. +\subsection{Usage}{ +\if{html}{\out{<div class="r">
}}\preformatted{BclconvertReports375$read_topunknownbarcodes(x)}\if{html}{\out{</div>
}} +} + +\subsection{Arguments}{ +\if{html}{\out{<div class="arguments">
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to Top_Unknown_Barcodes.csv file.} +} +\if{html}{\out{</div>
}} +} +} +\if{html}{\out{<hr>
}} +\if{html}{\out{<a id="method-BclconvertReports375-read_fastqlist"></a>}} +\if{latex}{\out{\hypertarget{method-BclconvertReports375-read_fastqlist}{}}} +\subsection{Method \code{read_fastqlist()}}{ +Read fastq_list.csv file. +\itemize{ +\item rgid: read group. +\item rgsm: sample ID. +\item rglb: library. +\item lane: flow cell lane. +\item readnum: read number (1 or 2). +\item filepath: path to the FASTQ file. +} +\subsection{Usage}{ +\if{html}{\out{<div class="r">
}}\preformatted{BclconvertReports375$read_fastqlist(x)}\if{html}{\out{</div>
}} +} + +\subsection{Arguments}{ +\if{html}{\out{<div class="arguments">
}} +\describe{ +\item{\code{x}}{(\code{character(1)})\cr +Path to fastq_list.csv file.} +} +\if{html}{\out{</div>
}} +} +} +\if{html}{\out{<hr>
}} +\if{html}{\out{<a id="method-BclconvertReports375-read"></a>}} +\if{latex}{\out{\hypertarget{method-BclconvertReports375-read}{}}} +\subsection{Method \code{read()}}{ +Reads contents of \code{Reports} directory output by BCLConvert. +\subsection{Usage}{ +\if{html}{\out{<div class="r">
}}\preformatted{BclconvertReports375$read()}\if{html}{\out{</div>
}} +} + +\subsection{Returns}{ +A list of tibbles. +} +} +\if{html}{\out{<hr>
}} +\if{html}{\out{<a id="method-BclconvertReports375-write"></a>}} +\if{latex}{\out{\hypertarget{method-BclconvertReports375-write}{}}} +\subsection{Method \code{write()}}{ +Writes tidied contents of \code{Reports} directory output by BCLConvert. +\subsection{Usage}{ +\if{html}{\out{<div class="r">
}}\preformatted{BclconvertReports375$write( + d, + out_dir = NULL, + prefix, + out_format = "tsv", + drid = NULL +)}\if{html}{\out{</div>
}} +} + +\subsection{Arguments}{ +\if{html}{\out{<div class="arguments">
}} +\describe{ +\item{\code{d}}{Parsed object from \code{self$read()}.} + +\item{\code{out_dir}}{Output directory.} + +\item{\code{prefix}}{Prefix of output file(s).} + +\item{\code{out_format}}{Format of output file(s).} + +\item{\code{drid}}{dracarys ID to use for the dataset (e.g. \code{wfrid.123}, \code{prid.456}).} +} +\if{html}{\out{</div>
}} +} +} +\if{html}{\out{<hr>
}} +\if{html}{\out{<a id="method-BclconvertReports375-clone"></a>}} +\if{latex}{\out{\hypertarget{method-BclconvertReports375-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{<div class="r">
}}\preformatted{BclconvertReports375$clone(deep = FALSE)}\if{html}{\out{</div>
}} +} + +\subsection{Arguments}{ +\if{html}{\out{<div class="arguments">
}} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{</div>
}} +} +} +} diff --git a/man/Wf_bcl_convert.Rd b/man/Wf_bcl_convert.Rd deleted file mode 100644 index 1beb85a..0000000 --- a/man/Wf_bcl_convert.Rd +++ /dev/null @@ -1,86 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/bclconvert.R -\name{Wf_bcl_convert} -\alias{Wf_bcl_convert} -\title{Wf bcl_convert R6 Class} -\description{ -Contains methods for reading and processing files output from the UMCCR -\code{bcl_convert} workflow. -} -\examples{ -\dontrun{ -indir <- file.path() -sample_id <- "PTC_ctTSO240429" -library_id <- "L2400482" -d <- TsoCombinedVariantOutputFile$new(x) -d$read() -} -} -\section{Public fields}{ -\if{html}{\out{<div class="r6-fields">
}} -\describe{ -\item{\code{indir}}{Input directory containing Reports per assay type -(e.g. /primary_data/240607_A01052_0209_BHLHFTDSXC/2024061140802544/).} -} -\if{html}{\out{</div>
}} -} -\section{Methods}{ -\subsection{Public methods}{ -\itemize{ -\item \href{#method-Wf_bcl_convert-new}{\code{Wf_bcl_convert$new()}} -\item \href{#method-Wf_bcl_convert-print}{\code{Wf_bcl_convert$print()}} -\item \href{#method-Wf_bcl_convert-clone}{\code{Wf_bcl_convert$clone()}} -} -} -\if{html}{\out{<hr>
}} -\if{html}{\out{<a id="method-Wf_bcl_convert-new"></a>}} -\if{latex}{\out{\hypertarget{method-Wf_bcl_convert-new}{}}} -\subsection{Method \code{new()}}{ -Create a new Wf_bcl_convert object. -\subsection{Usage}{ -\if{html}{\out{<div class="r">
}}\preformatted{Wf_bcl_convert$new(indir)}\if{html}{\out{</div>
}} -} - -\subsection{Arguments}{ -\if{html}{\out{<div class="arguments">
}} -\describe{ -\item{\code{indir}}{Input directory containing Reports per assay type.} -} -\if{html}{\out{</div>
}} -} -} -\if{html}{\out{<hr>
}} -\if{html}{\out{<a id="method-Wf_bcl_convert-print"></a>}} -\if{latex}{\out{\hypertarget{method-Wf_bcl_convert-print}{}}} -\subsection{Method \code{print()}}{ -Print details about the Workflow -\subsection{Usage}{ -\if{html}{\out{<div class="r">
}}\preformatted{Wf_bcl_convert$print(...)}\if{html}{\out{</div>
}} -} - -\subsection{Arguments}{ -\if{html}{\out{<div class="arguments">
}} -\describe{ -\item{\code{...}}{(ignored).} -} -\if{html}{\out{</div>
}} -} -} -\if{html}{\out{<hr>
}} -\if{html}{\out{<a id="method-Wf_bcl_convert-clone"></a>}} -\if{latex}{\out{\hypertarget{method-Wf_bcl_convert-clone}{}}} -\subsection{Method \code{clone()}}{ -The objects of this class are cloneable with this method. -\subsection{Usage}{ -\if{html}{\out{<div class="r">
}}\preformatted{Wf_bcl_convert$clone(deep = FALSE)}\if{html}{\out{</div>
}} -} - -\subsection{Arguments}{ -\if{html}{\out{<div class="arguments">
}} -\describe{ -\item{\code{deep}}{Whether to make a deep clone.} -} -\if{html}{\out{</div>
}} -} -} -}